diff --git a/Dockerfile b/Dockerfile index 2fbbfed9c499d..c9be2691525be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,9 +32,11 @@ RUN echo deb http://ppa.launchpad.net/zfs-native/stable/ubuntu trusty main > /et # Packaged dependencies RUN apt-get update && apt-get install -y \ apparmor \ + asciidoc \ aufs-tools \ automake \ bash-completion \ + bsdmainutils \ btrfs-tools \ build-essential \ createrepo \ @@ -43,15 +45,22 @@ RUN apt-get update && apt-get install -y \ gcc-mingw-w64 \ git \ iptables \ + libaio-dev \ libapparmor-dev \ libcap-dev \ + libprotobuf-c0-dev \ + libprotobuf-dev \ libsqlite3-dev \ libsystemd-journal-dev \ mercurial \ parallel \ pkg-config \ + protobuf-compiler \ + protobuf-c-compiler \ + python-minimal \ python-mock \ python-pip \ + python-protobuf \ python-websocket \ reprepro \ ruby1.9.1 \ @@ -59,6 +68,7 @@ RUN apt-get update && apt-get install -y \ s3cmd=1.1.0* \ ubuntu-zfs \ xfsprogs \ + xmlto \ libzfs-dev \ --no-install-recommends @@ -73,6 +83,13 @@ RUN cd /usr/local/lvm2 \ && make install_device-mapper # see https://git.fedorahosted.org/cgit/lvm2.git/tree/INSTALL +# Install Criu +RUN mkdir -p /usr/src/criu \ + && curl -sSL https://github.com/xemul/criu/archive/v1.6.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 +RUN cd /usr/src/criu \ + && make \ + && make install + # Install Go ENV GO_VERSION 1.5.1 RUN curl -sSL "https://storage.googleapis.com/golang/go${GO_VERSION}.linux-amd64.tar.gz" | tar -v -C /usr/local -xz diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go new file mode 100644 index 0000000000000..9655e68de221f --- /dev/null +++ b/api/client/checkpoint.go @@ -0,0 +1,55 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +// CmdCheckpoint checkpoints the process running in a container +// +// Usage: docker checkpoint CONTAINER +func (cli *DockerCli) CmdCheckpoint(args ...string) error { + cmd := Cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for storing log file") + flLeaveRunning = cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + criuOpts := &runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + LeaveRunning: *flLeaveRunning, + TCPEstablished: true, + ExternalUnixConnections: true, + FileLocks: true, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/checkpoint", criuOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to checkpoint one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/client/restore.go b/api/client/restore.go new file mode 100644 index 0000000000000..e73b62b509303 --- /dev/null +++ b/api/client/restore.go @@ -0,0 +1,57 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + 
"github.com/docker/docker/runconfig" +) + +// CmdRestore restores the process in a checkpointed container +// +// Usage: docker restore CONTAINER +func (cli *DockerCli) CmdRestore(args ...string) error { + cmd := Cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory to restore image files from") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for restore log") + flForce = cmd.Bool([]string{"-force"}, false, "bypass checks for current container state") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + restoreOpts := &runconfig.RestoreConfig{ + CriuOpts: runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + TCPEstablished: true, + ExternalUnixConnections: true, + FileLocks: true, + }, + ForceRestore: *flForce, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/restore", restoreOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to restore one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/server/router/local/local.go b/api/server/router/local/local.go index b27031bd24693..a9e0a00670ca4 100644 --- a/api/server/router/local/local.go +++ b/api/server/router/local/local.go @@ -145,6 +145,8 @@ func (r *router) initRoutes() { NewDeleteRoute("/containers/{name:.*}", r.deleteContainers), NewDeleteRoute("/images/{name:.*}", r.deleteImages), } + + addExperimentalRoutes(r) } func optionsHandler(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { diff --git a/api/server/router/local/local_experimental.go b/api/server/router/local/local_experimental.go new file mode 100644 index 0000000000000..56da2f2a97924 --- /dev/null +++ b/api/server/router/local/local_experimental.go @@ -0,0 +1,65 @@ +// +build experimental + +package local + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/docker/docker/api/server/httputils" + dkrouter "github.com/docker/docker/api/server/router" + "github.com/docker/docker/runconfig" + "golang.org/x/net/context" +) + +func addExperimentalRoutes(r *router) { + newRoutes := []dkrouter.Route{ + NewPostRoute("/containers/{name:.*}/checkpoint", r.postContainersCheckpoint), + NewPostRoute("/containers/{name:.*}/restore", r.postContainersRestore), + } + + r.routes = append(r.routes, newRoutes...) 
+} + +func (s *router) postContainersCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := httputils.CheckForJSON(r); err != nil { + return err + } + + criuOpts := &runconfig.CriuConfig{} + if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { + return err + } + + if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + +func (s *router) postContainersRestore(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := httputils.CheckForJSON(r); err != nil { + return err + } + + restoreOpts := runconfig.RestoreConfig{} + if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { + return err + } + + if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} diff --git a/api/server/router/local/local_stable.go b/api/server/router/local/local_stable.go new file mode 100644 index 0000000000000..7c6c012be06ae --- /dev/null +++ b/api/server/router/local/local_stable.go @@ -0,0 +1,6 @@ +// +build !experimental + +package local + +func addExperimentalRoutes(r *router) { +} diff --git a/api/types/types.go b/api/types/types.go index a6cec44aac2db..e87441f65846c 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -235,17 +235,19 @@ type ExecStartCheck struct { // ContainerState stores container's running state // it's part of ContainerJSONBase and will return by "inspect" command type ContainerState struct { - Status string - Running bool - Paused bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt string - FinishedAt string + Status string + Running bool + Paused bool + Checkpointed bool + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt string + FinishedAt string + CheckpointedAt string `json:"-"` } // ContainerJSONBase contains response of Remote API: diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go new file mode 100644 index 0000000000000..81c3a0d327dac --- /dev/null +++ b/daemon/checkpoint.go @@ -0,0 +1,65 @@ +package daemon + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/docker/docker/runconfig" +) + +// ContainerCheckpoint checkpoints the process running in a container with CRIU +func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfig) error { + container, err := daemon.Get(name) + if err != nil { + return err + } + if !container.IsRunning() { + return fmt.Errorf("Container %s not running", name) + } + + if opts.ImagesDirectory == "" { + opts.ImagesDirectory = filepath.Join(container.root, "criu.image") + if err := os.MkdirAll(opts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) { + return err + } + } + + if opts.WorkDirectory == "" { + opts.WorkDirectory = filepath.Join(container.root, "criu.work") + if err := os.MkdirAll(opts.WorkDirectory, 0755); err != nil && !os.IsExist(err) { + return err + } + } + + if err := daemon.Checkpoint(container, opts); err != nil { + return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) + } + + container.SetCheckpointed(opts.LeaveRunning) + + if opts.LeaveRunning == false { + daemon.Cleanup(container) + } + + // commit 
the filesystem as well, support AUFS only + commitCfg := &ContainerCommitConfig{ + Pause: true, + Config: container.Config, + } + img, err := daemon.Commit(name, commitCfg) + if err != nil { + return err + } + // Update the criu image path and image ID of the container + criuImagePath := opts.ImagesDirectory + container.CriuimagePaths[criuImagePath] = img.ID + // Update image layer of the committed container + container.ImageID = img.ID + + if err := container.toDisk(); err != nil { + return fmt.Errorf("Cannot update config for container: %s", err) + } + + return nil +} diff --git a/daemon/container.go b/daemon/container.go index 528dcf886757d..5bbf4feb61f75 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -411,7 +411,6 @@ func attach(streamConfig *streamConfig, openStdin, stdinOnce, tty bool, stdin io _, err = copyEscapable(cStdin, stdin) } else { _, err = io.Copy(cStdin, stdin) - } if err == io.ErrClosedPipe { err = nil diff --git a/daemon/container_unix.go b/daemon/container_unix.go index 91548b73c8a86..ba36cd2c098db 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -61,6 +61,8 @@ type Container struct { Volumes map[string]string // Deprecated since 1.7, kept for backwards compatibility VolumesRW map[string]bool // Deprecated since 1.7, kept for backwards compatibility + + CriuimagePaths map[string]string // Format: } func killProcessDirectly(container *Container) error { @@ -807,7 +809,7 @@ func (daemon *Daemon) updateNetwork(container *Container) error { return nil } -func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([]libnetwork.EndpointOption, error) { +func (container *Container) buildCreateEndpointOptions(n libnetwork.Network, isRestoring bool) ([]libnetwork.EndpointOption, error) { var ( portSpecs = make(nat.PortSet) bindings = make(nat.PortMap) @@ -896,10 +898,18 @@ func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([] createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) } + /*if isRestoring && container.NetworkSettings.IPAddress != "" { + genericOption := options.Generic{ + netlabel.IPAddress: net.ParseIP(container.NetworkSettings.IPAddress), + } + + createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) + }*/ + return createOptions, nil } -func (daemon *Daemon) allocateNetwork(container *Container) error { +func (daemon *Daemon) allocateNetwork(container *Container, isRestoring bool) error { controller := daemon.netController // Cleanup any stale sandbox left over due to ungraceful daemon shutdown @@ -931,7 +941,7 @@ func (daemon *Daemon) allocateNetwork(container *Container) error { } for n := range container.NetworkSettings.Networks { - if err := daemon.connectToNetwork(container, n, updateSettings); err != nil { + if err := daemon.connectToNetwork(container, n, updateSettings, isRestoring); err != nil { return err } } @@ -956,7 +966,7 @@ func (daemon *Daemon) ConnectToNetwork(container *Container, idOrName string) er if !container.Running { return derr.ErrorCodeNotRunning.WithArgs(container.ID) } - if err := daemon.connectToNetwork(container, idOrName, true); err != nil { + if err := daemon.connectToNetwork(container, idOrName, true, false); err != nil { return err } if err := container.toDiskLocking(); err != nil { @@ -965,7 +975,7 @@ func (daemon *Daemon) ConnectToNetwork(container *Container, idOrName string) er return nil } -func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, updateSettings 
bool) (err error) { +func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, updateSettings bool, isRestoring bool) (err error) { if container.hostConfig.NetworkMode.IsContainer() { return runconfig.ErrConflictSharedNetwork } @@ -998,7 +1008,7 @@ func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, up return err } - createOptions, err := container.buildCreateEndpointOptions(n) + createOptions, err := container.buildCreateEndpointOptions(n, isRestoring) if err != nil { return err } @@ -1045,7 +1055,7 @@ func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, up return nil } -func (daemon *Daemon) initializeNetworking(container *Container) error { +func (daemon *Daemon) initializeNetworking(container *Container, isRestoring bool) error { var err error if container.hostConfig.NetworkMode.IsContainer() { @@ -1076,7 +1086,7 @@ func (daemon *Daemon) initializeNetworking(container *Container) error { } - if err := daemon.allocateNetwork(container); err != nil { + if err := daemon.allocateNetwork(container, isRestoring); err != nil { return err } diff --git a/daemon/container_windows.go b/daemon/container_windows.go index 2d8eb87a0ad40..97dbdbf5a24ee 100644 --- a/daemon/container_windows.go +++ b/daemon/container_windows.go @@ -36,7 +36,7 @@ func (container *Container) createDaemonEnvironment(linkedEnv []string) []string return container.Config.Env } -func (daemon *Daemon) initializeNetworking(container *Container) error { +func (daemon *Daemon) initializeNetworking(container *Container, isRestoring bool) error { return nil } @@ -153,7 +153,7 @@ func (daemon *Daemon) setNetworkNamespaceKey(containerID string, pid int) error } // allocateNetwork is a no-op on Windows. -func (daemon *Daemon) allocateNetwork(container *Container) error { +func (daemon *Daemon) allocateNetwork(container *Container, isRestoring bool) error { return nil } diff --git a/daemon/daemon.go b/daemon/daemon.go index 6336faf55acc2..5288976079895 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -304,6 +304,18 @@ func (daemon *Daemon) restore() error { logrus.Debugf("Loaded container %v", container.ID) containers[container.ID] = &cr{container: container} + + // If the container was checkpointed, we need to reserve + // the IP address that it was using. + // + // XXX We should also reserve host ports (if any). 
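+ // + // The bridge.ReserveIP call kept commented out below predates libnetwork + // and no longer works; see the "Known Issues" section of + // experimental/checkpoint_restore.md.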
+ if container.IsCheckpointed() { + /*err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress) + if err != nil { + log.Errorf("Failed to reserve IP %s for container %s", + container.ID, container.NetworkSettings.IPAddress) + }*/ + } } else { logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } @@ -954,6 +966,21 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, hooks) } +// Checkpoint the container +func (daemon *Daemon) Checkpoint(c *Container, opts *runconfig.CriuConfig) error { + return daemon.execDriver.Checkpoint(c.command, opts) +} + +// Restore the container +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.DriverCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + hooks := execdriver.Hooks{ + Restore: restoreCallback, + } + + exitCode, err := daemon.execDriver.Restore(c.command, pipes, hooks, opts, forceRestore) + return exitCode, err +} + func (daemon *Daemon) kill(c *Container, sig int) error { return daemon.execDriver.Kill(c.command, sig) } diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index e6a6c0f05ec92..bf4e06a54a210 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -615,8 +615,9 @@ func (daemon *Daemon) newBaseContainer(id string) *Container { root: daemon.containerRoot(id), MountPoints: make(map[string]*volume.MountPoint), }, - Volumes: make(map[string]string), - VolumesRW: make(map[string]bool), + Volumes: make(map[string]string), + VolumesRW: make(map[string]bool), + CriuimagePaths: make(map[string]string), } } diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 4a9d7d6e062ad..ef3551c361791 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -6,6 +6,7 @@ import ( "os/exec" "time" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" ) @@ -37,6 +38,8 @@ type Hooks struct { Start DriverCallback // PostStop is called after the container process exits PostStop []DriverCallback + // Restore is called after the container is restored + Restore DriverCallback } // Info is driver specific information based on @@ -72,6 +75,12 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error + // Checkpoints a container (with criu). + Checkpoint(c *Command, opts *runconfig.CriuConfig) error + + // Restores a checkpoint image into a container (with criu). + Restore(c *Command, pipes *Pipes, hooks Hooks, opts *runconfig.CriuConfig, forceRestore bool) (ExitStatus, error) + // Name returns the name of the driver. 
Name() string diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index 09171c56dde3f..d67783b2ecb44 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -158,10 +159,13 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd d.activeContainers[c.ID] = cont d.Unlock() defer func() { - if !destroyed { - cont.Destroy() + status, _ := cont.Status() + if status != libcontainer.Checkpointed { + if !destroyed { + cont.Destroy() + } + d.cleanContainer(c.ID) } - d.cleanContainer(c.ID) }() if err := cont.Start(p); err != nil { @@ -302,6 +306,122 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } +func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.CriuOpts { + return &libcontainer.CriuOpts{ + ImagesDirectory: runconfigOpts.ImagesDirectory, + WorkDirectory: runconfigOpts.WorkDirectory, + LeaveRunning: runconfigOpts.LeaveRunning, + TcpEstablished: runconfigOpts.TCPEstablished, + ExternalUnixConnections: runconfigOpts.ExternalUnixConnections, + ShellJob: runconfigOpts.ShellJob, + FileLocks: runconfigOpts.FileLocks, + } +} + +// Checkpoint implements the exec driver Driver interface. +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { + // Look up the container under the driver lock, but do not hold the + // lock during the (potentially long-running) CRIU dump itself. + d.Lock() + active := d.activeContainers[c.ID] + d.Unlock() + if active == nil { + return fmt.Errorf("active container for %s does not exist", c.ID) + } + + return active.Checkpoint(libcontainerCriuOpts(opts)) +} + +// Restore implements the exec driver Driver interface. 
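+// If the container is still known to the libcontainer factory it is loaded +// and restored in place; when forceRestore is set, a new libcontainer +// container is created from the command's config first, so a checkpoint +// image can be restored into a container that was never checkpointed here.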
+func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + var ( + cont libcontainer.Container + err error + ) + + destroyed := false + cont, err = d.factory.Load(c.ID) + if err != nil { + if forceRestore { + var config *configs.Config + config, err = d.createContainer(c, hooks) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + cont, err = d.factory.Create(c.ID, config) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + } else { + return execdriver.ExitStatus{ExitCode: -1}, err + } + } + + p := &libcontainer.Process{ + Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), + Env: c.ProcessConfig.Env, + Cwd: c.WorkingDir, + User: c.ProcessConfig.User, + } + + config := cont.Config() + if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + + d.Lock() + d.activeContainers[c.ID] = cont + d.Unlock() + defer func() { + status, _ := cont.Status() + if status != libcontainer.Checkpointed { + if !destroyed { + cont.Destroy() + } + d.cleanContainer(c.ID) + } + }() + + if err := cont.Restore(p, libcontainerCriuOpts(opts)); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + + oom := notifyOnOOM(cont) + if hooks.Restore != nil { + pid, err := p.Pid() + if err != nil { + p.Signal(os.Kill) + p.Wait() + return execdriver.ExitStatus{ExitCode: -1}, err + } + hooks.Restore(&c.ProcessConfig, pid, oom) + } + + waitF := p.Wait + if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { + // we need such hack for tracking processes with inherited fds, + // because cmd.Wait() waiting for all streams to be copied + waitF = waitInPIDHost(p, cont) + } + ps, err := waitF() + if err != nil { + execErr, ok := err.(*exec.ExitError) + if !ok { + return execdriver.ExitStatus{ExitCode: -1}, err + } + ps = execErr.ProcessState + } + + cont.Destroy() + destroyed = true + _, oomKill := <-oom + return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil +} + // Terminate implements the exec driver Driver interface. 
func (d *Driver) Terminate(c *execdriver.Command) error { defer d.cleanContainer(c.ID) diff --git a/daemon/execdriver/windows/windows.go b/daemon/execdriver/windows/windows.go index 587032e6e50f8..788c4e0f1d4c8 100644 --- a/daemon/execdriver/windows/windows.go +++ b/daemon/execdriver/windows/windows.go @@ -109,3 +109,13 @@ func setupEnvironmentVariables(a []string) map[string]string { } return r } + +// Checkpoint does not currently implement checkpoint, but complies to the Driver interface +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { + return fmt.Errorf("Windows: Containers cannot be checkpointed") +} + +// Restore does not currently implement restore, but complies to the Driver interface +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Windows: Containers cannot be restored") +} diff --git a/daemon/inspect.go b/daemon/inspect.go index 3107b3bd55e3e..bdecc27c1c7aa 100644 --- a/daemon/inspect.go +++ b/daemon/inspect.go @@ -104,17 +104,19 @@ func (daemon *Daemon) getInspectData(container *Container, size bool) (*types.Co } containerState := &types.ContainerState{ - Status: container.State.StateString(), - Running: container.State.Running, - Paused: container.State.Paused, - Restarting: container.State.Restarting, - OOMKilled: container.State.OOMKilled, - Dead: container.State.Dead, - Pid: container.State.Pid, - ExitCode: container.State.ExitCode, - Error: container.State.Error, - StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), - FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + Status: container.State.StateString(), + Running: container.State.Running, + Paused: container.State.Paused, + Checkpointed: container.State.Checkpointed, + Restarting: container.State.Restarting, + OOMKilled: container.State.OOMKilled, + Dead: container.State.Dead, + Pid: container.State.Pid, + ExitCode: container.State.ExitCode, + Error: container.State.Error, + StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), + FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + CheckpointedAt: container.State.CheckpointedAt.Format(time.RFC3339Nano), } contJSONBase := &types.ContainerJSONBase{ diff --git a/daemon/monitor.go b/daemon/monitor.go index c36d427a96ef8..380a8675c33c2 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -31,6 +31,8 @@ type containerSupervisor interface { StartLogging(*Container) error // Run starts a container Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error) + // Restore restores a container that was previously checkpointed + Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.DriverCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) // IsShuttingDown tells whether the supervisor is shutting down or not IsShuttingDown() bool } @@ -68,6 +70,9 @@ type containerMonitor struct { // left waiting for nothing to happen during this time stopChan chan struct{} + // like startSignal but for restoring a container + restoreSignal chan struct{} + // timeIncrement is the amount of time to wait between restarts // this is in milliseconds timeIncrement int @@ -86,6 +91,7 @@ func (daemon *Daemon) newContainerMonitor(container *Container, policy runconfig timeIncrement: defaultTimeIncrement, stopChan: make(chan 
struct{}), startSignal: make(chan struct{}), + restoreSignal: make(chan struct{}), } } @@ -124,14 +130,30 @@ func (m *containerMonitor) Close() error { // Start starts the containers process and monitors it according to the restart policy func (m *containerMonitor) Start() error { + return m.start(nil, false) +} + +// Restore restores the container from a checkpoint image and monitors it according to the restart policy +func (m *containerMonitor) Restore(restoreOpts *runconfig.CriuConfig, forceRestore bool) error { + return m.start(restoreOpts, forceRestore) +} + +// start is the internal method that starts or restores the container and monitors it +func (m *containerMonitor) start(restoreOpts *runconfig.CriuConfig, forceRestore bool) error { var ( err error exitStatus execdriver.ExitStatus // this variable indicates where we in execution flow: // before Run or after - afterRun bool + afterRun bool + isRestoring bool ) + // we only want to restore once, but upon restart we should simply + // start the container normally, so isRestoring tells us where we are, + // and the initial value is whether or not we were provided restore opts + isRestoring = restoreOpts != nil + // ensure that when the monitor finally exits we release the networking and unmount the rootfs defer func() { if afterRun { @@ -152,19 +174,30 @@ for { m.container.RestartCount++ - if err := m.supervisor.StartLogging(m.container); err != nil { - m.resetContainer(false) + if m.container.logDriver == nil { + if err := m.supervisor.StartLogging(m.container); err != nil { + m.resetContainer(false) - return err + return err + } } pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) - m.logEvent("start") - m.lastStartTime = time.Now() - if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil { + if isRestoring { + m.logEvent("restore") + + exitStatus, err = m.supervisor.Restore(m.container, pipes, m.restoreCallback, restoreOpts, forceRestore) + isRestoring = false + } else { + m.logEvent("start") + + exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback) + } + + if err != nil { // if we receive an internal error from the initial start of a container then lets // return it instead of entering the restart loop // set to 127 for container cmd not found/does not exist) @@ -319,6 +352,49 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid return nil } +// Like callback() but for restoring a container. +func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int, chOOM <-chan struct{}) error { + go func() { + _, ok := <-chOOM + if ok { + m.logEvent("oom") + } + }() + + if processConfig.Tty { + // The callback is called after the process Start() + // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave + // which we close here. + if c, ok := processConfig.Stdout.(io.Closer); ok { + c.Close() + } + } + + // If restorePid is 0, it means that restore failed. + if restorePid != 0 { + m.container.setRunning(restorePid) + } + + // Unblock the goroutine waiting in waitForRestore(). + select { + case <-m.restoreSignal: + default: + close(m.restoreSignal) + } + + if restorePid != 0 { + // Write config.json and hostconfig.json files + // to /var/lib/docker/containers/. 
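+ // The restored process has a new PID (recorded via setRunning above), + // so the on-disk state must be refreshed to match the live container.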
+ if err := m.container.toDiskLocking(); err != nil { + logrus.Errorf("Error saving container to disk: %s", err) + } + } + + return nil +} + // resetContainer resets the container's IO and ensures that the command is able to be executed again // by copying the data into a new struct // if lock is true, then container locked during reset diff --git a/daemon/restore.go b/daemon/restore.go new file mode 100644 index 0000000000000..16393883a7531 --- /dev/null +++ b/daemon/restore.go @@ -0,0 +1,145 @@ +package daemon + +import ( + "fmt" + "path/filepath" + + derr "github.com/docker/docker/errors" + "github.com/docker/docker/pkg/promise" + "github.com/docker/docker/runconfig" +) + +// ContainerRestore restores the process in a container with CRIU +func (daemon *Daemon) ContainerRestore(name string, opts *runconfig.CriuConfig, forceRestore bool) error { + container, err := daemon.Get(name) + if err != nil { + return err + } + + if !forceRestore { + // TODO: it may be enough to bypass only the checkpointed check here; + // it is not yet clear how restore behaves if the container is already running + if container.IsRunning() { + return fmt.Errorf("Container %s already running", name) + } + + if !container.IsCheckpointed() { + return fmt.Errorf("Container %s is not checkpointed", name) + } + } else { + if !container.HasBeenCheckpointed() && opts.ImagesDirectory == "" { + return fmt.Errorf("You must specify an image directory to restore container %s from", name) + } + } + + if opts.ImagesDirectory == "" { + opts.ImagesDirectory = filepath.Join(container.root, "criu.image") + } + + if opts.WorkDirectory == "" { + opts.WorkDirectory = filepath.Join(container.root, "criu.work") + } + + if err = daemon.containerRestore(container, opts, forceRestore); err != nil { + return fmt.Errorf("Cannot restore container %s: %s", name, err) + } + + return nil +} + +// containerRestore prepares the container to be restored by setting up +// everything the container needs, just like containerStart, such as +// storage and networking, as well as links between containers. +// The container is left waiting for a signal that restore has finished +func (daemon *Daemon) containerRestore(container *Container, opts *runconfig.CriuConfig, forceRestore bool) error { + var err error + container.Lock() + defer container.Unlock() + + if container.Running { + return nil + } + + if container.removalInProgress || container.Dead { + return derr.ErrorCodeContainerBeingRemoved + } + + // if we encounter an error during start we need to ensure that any other + // setup has been cleaned up properly + defer func() { + if err != nil { + container.setError(err) + // if no one else has set it, make sure we don't leave it at zero + if container.ExitCode == 0 { + container.ExitCode = 128 + } + container.toDisk() + daemon.Cleanup(container) + daemon.LogContainerEvent(container, "die") + } + }() + + if err := daemon.conditionalMountOnStart(container); err != nil { + return err + } + + // Make sure NetworkMode has an acceptable value. We do this to ensure + // backwards API compatibility. 
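+ // Unlike containerStart, initializeNetworking is invoked below with + // isRestoring=true so networking setup can special-case the restore path.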
+ container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig) + + if err := daemon.initializeNetworking(container, true); err != nil { + return err + } + linkedEnv, err := daemon.setupLinkedContainers(container) + if err != nil { + return err + } + if err := container.setupWorkingDirectory(); err != nil { + return err + } + env := container.createDaemonEnvironment(linkedEnv) + if err := daemon.populateCommand(container, env); err != nil { + return err + } + + if !container.hostConfig.IpcMode.IsContainer() && !container.hostConfig.IpcMode.IsHost() { + if err := daemon.setupIpcDirs(container); err != nil { + return err + } + } + + if err := daemon.createRootfs(container); err != nil { + return err + } + + mounts, err := daemon.setupMounts(container) + if err != nil { + return err + } + mounts = append(mounts, container.ipcMounts()...) + + container.command.Mounts = mounts + return daemon.waitForRestore(container, opts, forceRestore) +} + +func (daemon *Daemon) waitForRestore(container *Container, opts *runconfig.CriuConfig, forceRestore bool) error { + container.monitor = daemon.newContainerMonitor(container, container.hostConfig.RestartPolicy) + + // After calling promise.Go() we'll have two goroutines: + // - The current goroutine that will block in the select + // below until restore is done. + // - A new goroutine that will restore the container and + // wait for it to exit. + select { + case <-container.monitor.restoreSignal: + if container.ExitCode != 0 { + return fmt.Errorf("restore process failed") + } + case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }): + return err + } + + return nil +} diff --git a/daemon/start.go b/daemon/start.go index de4516c7b62ee..d0a795af73917 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -96,7 +96,7 @@ func (daemon *Daemon) containerStart(container *Container) (err error) { // backwards API compatibility. container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig) - if err := daemon.initializeNetworking(container); err != nil { + if err := daemon.initializeNetworking(container, false); err != nil { return err } linkedEnv, err := daemon.setupLinkedContainers(container) @@ -144,7 +144,12 @@ func (daemon *Daemon) waitForStart(container *Container) error { // Cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (daemon *Daemon) Cleanup(container *Container) { - daemon.releaseNetwork(container) + + if container.IsCheckpointed() { + logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID) + } else { + daemon.releaseNetwork(container) + } container.unmountIpcMounts(detachMounted) diff --git a/daemon/state.go b/daemon/state.go index 8ff5effc637c2..5515dc4b92498 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -20,6 +20,7 @@ type State struct { Running bool Paused bool Restarting bool + Checkpointed bool OOMKilled bool removalInProgress bool // Not need for this to be persistent on disk. 
Dead bool @@ -28,6 +29,7 @@ type State struct { Error string // contains last known error when starting the container StartedAt time.Time FinishedAt time.Time + CheckpointedAt time.Time waitChan chan struct{} } @@ -51,6 +53,10 @@ func (s *State) String() string { return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) } + if s.Checkpointed { + return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt))) + } + if s.removalInProgress { return "Removal In Progress" } @@ -82,6 +88,10 @@ func (s *State) StateString() string { return "running" } + if s.Checkpointed { + return "checkpointed" + } + if s.Dead { return "dead" } @@ -182,6 +192,7 @@ func (s *State) setRunning(pid int) { s.Error = "" s.Running = true s.Paused = false + s.Checkpointed = false s.Restarting = false s.ExitCode = 0 s.Pid = pid @@ -261,3 +272,27 @@ func (s *State) setDead() { s.Dead = true s.Unlock() } + +// SetCheckpointed sets the container's status to indicate it has been checkpointed +func (s *State) SetCheckpointed(leaveRunning bool) { + s.Lock() + s.CheckpointedAt = time.Now().UTC() + s.Checkpointed = !leaveRunning + s.Running = false + s.Paused = false + s.Restarting = false + // XXX Not sure if we need to close and recreate waitChan. + // close(s.waitChan) + // s.waitChan = make(chan struct{}) + s.Unlock() +} + +// HasBeenCheckpointed indicates whether the container has ever been checkpointed +func (s *State) HasBeenCheckpointed() bool { + return s.CheckpointedAt != time.Time{} +} + +// IsCheckpointed indicates whether the container is currently checkpointed +func (s *State) IsCheckpointed() bool { + return s.Checkpointed +} diff --git a/docker/docker.go b/docker/docker.go index be1f51e5f81f7..c9e365f0a694c 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -36,7 +36,7 @@ func main() { help := "\nCommands:\n" for _, cmd := range dockerCommands { - help += fmt.Sprintf(" %-10.10s%s\n", cmd.Name, cmd.Description) + help += fmt.Sprintf(" %-11.11s%s\n", cmd.Name, cmd.Description) } help += "\nRun 'docker COMMAND --help' for more information on a command." diff --git a/docker/flags_experimental.go b/docker/flags_experimental.go new file mode 100644 index 0000000000000..608865d4e37b9 --- /dev/null +++ b/docker/flags_experimental.go @@ -0,0 +1,21 @@ +// +build experimental + +package main + +import ( + "sort" + + "github.com/docker/docker/cli" +) + +func init() { + experimentalCommands := []cli.Command{ + {"checkpoint", "Checkpoint one or more running containers"}, + {"restore", "Restore one or more checkpointed containers"}, + } + + dockerCommands = append(dockerCommands, experimentalCommands...) + + //Sorting logic required here to pass Command Sort Test. + sort.Sort(byName(dockerCommands)) +} diff --git a/experimental/README.md b/experimental/README.md index d2eff37d8d4e6..42758298c7403 100644 --- a/experimental/README.md +++ b/experimental/README.md @@ -2,7 +2,7 @@ This page contains a list of features in the Docker engine which are experimental. Experimental features are **not** ready for production. They are -provided for test and evaluation in your sandbox environments. +provided for test and evaluation in your sandbox environments. The information below describes each feature and the GitHub pull requests and issues associated with it. 
If necessary, links are provided to additional @@ -73,9 +73,10 @@ to build a Docker binary with the experimental features enabled: * [External graphdriver plugins](plugins_graphdriver.md) * [User namespaces](userns.md) + * [Checkpoint & Restore](checkpoint_restore.md) ## How to comment on an experimental feature -Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR. +Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR. -Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or in on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user). +Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user). diff --git a/experimental/checkpoint_restore.md b/experimental/checkpoint_restore.md new file mode 100644 index 0000000000000..f3ed0b5898e85 --- /dev/null +++ b/experimental/checkpoint_restore.md @@ -0,0 +1,154 @@ +# Docker Checkpoint & Restore + +Checkpoint & Restore is a new feature that allows you to freeze a running +container by checkpointing it, which turns its state into a collection of files +on disk. Later, the container can be restored from the point it was frozen. + +This is accomplished using a tool called [CRIU](http://criu.org), which is an +external dependency of this feature. A good overview of the history of +checkpoint and restore in Docker is available in this +[Kubernetes blog post](http://blog.kubernetes.io/2015/07/how-did-quake-demo-from-dockercon-work.html). + +## Installing CRIU + +If you use a Debian-based system, you can add the CRIU PPA +(https://launchpad.net/~criu/+archive/ubuntu/ppa) and install it with apt-get. + +Alternatively, you can [build CRIU from source](http://criu.org/Installation). + +## Use cases for checkpoint & restore + +This feature is currently focused on single-host use cases for checkpoint and +restore. Here are a few: + +- Restarting or upgrading the docker daemon without stopping containers +- Restarting the host machine without stopping/starting containers +- Speeding up the start time of slow-starting applications +- "Rewinding" processes to an earlier point in time +- "Forensic debugging" of running processes + +Another primary use case of checkpoint & restore outside of Docker is the live +migration of a server from one machine to another. This is possible with the +current implementation, but not currently a priority (and so the workflow is +not optimized for the task). + +## Using Checkpoint & Restore + +Two new top-level commands are introduced in the CLI: `checkpoint` & `restore`. +The options for checkpoint: + + Usage: docker checkpoint [OPTIONS] CONTAINER [CONTAINER...] + + Checkpoint one or more running containers + + --allow-shell=false allow checkpointing shell jobs + --image-dir= directory for storing checkpoint image files + --leave-running=false leave the container running after checkpoint + --work-dir= directory for storing log file + +And for restore: + + Usage: docker restore [OPTIONS] CONTAINER [CONTAINER...] 
+ + Restore one or more checkpointed containers + + --allow-shell=false allow restoring shell jobs + --force=false bypass checks for current container state + --image-dir= directory to restore image files from + --work-dir= directory for restore log + +A simple example of using checkpoint & restore on a container: + + $ docker run --name cr -d busybox /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done' + > abc0123 + + $ docker checkpoint cr + > abc0123 + + $ docker restore cr + > abc0123 + +This process just logs an incrementing counter to stdout. If you run `docker logs` +in between running/checkpointing/restoring you should see that the counter +increases while the process is running, stops while it's checkpointed, and +resumes from the point it left off once you restore. + +### Same container checkpoint/restore + +The above example falls into the category of "same container" use cases for c/r. +Restarting the daemon is an example of this kind of use case. There is only one +container here at any point in time. That container's status, once it is +checkpointed, will be "Checkpointed", and `docker inspect` will show that status +as well as the time of the last checkpoint. The IP address and other container +state do not change (see known issues at the bottom of this document). + +### New container checkpoint/restore + +Here's an example of a "new container" use case for c/r: + + $ docker run some_image + > abc789 + + ## the container runs for a while + + $ docker checkpoint --image-dir=/some/path abc789 + > abc789 + +At this point, we've created a checkpoint image at `/some/path` that encodes a +process at the exact state we want it to be. Now, at some later point in time, +we can put a copy of that exact state into a new container (perhaps many times): + + $ docker create some_image + > def123 + + $ docker restore --force=true --image-dir=/some/path def123 + > def123 + +We created a new container (but didn't start it), and then we restored our +checkpointed process into that container. + +This is obviously more involved than the simple use case shown earlier. It +requires starting subsequent containers with the same configuration (e.g. +the same mounted volumes, the same base image, etc.). + +### Options + +Checkpoint & Restore: + + --image-dir= directory for storing checkpoint image files + +Allows you to specify the path for writing a checkpoint image, or the path for +the image you want to restore. + + --work-dir= directory for storing log file + +Allows you to specify the path for writing the CRIU log. + + --leave-running=false leave the container running after checkpoint + +Normally, when checkpointing a process, the process is stopped afterwards. +When this flag is enabled, the process keeps running after a checkpoint. This is +useful if you want to capture a process at multiple points in time, for example +to debug it later or to rewind it. It's also useful for +minimizing downtime when checkpointing processes with a large memory footprint. + +Restore Only: + + --force=false force restoring into a container + +As shown in the "new container" example, this flag allows you to restore a +checkpoint image into a container that was not previously checkpointed. +Normally, docker would return an error when restoring into a container that +has not been previously checkpointed. + +## Known Issues + +- Currently, networking is broken in this PR. 
Although it's implemented at the +libcontainer level, the method used no longer works since the introduction of +libnetwork. See: + - https://github.com/docker/libnetwork/pull/465 + - https://github.com/boucher/docker/pull/15 +- There are likely several networking-related issues to work out, like: + - ensuring IPs are reserved across daemon restarts + - ensuring port maps are reserved + - deciding how to deal with network resources in the "new container" model diff --git a/integration-cli/docker_cli_checkpoint_test.go b/integration-cli/docker_cli_checkpoint_test.go new file mode 100644 index 0000000000000..09ec47a9a0d54 --- /dev/null +++ b/integration-cli/docker_cli_checkpoint_test.go @@ -0,0 +1,41 @@ +// +build experimental + +package main + +import ( + "os/exec" + "strings" + + "github.com/go-check/check" +) + +func (s *DockerSuite) TestCheckpointAndRestore(c *check.C) { + defer unpauseAllContainers() + + runCmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top") + out, _, err := runCommandWithOutput(runCmd) + if err != nil { + c.Fatalf("failed to run container: %v, output: %q", err, out) + } + + containerID := strings.TrimSpace(out) + checkpointCmd := exec.Command(dockerBinary, "checkpoint", containerID) + out, _, err = runCommandWithOutput(checkpointCmd) + if err != nil { + c.Fatalf("failed to checkpoint container: %v, output: %q", err, out) + } + + out, err = inspectField(containerID, "State.Checkpointed") + c.Assert(err, check.IsNil) + c.Assert(out, check.Equals, "true") + + restoreCmd := exec.Command(dockerBinary, "restore", containerID) + out, _, _, err = runCommandWithStdoutStderr(restoreCmd) + if err != nil { + c.Fatalf("failed to restore container: %v, output: %q", err, out) + } + + out, err = inspectField(containerID, "State.Checkpointed") + c.Assert(err, check.IsNil) + c.Assert(out, check.Equals, "false") +} diff --git a/integration-cli/docker_cli_help_test.go b/integration-cli/docker_cli_help_test.go index 5c184393164b8..1a0557621af92 100644 --- a/integration-cli/docker_cli_help_test.go +++ b/integration-cli/docker_cli_help_test.go @@ -227,7 +227,7 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) { // Number of commands for standard release and experimental release standard := 40 - experimental := 1 + experimental := 3 expected := standard + experimental if isLocalDaemon { expected++ // for the daemon command diff --git a/project/PACKAGERS.md b/project/PACKAGERS.md index 5ea659fa44e2a..535e49b822fda 100644 --- a/project/PACKAGERS.md +++ b/project/PACKAGERS.md @@ -303,6 +303,9 @@ by having support for them in the kernel or userspace. 
A few examples include: least the "auplink" utility from aufs-tools) * BTRFS graph driver (requires BTRFS support enabled in the kernel) * ZFS graph driver (requires userspace zfs-utils and a corresponding kernel module) +* Checkpoint/Restore containers: + - requires criu version 1.6 or later (criu.org) + - requires kernel version 3.19 or later if using overlay-fs ## Daemon Init Script diff --git a/runconfig/restore.go b/runconfig/restore.go new file mode 100644 index 0000000000000..8993294411a96 --- /dev/null +++ b/runconfig/restore.go @@ -0,0 +1,18 @@ +package runconfig + +// CriuConfig holds configuration options passed down to libcontainer and CRIU +type CriuConfig struct { + ImagesDirectory string + WorkDirectory string + LeaveRunning bool + TCPEstablished bool + ExternalUnixConnections bool + ShellJob bool + FileLocks bool +} + +// RestoreConfig holds the restore command options, which is a superset of the CRIU options +type RestoreConfig struct { + CriuOpts CriuConfig + ForceRestore bool +}
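
For reference, here is a minimal sketch of driving the two new endpoints directly over the Remote API. It assumes a daemon built with the `experimental` tag that is reachable at `http://localhost:2375` (an assumption; most daemons listen on a unix socket instead), and it copies the two structs from `runconfig/restore.go` so the snippet stays self-contained. The field values mirror the defaults sent by `api/client/checkpoint.go` and `api/client/restore.go`.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// CriuConfig and RestoreConfig mirror the types added in runconfig/restore.go.
type CriuConfig struct {
	ImagesDirectory         string
	WorkDirectory           string
	LeaveRunning            bool
	TCPEstablished          bool
	ExternalUnixConnections bool
	ShellJob                bool
	FileLocks               bool
}

type RestoreConfig struct {
	CriuOpts     CriuConfig
	ForceRestore bool
}

func main() {
	// Assumed daemon address; adjust for your setup (daemons commonly
	// listen on unix:///var/run/docker.sock rather than TCP).
	const daemon = "http://localhost:2375"
	const name = "cr" // container name or ID

	// POST /containers/{name}/checkpoint with a CriuConfig JSON body;
	// the handler replies 204 No Content on success.
	ckpt, _ := json.Marshal(CriuConfig{
		TCPEstablished:          true,
		ExternalUnixConnections: true,
		FileLocks:               true,
	})
	resp, err := http.Post(daemon+"/containers/"+name+"/checkpoint",
		"application/json", bytes.NewReader(ckpt))
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("checkpoint:", resp.Status)

	// POST /containers/{name}/restore with a RestoreConfig JSON body.
	rst, _ := json.Marshal(RestoreConfig{
		CriuOpts: CriuConfig{
			TCPEstablished:          true,
			ExternalUnixConnections: true,
			FileLocks:               true,
		},
	})
	resp, err = http.Post(daemon+"/containers/"+name+"/restore",
		"application/json", bytes.NewReader(rst))
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("restore:", resp.Status)
}
```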