From a971a0566234517450d78b456d061b5573ab2300 Mon Sep 17 00:00:00 2001 From: Saied Kazemi Date: Thu, 5 Feb 2015 20:32:27 -0800 Subject: [PATCH 1/4] Checkpoint/Restore Support: add exec driver methods Methods for checkpointing and restoring containers were added to the native driver. The LXC driver returns an error message that these methods are not implemented yet. Signed-off-by: Saied Kazemi Conflicts: daemon/execdriver/native/create.go daemon/execdriver/native/driver.go daemon/execdriver/native/init.go Conflicts: daemon/execdriver/driver.go daemon/execdriver/native/create.go --- daemon/execdriver/driver.go | 6 ++ daemon/execdriver/native/driver.go | 150 +++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 4a9d7d6e062ad..bcf32fcaf2d4d 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -39,6 +39,8 @@ type Hooks struct { PostStop []DriverCallback } +type RestoreCallback func(*ProcessConfig, int) + // Info is driver specific information based on // processes registered with the driver type Info interface { @@ -72,6 +74,10 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error + Checkpoint(c *Command) error + + Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error) + // Name returns the name of the driver. Name() string diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index 09171c56dde3f..f87a40a0083ec 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" + "github.com/docker/docker/utils" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -302,6 +303,155 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } +// XXX Where is the right place for the following +// const and getCheckpointImageDir() function? +const ( + containersDir = "/var/lib/docker/containers" + criuImgDir = "criu_img" +) + +func getCheckpointImageDir(containerId string) string { + return filepath.Join(containersDir, containerId, criuImgDir) +} + +func (d *driver) Checkpoint(c *execdriver.Command) error { + active := d.activeContainers[c.ID] + if active == nil { + return fmt.Errorf("active container for %s does not exist", c.ID) + } + container := active.container + + // Create an image directory for this container (which + // may already exist from a previous checkpoint). + imageDir := getCheckpointImageDir(c.ID) + err := os.MkdirAll(imageDir, 0700) + if err != nil && !os.IsExist(err) { + return err + } + + // Copy container.json and state.json files to the CRIU + // image directory for later use during restore. Do this + // before checkpointing because after checkpoint the container + // will exit and these files will be removed. 
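+	// The restore path reads these files back in createRestoreContainer()
+	// to rebuild the container's libcontainer configuration and state.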
+ log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir) + srcFiles := []string{"container.json", "state.json"} + for _, f := range srcFiles { + srcFile := filepath.Join(d.root, c.ID, f) + dstFile := filepath.Join(imageDir, f) + if _, err := utils.CopyFile(srcFile, dstFile); err != nil { + return err + } + } + + d.Lock() + defer d.Unlock() + err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid) + if err != nil { + return err + } + + return nil +} + +type restoreOutput struct { + exitCode int + err error +} + +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + imageDir := getCheckpointImageDir(c.ID) + container, err := d.createRestoreContainer(c, imageDir) + if err != nil { + return 1, err + } + + var term execdriver.Terminal + + if c.ProcessConfig.Tty { + term, err = NewTtyConsole(&c.ProcessConfig, pipes) + } else { + term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) + } + if err != nil { + return -1, err + } + c.ProcessConfig.Terminal = term + + d.Lock() + d.activeContainers[c.ID] = &activeContainer{ + container: container, + cmd: &c.ProcessConfig.Cmd, + } + d.Unlock() + defer d.cleanContainer(c.ID) + + // Since the CRIU binary exits after restoring the container, we + // need to reap its child by setting PR_SET_CHILD_SUBREAPER (36) + // so that it'll be owned by this process (Docker daemon) after restore. + // + // XXX This really belongs to where the Docker daemon starts. + if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 { + return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr) + } + + restoreOutputChan := make(chan restoreOutput, 1) + waitForRestore := make(chan struct{}) + + go func() { + exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir, + func(child *os.File, args []string) *exec.Cmd { + cmd := new(exec.Cmd) + cmd.Path = d.initPath + cmd.Args = append([]string{ + DriverName, + "-restore", + "-pipe", "3", + "--", + }, args...) + cmd.ExtraFiles = []*os.File{child} + return cmd + }, + func(restorePid int) error { + log.CRDbg("restorePid=%d", restorePid) + if restorePid == 0 { + restoreCallback(&c.ProcessConfig, 0) + return nil + } + + // The container.json file should be written *after* the container + // has started because its StdFds cannot be initialized before. + // + // XXX How do we handle error here? + d.writeContainerFile(container, c.ID) + close(waitForRestore) + if restoreCallback != nil { + c.ProcessConfig.Process, err = os.FindProcess(restorePid) + if err != nil { + log.Debugf("cannot find restored process %d", restorePid) + return err + } + c.ContainerPid = c.ProcessConfig.Process.Pid + restoreCallback(&c.ProcessConfig, c.ContainerPid) + } + return nil + }) + restoreOutputChan <- restoreOutput{exitCode, err} + }() + + select { + case restoreOutput := <-restoreOutputChan: + // there was an error + return restoreOutput.exitCode, restoreOutput.err + case <-waitForRestore: + // container restored + break + } + + // Wait for the container to exit. + restoreOutput := <-restoreOutputChan + return restoreOutput.exitCode, restoreOutput.err +} + // Terminate implements the exec driver Driver interface. 
func (d *Driver) Terminate(c *execdriver.Command) error { defer d.cleanContainer(c.ID) From acf9200aab244d3d163730a52f3ebc0edeebb22a Mon Sep 17 00:00:00 2001 From: Saied Kazemi Date: Thu, 5 Feb 2015 20:37:07 -0800 Subject: [PATCH 2/4] Checkpoint/Restore Support: add functionality to daemon Support was added to the daemon to use the Checkpoint and Restore methods of the native exec driver for checkpointing and restoring containers. Signed-off-by: Saied Kazemi Conflicts: api/server/server.go daemon/container.go daemon/daemon.go daemon/networkdriver/bridge/driver.go daemon/state.go vendor/src/github.com/docker/libnetwork/ipallocator/allocator.go Conflicts: api/server/server.go --- api/server/server.go | 30 +++++++++++++++++++ daemon/checkpoint.go | 55 ++++++++++++++++++++++++++++++++++ daemon/container.go | 59 ++++++++++++++++++++++++++++++++++++- daemon/daemon.go | 31 ++++++++++++++++++++ daemon/monitor.go | 70 ++++++++++++++++++++++++++++++++++++++++++++ daemon/start.go | 7 ++++- daemon/state.go | 23 +++++++++++++++ 7 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 daemon/checkpoint.go diff --git a/api/server/server.go b/api/server/server.go index b90d704c36147..972bd10f76bd2 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -119,6 +119,36 @@ func (s *HTTPServer) Close() error { return s.l.Close() } +func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + job := eng.Job("checkpoint", vars["name"]) + if err := job.Run(); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) + return nil +} + +func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + job := eng.Job("restore", vars["name"]) + if err := job.Run(); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) + return nil +} + func writeCorsHeaders(w http.ResponseWriter, r *http.Request, corsHeaders string) { logrus.Debugf("CORS header is enabled and set to: %s", corsHeaders) w.Header().Add("Access-Control-Allow-Origin", corsHeaders) diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go new file mode 100644 index 0000000000000..f6057c6a028f9 --- /dev/null +++ b/daemon/checkpoint.go @@ -0,0 +1,55 @@ +package daemon + +import ( + "github.com/docker/docker/engine" +) + +// Checkpoint a running container. +func (daemon *Daemon) ContainerCheckpoint(job *engine.Job) engine.Status { + if len(job.Args) != 1 { + return job.Errorf("Usage: %s CONTAINER\n", job.Name) + } + + name := job.Args[0] + container, err := daemon.Get(name) + if err != nil { + return job.Error(err) + } + if !container.IsRunning() { + return job.Errorf("Container %s not running", name) + } + + if err := container.Checkpoint(); err != nil { + return job.Errorf("Cannot checkpoint container %s: %s", name, err) + } + + container.LogEvent("checkpoint") + return engine.StatusOK +} + +// Restore a checkpointed container. 
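+// The container must have been checkpointed and must not be running;
+// if the restore fails, a "die" event is logged for the container.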
+func (daemon *Daemon) ContainerRestore(job *engine.Job) engine.Status { + if len(job.Args) != 1 { + return job.Errorf("Usage: %s CONTAINER\n", job.Name) + } + + name := job.Args[0] + container, err := daemon.Get(name) + if err != nil { + return job.Error(err) + } + if container.IsRunning() { + return job.Errorf("Container %s already running", name) + } + if !container.State.IsCheckpointed() { + return job.Errorf("Container %s is not checkpointed", name) + } + + if err := container.Restore(); err != nil { + container.LogEvent("die") + return job.Errorf("Cannot restore container %s: %s", name, err) + } + + container.LogEvent("restore") + return engine.StatusOK +} diff --git a/daemon/container.go b/daemon/container.go index 528dcf886757d..d0f8aab5f1a57 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -288,6 +288,41 @@ func validateID(id string) error { return nil } +func (container *Container) Checkpoint() error { + return container.daemon.Checkpoint(container) +} + +func (container *Container) Restore() error { + var err error + + container.Lock() + defer container.Unlock() + + defer func() { + if err != nil { + container.cleanup() + } + }() + + if err = container.initializeNetworking(); err != nil { + return err + } + + linkedEnv, err := container.setupLinkedContainers() + if err != nil { + return err + } + if err = container.setupWorkingDirectory(); err != nil { + return err + } + env := container.createDaemonEnvironment(linkedEnv) + if err = populateCommandRestore(container, env); err != nil { + return err + } + + return container.waitForRestore() +} + // Returns true if the container exposes a certain port func (container *Container) exposes(p nat.Port) bool { _, exists := container.Config.ExposedPorts[p] @@ -335,6 +370,29 @@ func (container *Container) StartLogger(cfg runconfig.LogConfig) (logger.Logger, return c(ctx) } +// Like waitForStart() but for restoring a container. +// +// XXX Does RestartPolicy apply here? +func (container *Container) waitForRestore() error { + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) + + // After calling promise.Go() we'll have two goroutines: + // - The current goroutine that will block in the select + // below until restore is done. + // - A new goroutine that will restore the container and + // wait for it to exit. + select { + case <-container.monitor.restoreSignal: + if container.ExitCode != 0 { + return fmt.Errorf("restore process failed") + } + case err := <-promise.Go(container.monitor.Restore): + return err + } + + return nil +} + func (container *Container) getProcessLabel() string { // even if we have a process label return "" if we are running // in privileged mode @@ -411,7 +469,6 @@ func attach(streamConfig *streamConfig, openStdin, stdinOnce, tty bool, stdin io _, err = copyEscapable(cStdin, stdin) } else { _, err = io.Copy(cStdin, stdin) - } if err == io.ErrClosedPipe { err = nil diff --git a/daemon/daemon.go b/daemon/daemon.go index 6336faf55acc2..b33527e611b71 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -304,6 +304,18 @@ func (daemon *Daemon) restore() error { logrus.Debugf("Loaded container %v", container.ID) containers[container.ID] = &cr{container: container} + + // If the container was checkpointed, we need to reserve + // the IP address that it was using. + // + // XXX We should also reserve host ports (if any). 
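+		//
+		// The ReserveIP call below is left commented out; re-reserving
+		// the original IP is not wired up in this revision.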
+ if container.IsCheckpointed() { + /*err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress) + if err != nil { + log.Errorf("Failed to reserve IP %s for container %s", + container.ID, container.NetworkSettings.IPAddress) + }*/ + } } else { logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } @@ -954,6 +966,25 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, hooks) } +func (daemon *Daemon) Checkpoint(c *Container) error { + if err := daemon.execDriver.Checkpoint(c.command); err != nil { + return err + } + c.SetCheckpointed() + return nil +} + +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { + // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go). + _, err := daemon.driver.Get(c.ID, c.GetMountLabel()) + if err != nil { + return 0, err + } + + exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback) + return exitCode, err +} + func (daemon *Daemon) kill(c *Container, sig int) error { return daemon.execDriver.Kill(c.command, sig) } diff --git a/daemon/monitor.go b/daemon/monitor.go index c36d427a96ef8..375a1fc62ba6d 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -68,6 +68,9 @@ type containerMonitor struct { // left waiting for nothing to happen during this time stopChan chan struct{} + // like startSignal but for restoring a container + restoreSignal chan struct{} + // timeIncrement is the amount of time to wait between restarts // this is in milliseconds timeIncrement int @@ -86,6 +89,7 @@ func (daemon *Daemon) newContainerMonitor(container *Container, policy runconfig timeIncrement: defaultTimeIncrement, stopChan: make(chan struct{}), startSignal: make(chan struct{}), + restoreSignal: make(chan struct{}), } } @@ -224,6 +228,49 @@ func (m *containerMonitor) Start() error { } } +// Like Start() but for restoring a container. +func (m *containerMonitor) Restore() error { + var ( + err error + // XXX The following line should be changed to + // exitStatus execdriver.ExitStatus to match Start() + exitCode int + afterRestore bool + ) + + defer func() { + if afterRestore { + m.container.Lock() + m.container.setStopped(&execdriver.ExitStatus{exitCode, false}) + defer m.container.Unlock() + } + m.Close() + }() + + if err := m.container.startLoggingToDisk(); err != nil { + m.resetContainer(false) + return err + } + + pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + + m.container.LogEvent("restore") + m.lastStartTime = time.Now() + if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback); err != nil { + log.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode) + m.container.ExitCode = -1 + m.resetContainer(false) + return err + } + afterRestore = true + + m.container.ExitCode = exitCode + m.resetMonitor(err == nil && exitCode == 0) + m.container.LogEvent("die") + m.resetContainer(true) + return err +} + // resetMonitor resets the stateful fields on the containerMonitor based on the // previous runs success or failure. Regardless of success, if the container had // an execution time of more than 10s then reset the timer back to the default @@ -319,6 +366,29 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid return nil } +// Like callback() but for restoring a container. 
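+// The exec driver invokes it with the PID of the restored process,
+// or with 0 if the restore failed.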
+func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int) { + // If restorePid is 0, it means that restore failed. + if restorePid != 0 { + m.container.setRunning(restorePid) + } + + // Unblock the goroutine waiting in waitForRestore(). + select { + case <-m.restoreSignal: + default: + close(m.restoreSignal) + } + + if restorePid != 0 { + // Write config.json and hostconfig.json files + // to /var/lib/docker/containers/. + if err := m.container.ToDisk(); err != nil { + log.Debugf("%s", err) + } + } +} + // resetContainer resets the container's IO and ensures that the command is able to be executed again // by copying the data into a new struct // if lock is true, then container locked during reset diff --git a/daemon/start.go b/daemon/start.go index de4516c7b62ee..747c44e808539 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -144,7 +144,12 @@ func (daemon *Daemon) waitForStart(container *Container) error { // Cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (daemon *Daemon) Cleanup(container *Container) { - daemon.releaseNetwork(container) + + if container.IsCheckpointed() { + log.CRDbg("not calling ReleaseNetwork() for checkpointed container %s", container.ID) + } else { + daemon.releaseNetwork(container) + } container.unmountIpcMounts(detachMounted) diff --git a/daemon/state.go b/daemon/state.go index 8ff5effc637c2..102ef6cd57e75 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -20,6 +20,7 @@ type State struct { Running bool Paused bool Restarting bool + Checkpointed bool OOMKilled bool removalInProgress bool // Not need for this to be persistent on disk. Dead bool @@ -28,7 +29,9 @@ type State struct { Error string // contains last known error when starting the container StartedAt time.Time FinishedAt time.Time + CheckpointedAt time.Time waitChan chan struct{} + } // NewState creates a default state object with a fresh channel for state changes. @@ -49,6 +52,8 @@ func (s *State) String() string { } return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) + } else if s.Checkpointed { + return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt))) } if s.removalInProgress { @@ -182,6 +187,7 @@ func (s *State) setRunning(pid int) { s.Error = "" s.Running = true s.Paused = false + s.Checkpointed = false s.Restarting = false s.ExitCode = 0 s.Pid = pid @@ -261,3 +267,20 @@ func (s *State) setDead() { s.Dead = true s.Unlock() } + +func (s *State) SetCheckpointed() { + s.Lock() + s.CheckpointedAt = time.Now().UTC() + s.Checkpointed = true + s.Running = false + s.Paused = false + s.Restarting = false + // XXX Not sure if we need to close and recreate waitChan. + // close(s.waitChan) + // s.waitChan = make(chan struct{}) + s.Unlock() +} + +func (s *State) IsCheckpointed() bool { + return s.Checkpointed +} From a6a45115fc15f6cf373d19ac21b6ac1f41b88cd7 Mon Sep 17 00:00:00 2001 From: boucher Date: Thu, 12 Nov 2015 11:56:07 -0500 Subject: [PATCH 3/4] Update checkpoint and restore to latest docker/master. - C/R is now an EXPERIMENTAL level feature. 
- Requires CRIU 1.6 (and builds it from source in the Dockerfile) - Introduces checkpoint and restore as top level cli methods (will likely change) Signed-off-by: Ross Boucher --- Dockerfile | 17 ++ api/client/checkpoint.go | 55 +++++ api/client/restore.go | 57 +++++ api/server/router/local/local.go | 2 + api/server/router/local/local_experimental.go | 65 ++++++ api/server/router/local/local_stable.go | 6 + api/server/server.go | 30 --- api/types/types.go | 24 +- daemon/checkpoint.go | 63 +++--- daemon/container.go | 58 ----- daemon/container_unix.go | 24 +- daemon/container_windows.go | 4 +- daemon/daemon.go | 20 +- daemon/execdriver/driver.go | 11 +- daemon/execdriver/native/driver.go | 209 ++++++++---------- daemon/execdriver/windows/windows.go | 10 + daemon/inspect.go | 24 +- daemon/monitor.go | 110 ++++----- daemon/restore.go | 139 ++++++++++++ daemon/start.go | 4 +- daemon/state.go | 20 +- docker/docker.go | 2 +- docker/flags_experimental.go | 21 ++ experimental/README.md | 7 +- experimental/checkpoint_restore.md | 154 +++++++++++++ integration-cli/docker_cli_checkpoint_test.go | 39 ++++ integration-cli/docker_cli_help_test.go | 2 +- project/PACKAGERS.md | 3 + runconfig/restore.go | 18 ++ 29 files changed, 844 insertions(+), 354 deletions(-) create mode 100644 api/client/checkpoint.go create mode 100644 api/client/restore.go create mode 100644 api/server/router/local/local_experimental.go create mode 100644 api/server/router/local/local_stable.go create mode 100644 daemon/restore.go create mode 100644 docker/flags_experimental.go create mode 100644 experimental/checkpoint_restore.md create mode 100644 integration-cli/docker_cli_checkpoint_test.go create mode 100644 runconfig/restore.go diff --git a/Dockerfile b/Dockerfile index 2fbbfed9c499d..c9be2691525be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,9 +32,11 @@ RUN echo deb http://ppa.launchpad.net/zfs-native/stable/ubuntu trusty main > /et # Packaged dependencies RUN apt-get update && apt-get install -y \ apparmor \ + asciidoc \ aufs-tools \ automake \ bash-completion \ + bsdmainutils \ btrfs-tools \ build-essential \ createrepo \ @@ -43,15 +45,22 @@ RUN apt-get update && apt-get install -y \ gcc-mingw-w64 \ git \ iptables \ + libaio-dev \ libapparmor-dev \ libcap-dev \ + libprotobuf-c0-dev \ + libprotobuf-dev \ libsqlite3-dev \ libsystemd-journal-dev \ mercurial \ parallel \ pkg-config \ + protobuf-compiler \ + protobuf-c-compiler \ + python-minimal \ python-mock \ python-pip \ + python-protobuf \ python-websocket \ reprepro \ ruby1.9.1 \ @@ -59,6 +68,7 @@ RUN apt-get update && apt-get install -y \ s3cmd=1.1.0* \ ubuntu-zfs \ xfsprogs \ + xmlto \ libzfs-dev \ --no-install-recommends @@ -73,6 +83,13 @@ RUN cd /usr/local/lvm2 \ && make install_device-mapper # see https://git.fedorahosted.org/cgit/lvm2.git/tree/INSTALL +# Install Criu +RUN mkdir -p /usr/src/criu \ + && curl -sSL https://github.com/xemul/criu/archive/v1.6.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 +RUN cd /usr/src/criu \ + && make \ + && make install + # Install Go ENV GO_VERSION 1.5.1 RUN curl -sSL "https://storage.googleapis.com/golang/go${GO_VERSION}.linux-amd64.tar.gz" | tar -v -C /usr/local -xz diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go new file mode 100644 index 0000000000000..9655e68de221f --- /dev/null +++ b/api/client/checkpoint.go @@ -0,0 +1,55 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + 
"github.com/docker/docker/runconfig" +) + +// CmdCheckpoint checkpoints the process running in a container +// +// Usage: docker checkpoint CONTAINER +func (cli *DockerCli) CmdCheckpoint(args ...string) error { + cmd := Cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for storing log file") + flLeaveRunning = cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + criuOpts := &runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + LeaveRunning: *flLeaveRunning, + TCPEstablished: true, + ExternalUnixConnections: true, + FileLocks: true, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/checkpoint", criuOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to checkpoint one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/client/restore.go b/api/client/restore.go new file mode 100644 index 0000000000000..e73b62b509303 --- /dev/null +++ b/api/client/restore.go @@ -0,0 +1,57 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +// CmdRestore restores the process in a checkpointed container +// +// Usage: docker restore CONTAINER +func (cli *DockerCli) CmdRestore(args ...string) error { + cmd := Cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory to restore image files from") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for restore log") + flForce = cmd.Bool([]string{"-force"}, false, "bypass checks for current container state") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + restoreOpts := &runconfig.RestoreConfig{ + CriuOpts: runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + TCPEstablished: true, + ExternalUnixConnections: true, + FileLocks: true, + }, + ForceRestore: *flForce, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/restore", restoreOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to restore one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/server/router/local/local.go b/api/server/router/local/local.go index b27031bd24693..a9e0a00670ca4 100644 --- a/api/server/router/local/local.go +++ b/api/server/router/local/local.go @@ -145,6 +145,8 @@ func (r *router) initRoutes() { NewDeleteRoute("/containers/{name:.*}", r.deleteContainers), NewDeleteRoute("/images/{name:.*}", r.deleteImages), } + + addExperimentalRoutes(r) } func optionsHandler(ctx context.Context, w 
http.ResponseWriter, r *http.Request, vars map[string]string) error { diff --git a/api/server/router/local/local_experimental.go b/api/server/router/local/local_experimental.go new file mode 100644 index 0000000000000..56da2f2a97924 --- /dev/null +++ b/api/server/router/local/local_experimental.go @@ -0,0 +1,65 @@ +// +build experimental + +package local + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/docker/docker/api/server/httputils" + dkrouter "github.com/docker/docker/api/server/router" + "github.com/docker/docker/runconfig" + "golang.org/x/net/context" +) + +func addExperimentalRoutes(r *router) { + newRoutes := []dkrouter.Route{ + NewPostRoute("/containers/{name:.*}/checkpoint", r.postContainersCheckpoint), + NewPostRoute("/containers/{name:.*}/restore", r.postContainersRestore), + } + + r.routes = append(r.routes, newRoutes...) +} + +func (s *router) postContainersCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := httputils.CheckForJSON(r); err != nil { + return err + } + + criuOpts := &runconfig.CriuConfig{} + if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { + return err + } + + if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + +func (s *router) postContainersRestore(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := httputils.CheckForJSON(r); err != nil { + return err + } + + restoreOpts := runconfig.RestoreConfig{} + if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { + return err + } + + if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} diff --git a/api/server/router/local/local_stable.go b/api/server/router/local/local_stable.go new file mode 100644 index 0000000000000..7c6c012be06ae --- /dev/null +++ b/api/server/router/local/local_stable.go @@ -0,0 +1,6 @@ +// +build !experimental + +package local + +func addExperimentalRoutes(r *router) { +} diff --git a/api/server/server.go b/api/server/server.go index 972bd10f76bd2..b90d704c36147 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -119,36 +119,6 @@ func (s *HTTPServer) Close() error { return s.l.Close() } -func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { - if vars == nil { - return fmt.Errorf("Missing parameter") - } - if err := parseForm(r); err != nil { - return err - } - job := eng.Job("checkpoint", vars["name"]) - if err := job.Run(); err != nil { - return err - } - w.WriteHeader(http.StatusNoContent) - return nil -} - -func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { - if vars == nil { - return fmt.Errorf("Missing parameter") - } - if err := parseForm(r); err != nil { - return err - } - job := eng.Job("restore", vars["name"]) - if err := job.Run(); err != nil { - return err - } - w.WriteHeader(http.StatusNoContent) - return nil -} - func writeCorsHeaders(w http.ResponseWriter, r *http.Request, corsHeaders string) { logrus.Debugf("CORS header is enabled and set to: %s", corsHeaders) 
w.Header().Add("Access-Control-Allow-Origin", corsHeaders) diff --git a/api/types/types.go b/api/types/types.go index a6cec44aac2db..e87441f65846c 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -235,17 +235,19 @@ type ExecStartCheck struct { // ContainerState stores container's running state // it's part of ContainerJSONBase and will return by "inspect" command type ContainerState struct { - Status string - Running bool - Paused bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt string - FinishedAt string + Status string + Running bool + Paused bool + Checkpointed bool + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt string + FinishedAt string + CheckpointedAt string `json:"-"` } // ContainerJSONBase contains response of Remote API: diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go index f6057c6a028f9..674ce620cfbe5 100644 --- a/daemon/checkpoint.go +++ b/daemon/checkpoint.go @@ -1,55 +1,50 @@ package daemon import ( - "github.com/docker/docker/engine" -) + "fmt" + "os" + "path/filepath" -// Checkpoint a running container. -func (daemon *Daemon) ContainerCheckpoint(job *engine.Job) engine.Status { - if len(job.Args) != 1 { - return job.Errorf("Usage: %s CONTAINER\n", job.Name) - } + "github.com/docker/docker/runconfig" +) - name := job.Args[0] +// ContainerCheckpoint checkpoints the process running in a container with CRIU +func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfig) error { container, err := daemon.Get(name) if err != nil { - return job.Error(err) + return err } if !container.IsRunning() { - return job.Errorf("Container %s not running", name) + return fmt.Errorf("Container %s not running", name) } - if err := container.Checkpoint(); err != nil { - return job.Errorf("Cannot checkpoint container %s: %s", name, err) + if opts.ImagesDirectory == "" { + opts.ImagesDirectory = filepath.Join(container.root, "criu.image") + if err := os.MkdirAll(opts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) { + return err + } } - container.LogEvent("checkpoint") - return engine.StatusOK -} - -// Restore a checkpointed container. 
-func (daemon *Daemon) ContainerRestore(job *engine.Job) engine.Status { - if len(job.Args) != 1 { - return job.Errorf("Usage: %s CONTAINER\n", job.Name) + if opts.WorkDirectory == "" { + opts.WorkDirectory = filepath.Join(container.root, "criu.work") + if err := os.MkdirAll(opts.WorkDirectory, 0755); err != nil && !os.IsExist(err) { + return err + } } - name := job.Args[0] - container, err := daemon.Get(name) - if err != nil { - return job.Error(err) - } - if container.IsRunning() { - return job.Errorf("Container %s already running", name) + if err := daemon.Checkpoint(container, opts); err != nil { + return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) } - if !container.State.IsCheckpointed() { - return job.Errorf("Container %s is not checkpointed", name) + + container.SetCheckpointed(opts.LeaveRunning) + + if opts.LeaveRunning == false { + daemon.Cleanup(container) } - if err := container.Restore(); err != nil { - container.LogEvent("die") - return job.Errorf("Cannot restore container %s: %s", name, err) + if err := container.toDisk(); err != nil { + return fmt.Errorf("Cannot update config for container: %s", err) } - container.LogEvent("restore") - return engine.StatusOK + return nil } diff --git a/daemon/container.go b/daemon/container.go index d0f8aab5f1a57..5bbf4feb61f75 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -288,41 +288,6 @@ func validateID(id string) error { return nil } -func (container *Container) Checkpoint() error { - return container.daemon.Checkpoint(container) -} - -func (container *Container) Restore() error { - var err error - - container.Lock() - defer container.Unlock() - - defer func() { - if err != nil { - container.cleanup() - } - }() - - if err = container.initializeNetworking(); err != nil { - return err - } - - linkedEnv, err := container.setupLinkedContainers() - if err != nil { - return err - } - if err = container.setupWorkingDirectory(); err != nil { - return err - } - env := container.createDaemonEnvironment(linkedEnv) - if err = populateCommandRestore(container, env); err != nil { - return err - } - - return container.waitForRestore() -} - // Returns true if the container exposes a certain port func (container *Container) exposes(p nat.Port) bool { _, exists := container.Config.ExposedPorts[p] @@ -370,29 +335,6 @@ func (container *Container) StartLogger(cfg runconfig.LogConfig) (logger.Logger, return c(ctx) } -// Like waitForStart() but for restoring a container. -// -// XXX Does RestartPolicy apply here? -func (container *Container) waitForRestore() error { - container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) - - // After calling promise.Go() we'll have two goroutines: - // - The current goroutine that will block in the select - // below until restore is done. - // - A new goroutine that will restore the container and - // wait for it to exit. 
- select { - case <-container.monitor.restoreSignal: - if container.ExitCode != 0 { - return fmt.Errorf("restore process failed") - } - case err := <-promise.Go(container.monitor.Restore): - return err - } - - return nil -} - func (container *Container) getProcessLabel() string { // even if we have a process label return "" if we are running // in privileged mode diff --git a/daemon/container_unix.go b/daemon/container_unix.go index 91548b73c8a86..faf1698b807c0 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -807,7 +807,7 @@ func (daemon *Daemon) updateNetwork(container *Container) error { return nil } -func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([]libnetwork.EndpointOption, error) { +func (container *Container) buildCreateEndpointOptions(n libnetwork.Network, isRestoring bool) ([]libnetwork.EndpointOption, error) { var ( portSpecs = make(nat.PortSet) bindings = make(nat.PortMap) @@ -896,10 +896,18 @@ func (container *Container) buildCreateEndpointOptions(n libnetwork.Network) ([] createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) } + /*if isRestoring && container.NetworkSettings.IPAddress != "" { + genericOption := options.Generic{ + netlabel.IPAddress: net.ParseIP(container.NetworkSettings.IPAddress), + } + + createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) + }*/ + return createOptions, nil } -func (daemon *Daemon) allocateNetwork(container *Container) error { +func (daemon *Daemon) allocateNetwork(container *Container, isRestoring bool) error { controller := daemon.netController // Cleanup any stale sandbox left over due to ungraceful daemon shutdown @@ -931,7 +939,7 @@ func (daemon *Daemon) allocateNetwork(container *Container) error { } for n := range container.NetworkSettings.Networks { - if err := daemon.connectToNetwork(container, n, updateSettings); err != nil { + if err := daemon.connectToNetwork(container, n, updateSettings, isRestoring); err != nil { return err } } @@ -956,7 +964,7 @@ func (daemon *Daemon) ConnectToNetwork(container *Container, idOrName string) er if !container.Running { return derr.ErrorCodeNotRunning.WithArgs(container.ID) } - if err := daemon.connectToNetwork(container, idOrName, true); err != nil { + if err := daemon.connectToNetwork(container, idOrName, true, false); err != nil { return err } if err := container.toDiskLocking(); err != nil { @@ -965,7 +973,7 @@ func (daemon *Daemon) ConnectToNetwork(container *Container, idOrName string) er return nil } -func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, updateSettings bool) (err error) { +func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, updateSettings bool, isRestoring bool) (err error) { if container.hostConfig.NetworkMode.IsContainer() { return runconfig.ErrConflictSharedNetwork } @@ -998,7 +1006,7 @@ func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, up return err } - createOptions, err := container.buildCreateEndpointOptions(n) + createOptions, err := container.buildCreateEndpointOptions(n, isRestoring) if err != nil { return err } @@ -1045,7 +1053,7 @@ func (daemon *Daemon) connectToNetwork(container *Container, idOrName string, up return nil } -func (daemon *Daemon) initializeNetworking(container *Container) error { +func (daemon *Daemon) initializeNetworking(container *Container, isRestoring bool) error { var err error if container.hostConfig.NetworkMode.IsContainer() { @@ -1076,7 
+1084,7 @@ func (daemon *Daemon) initializeNetworking(container *Container) error { } - if err := daemon.allocateNetwork(container); err != nil { + if err := daemon.allocateNetwork(container, isRestoring); err != nil { return err } diff --git a/daemon/container_windows.go b/daemon/container_windows.go index 2d8eb87a0ad40..97dbdbf5a24ee 100644 --- a/daemon/container_windows.go +++ b/daemon/container_windows.go @@ -36,7 +36,7 @@ func (container *Container) createDaemonEnvironment(linkedEnv []string) []string return container.Config.Env } -func (daemon *Daemon) initializeNetworking(container *Container) error { +func (daemon *Daemon) initializeNetworking(container *Container, isRestoring bool) error { return nil } @@ -153,7 +153,7 @@ func (daemon *Daemon) setNetworkNamespaceKey(containerID string, pid int) error } // allocateNetwork is a no-op on Windows. -func (daemon *Daemon) allocateNetwork(container *Container) error { +func (daemon *Daemon) allocateNetwork(container *Container, isRestoring bool) error { return nil } diff --git a/daemon/daemon.go b/daemon/daemon.go index b33527e611b71..5288976079895 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -966,22 +966,18 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, hooks) } -func (daemon *Daemon) Checkpoint(c *Container) error { - if err := daemon.execDriver.Checkpoint(c.command); err != nil { - return err - } - c.SetCheckpointed() - return nil +// Checkpoint the container +func (daemon *Daemon) Checkpoint(c *Container, opts *runconfig.CriuConfig) error { + return daemon.execDriver.Checkpoint(c.command, opts) } -func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { - // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go). - _, err := daemon.driver.Get(c.ID, c.GetMountLabel()) - if err != nil { - return 0, err +// Restore the container +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.DriverCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + hooks := execdriver.Hooks{ + Restore: restoreCallback, } - exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback) + exitCode, err := daemon.execDriver.Restore(c.command, pipes, hooks, opts, forceRestore) return exitCode, err } diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index bcf32fcaf2d4d..ef3551c361791 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -6,6 +6,7 @@ import ( "os/exec" "time" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" ) @@ -37,10 +38,10 @@ type Hooks struct { Start DriverCallback // PostStop is called after the container process exits PostStop []DriverCallback + // Restore is called after the container is restored + Restore DriverCallback } -type RestoreCallback func(*ProcessConfig, int) - // Info is driver specific information based on // processes registered with the driver type Info interface { @@ -74,9 +75,11 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error - Checkpoint(c *Command) error + // Checkpoints a container (with criu). + Checkpoint(c *Command, opts *runconfig.CriuConfig) error - Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error) + // Restores a checkpoint image into a container (with criu). 
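+	// With forceRestore set, the driver recreates the container from
+	// its configuration when no previous state can be loaded.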
+ Restore(c *Command, pipes *Pipes, hooks Hooks, opts *runconfig.CriuConfig, forceRestore bool) (ExitStatus, error) // Name returns the name of the driver. Name() string diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index f87a40a0083ec..d67783b2ecb44 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,7 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" - "github.com/docker/docker/utils" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -159,10 +159,13 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd d.activeContainers[c.ID] = cont d.Unlock() defer func() { - if !destroyed { - cont.Destroy() + status, _ := cont.Status() + if status != libcontainer.Checkpointed { + if !destroyed { + cont.Destroy() + } + d.cleanContainer(c.ID) } - d.cleanContainer(c.ID) }() if err := cont.Start(p); err != nil { @@ -303,49 +306,28 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } -// XXX Where is the right place for the following -// const and getCheckpointImageDir() function? -const ( - containersDir = "/var/lib/docker/containers" - criuImgDir = "criu_img" -) - -func getCheckpointImageDir(containerId string) string { - return filepath.Join(containersDir, containerId, criuImgDir) +func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.CriuOpts { + return &libcontainer.CriuOpts{ + ImagesDirectory: runconfigOpts.ImagesDirectory, + WorkDirectory: runconfigOpts.WorkDirectory, + LeaveRunning: runconfigOpts.LeaveRunning, + TcpEstablished: runconfigOpts.TCPEstablished, + ExternalUnixConnections: runconfigOpts.ExternalUnixConnections, + ShellJob: runconfigOpts.ShellJob, + FileLocks: runconfigOpts.FileLocks, + } } -func (d *driver) Checkpoint(c *execdriver.Command) error { +// Checkpoint implements the exec driver Driver interface. +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { active := d.activeContainers[c.ID] if active == nil { return fmt.Errorf("active container for %s does not exist", c.ID) } - container := active.container - - // Create an image directory for this container (which - // may already exist from a previous checkpoint). - imageDir := getCheckpointImageDir(c.ID) - err := os.MkdirAll(imageDir, 0700) - if err != nil && !os.IsExist(err) { - return err - } - - // Copy container.json and state.json files to the CRIU - // image directory for later use during restore. Do this - // before checkpointing because after checkpoint the container - // will exit and these files will be removed. 
- log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir) - srcFiles := []string{"container.json", "state.json"} - for _, f := range srcFiles { - srcFile := filepath.Join(d.root, c.ID, f) - dstFile := filepath.Join(imageDir, f) - if _, err := utils.CopyFile(srcFile, dstFile); err != nil { - return err - } - } d.Lock() defer d.Unlock() - err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid) + err := active.Checkpoint(libcontainerCriuOpts(opts)) if err != nil { return err } @@ -353,103 +335,90 @@ func (d *driver) Checkpoint(c *execdriver.Command) error { return nil } -type restoreOutput struct { - exitCode int - err error -} +// Restore implements the exec driver Driver interface. +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + var ( + cont libcontainer.Container + err error + ) -func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) { - imageDir := getCheckpointImageDir(c.ID) - container, err := d.createRestoreContainer(c, imageDir) + destroyed := false + cont, err = d.factory.Load(c.ID) if err != nil { - return 1, err + if forceRestore { + var config *configs.Config + config, err = d.createContainer(c, hooks) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + cont, err = d.factory.Create(c.ID, config) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + } else { + return execdriver.ExitStatus{ExitCode: -1}, err + } } - var term execdriver.Terminal - - if c.ProcessConfig.Tty { - term, err = NewTtyConsole(&c.ProcessConfig, pipes) - } else { - term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) + p := &libcontainer.Process{ + Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), + Env: c.ProcessConfig.Env, + Cwd: c.WorkingDir, + User: c.ProcessConfig.User, } - if err != nil { - return -1, err + + config := cont.Config() + if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err } - c.ProcessConfig.Terminal = term d.Lock() - d.activeContainers[c.ID] = &activeContainer{ - container: container, - cmd: &c.ProcessConfig.Cmd, - } + d.activeContainers[c.ID] = cont d.Unlock() - defer d.cleanContainer(c.ID) + defer func() { + status, _ := cont.Status() + if status != libcontainer.Checkpointed { + if !destroyed { + cont.Destroy() + } + d.cleanContainer(c.ID) + } + }() - // Since the CRIU binary exits after restoring the container, we - // need to reap its child by setting PR_SET_CHILD_SUBREAPER (36) - // so that it'll be owned by this process (Docker daemon) after restore. - // - // XXX This really belongs to where the Docker daemon starts. 
- if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 { - return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr) + if err := cont.Restore(p, libcontainerCriuOpts(opts)); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err } - restoreOutputChan := make(chan restoreOutput, 1) - waitForRestore := make(chan struct{}) - - go func() { - exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir, - func(child *os.File, args []string) *exec.Cmd { - cmd := new(exec.Cmd) - cmd.Path = d.initPath - cmd.Args = append([]string{ - DriverName, - "-restore", - "-pipe", "3", - "--", - }, args...) - cmd.ExtraFiles = []*os.File{child} - return cmd - }, - func(restorePid int) error { - log.CRDbg("restorePid=%d", restorePid) - if restorePid == 0 { - restoreCallback(&c.ProcessConfig, 0) - return nil - } - - // The container.json file should be written *after* the container - // has started because its StdFds cannot be initialized before. - // - // XXX How do we handle error here? - d.writeContainerFile(container, c.ID) - close(waitForRestore) - if restoreCallback != nil { - c.ProcessConfig.Process, err = os.FindProcess(restorePid) - if err != nil { - log.Debugf("cannot find restored process %d", restorePid) - return err - } - c.ContainerPid = c.ProcessConfig.Process.Pid - restoreCallback(&c.ProcessConfig, c.ContainerPid) - } - return nil - }) - restoreOutputChan <- restoreOutput{exitCode, err} - }() + oom := notifyOnOOM(cont) + if hooks.Restore != nil { + pid, err := p.Pid() + if err != nil { + p.Signal(os.Kill) + p.Wait() + return execdriver.ExitStatus{ExitCode: -1}, err + } + hooks.Restore(&c.ProcessConfig, pid, oom) + } - select { - case restoreOutput := <-restoreOutputChan: - // there was an error - return restoreOutput.exitCode, restoreOutput.err - case <-waitForRestore: - // container restored - break + waitF := p.Wait + if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { + // we need such hack for tracking processes with inherited fds, + // because cmd.Wait() waiting for all streams to be copied + waitF = waitInPIDHost(p, cont) + } + ps, err := waitF() + if err != nil { + execErr, ok := err.(*exec.ExitError) + if !ok { + return execdriver.ExitStatus{ExitCode: -1}, err + } + ps = execErr.ProcessState } - // Wait for the container to exit. - restoreOutput := <-restoreOutputChan - return restoreOutput.exitCode, restoreOutput.err + cont.Destroy() + destroyed = true + _, oomKill := <-oom + return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil } // Terminate implements the exec driver Driver interface. 
diff --git a/daemon/execdriver/windows/windows.go b/daemon/execdriver/windows/windows.go index 587032e6e50f8..788c4e0f1d4c8 100644 --- a/daemon/execdriver/windows/windows.go +++ b/daemon/execdriver/windows/windows.go @@ -109,3 +109,13 @@ func setupEnvironmentVariables(a []string) map[string]string { } return r } + +// Checkpoint does not currently implement checkpoint, but complies to the Driver interface +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { + return fmt.Errorf("Windows: Containers cannot be checkpointed") +} + +// Restore does not currently implement restore, but complies to the Driver interface +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Windows: Containers cannot be restored") +} diff --git a/daemon/inspect.go b/daemon/inspect.go index 3107b3bd55e3e..bdecc27c1c7aa 100644 --- a/daemon/inspect.go +++ b/daemon/inspect.go @@ -104,17 +104,19 @@ func (daemon *Daemon) getInspectData(container *Container, size bool) (*types.Co } containerState := &types.ContainerState{ - Status: container.State.StateString(), - Running: container.State.Running, - Paused: container.State.Paused, - Restarting: container.State.Restarting, - OOMKilled: container.State.OOMKilled, - Dead: container.State.Dead, - Pid: container.State.Pid, - ExitCode: container.State.ExitCode, - Error: container.State.Error, - StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), - FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + Status: container.State.StateString(), + Running: container.State.Running, + Paused: container.State.Paused, + Checkpointed: container.State.Checkpointed, + Restarting: container.State.Restarting, + OOMKilled: container.State.OOMKilled, + Dead: container.State.Dead, + Pid: container.State.Pid, + ExitCode: container.State.ExitCode, + Error: container.State.Error, + StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), + FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + CheckpointedAt: container.State.CheckpointedAt.Format(time.RFC3339Nano), } contJSONBase := &types.ContainerJSONBase{ diff --git a/daemon/monitor.go b/daemon/monitor.go index 375a1fc62ba6d..380a8675c33c2 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -31,6 +31,8 @@ type containerSupervisor interface { StartLogging(*Container) error // Run starts a container Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error) + // Restore restores a container that was previously checkpointed + Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.DriverCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) // IsShuttingDown tells whether the supervisor is shutting down or not IsShuttingDown() bool } @@ -128,14 +130,30 @@ func (m *containerMonitor) Close() error { // Start starts the containers process and monitors it according to the restart policy func (m *containerMonitor) Start() error { + return m.start(nil, false) +} + +// Restore restores the containers from an image and monitors it according to the restart policy +func (m *containerMonitor) Restore(restoreOpts *runconfig.CriuConfig, forceRestore bool) error { + return m.start(restoreOpts, forceRestore) +} + +// Internal method to start or restore the containers and monitors it +func (m 
*containerMonitor) start(restoreOpts *runconfig.CriuConfig, forceRestore bool) error { var ( err error exitStatus execdriver.ExitStatus // this variable indicates where we in execution flow: // before Run or after - afterRun bool + afterRun bool + isRestoring bool ) + // we only want to restore once, but upon restart we should simply + // start the container normally, so isRestoring tells us where we are, + // and the initial value is whether or not we were provided restore opts + isRestoring = restoreOpts != nil + // ensure that when the monitor finally exits we release the networking and unmount the rootfs defer func() { if afterRun { @@ -156,19 +174,30 @@ func (m *containerMonitor) Start() error { for { m.container.RestartCount++ - if err := m.supervisor.StartLogging(m.container); err != nil { - m.resetContainer(false) + if m.container.logDriver == nil { + if err := m.supervisor.StartLogging(m.container); err != nil { + m.resetContainer(false) - return err + return err + } } pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) - m.logEvent("start") - m.lastStartTime = time.Now() - if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil { + if isRestoring { + m.logEvent("restore") + + exitStatus, err = m.supervisor.Restore(m.container, pipes, m.restoreCallback, restoreOpts, forceRestore) + isRestoring = false + } else { + m.logEvent("start") + + exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback) + } + + if err != nil { // if we receive an internal error from the initial start of a container then lets // return it instead of entering the restart loop // set to 127 for container cmd not found/does not exist) @@ -228,49 +257,6 @@ func (m *containerMonitor) Start() error { } } -// Like Start() but for restoring a container. -func (m *containerMonitor) Restore() error { - var ( - err error - // XXX The following line should be changed to - // exitStatus execdriver.ExitStatus to match Start() - exitCode int - afterRestore bool - ) - - defer func() { - if afterRestore { - m.container.Lock() - m.container.setStopped(&execdriver.ExitStatus{exitCode, false}) - defer m.container.Unlock() - } - m.Close() - }() - - if err := m.container.startLoggingToDisk(); err != nil { - m.resetContainer(false) - return err - } - - pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) - - m.container.LogEvent("restore") - m.lastStartTime = time.Now() - if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback); err != nil { - log.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode) - m.container.ExitCode = -1 - m.resetContainer(false) - return err - } - afterRestore = true - - m.container.ExitCode = exitCode - m.resetMonitor(err == nil && exitCode == 0) - m.container.LogEvent("die") - m.resetContainer(true) - return err -} - // resetMonitor resets the stateful fields on the containerMonitor based on the // previous runs success or failure. Regardless of success, if the container had // an execution time of more than 10s then reset the timer back to the default @@ -367,7 +353,23 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid } // Like callback() but for restoring a container. 
-func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int) {
+func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int, chOOM <-chan struct{}) error {
+	go func() {
+		_, ok := <-chOOM
+		if ok {
+			m.logEvent("oom")
+		}
+	}()
+
+	if processConfig.Tty {
+		// The callback is called after the process Start(),
+		// so we are in the parent process. In TTY mode, stdin/out/err
+		// is the PtySlave, which we close here.
+		if c, ok := processConfig.Stdout.(io.Closer); ok {
+			c.Close()
+		}
+	}
+
 	// If restorePid is 0, it means that restore failed.
 	if restorePid != 0 {
 		m.container.setRunning(restorePid)
@@ -383,10 +385,12 @@ func (m *containerMonitor) restoreCallback(processConf
 	if restorePid != 0 {
 		// Write config.json and hostconfig.json files
 		// to /var/lib/docker/containers/.
-		if err := m.container.ToDisk(); err != nil {
-			log.Debugf("%s", err)
+		if err := m.container.toDiskLocking(); err != nil {
+			logrus.Errorf("Error saving container to disk: %s", err)
 		}
 	}
+
+	return nil
 }
 
 // resetContainer resets the container's IO and ensures that the command is able to be executed again
diff --git a/daemon/restore.go b/daemon/restore.go
new file mode 100644
index 0000000000000..ae66b5478ef75
--- /dev/null
+++ b/daemon/restore.go
@@ -0,0 +1,139 @@
+package daemon
+
+import (
+	"fmt"
+	"path/filepath"
+
+	derr "github.com/docker/docker/errors"
+	"github.com/docker/docker/pkg/promise"
+	"github.com/docker/docker/runconfig"
+)
+
+// ContainerRestore restores the process in a container with CRIU
+func (daemon *Daemon) ContainerRestore(name string, opts *runconfig.CriuConfig, forceRestore bool) error {
+	container, err := daemon.Get(name)
+	if err != nil {
+		return err
+	}
+
+	if !forceRestore {
+		// TODO: It's possible we only want to bypass the checkpointed check,
+		// I'm not sure how this will work if the container is already running
+		if container.IsRunning() {
+			return fmt.Errorf("Container %s already running", name)
+		}
+
+		if !container.IsCheckpointed() {
+			return fmt.Errorf("Container %s is not checkpointed", name)
+		}
+	} else {
+		if !container.HasBeenCheckpointed() && opts.ImagesDirectory == "" {
+			return fmt.Errorf("You must specify an image directory to restore container %s from", name)
+		}
+	}
+
+	if opts.ImagesDirectory == "" {
+		opts.ImagesDirectory = filepath.Join(container.root, "criu.image")
+	}
+
+	if opts.WorkDirectory == "" {
+		opts.WorkDirectory = filepath.Join(container.root, "criu.work")
+	}
+
+	if err = daemon.containerRestore(container, opts, forceRestore); err != nil {
+		return fmt.Errorf("Cannot restore container %s: %s", name, err)
+	}
+
+	return nil
+}
+
+// containerRestore prepares the container to be restored by setting up
+// everything the container needs, just like containerStart, such as
+// storage and networking, as well as links between containers.
+// The container is left waiting for a signal that restore has finished.
+func (daemon *Daemon) containerRestore(container *Container, opts *runconfig.CriuConfig, forceRestore bool) error {
+	var err error
+	container.Lock()
+	defer container.Unlock()
+
+	if container.Running {
+		return nil
+	}
+
+	if container.removalInProgress || container.Dead {
+		return derr.ErrorCodeContainerBeingRemoved
+	}
+
+	// if we encounter an error during start we need to ensure that any other
+	// setup has been cleaned up properly
+	defer func() {
+		if err != nil {
+			container.setError(err)
+			// if no one else has set it, make sure we don't leave it at zero
+			if container.ExitCode == 0 {
+				container.ExitCode = 128
+			}
+			container.toDisk()
+			daemon.Cleanup(container)
+			daemon.LogContainerEvent(container, "die")
+		}
+	}()
+
+	if err := daemon.conditionalMountOnStart(container); err != nil {
+		return err
+	}
+
+	// Make sure NetworkMode has an acceptable value. We do this to ensure
+	// backwards API compatibility.
+	container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig)
+
+	if err := daemon.initializeNetworking(container, true); err != nil {
+		return err
+	}
+	linkedEnv, err := daemon.setupLinkedContainers(container)
+	if err != nil {
+		return err
+	}
+	if err := container.setupWorkingDirectory(); err != nil {
+		return err
+	}
+	env := container.createDaemonEnvironment(linkedEnv)
+	if err := daemon.populateCommand(container, env); err != nil {
+		return err
+	}
+
+	if !container.hostConfig.IpcMode.IsContainer() && !container.hostConfig.IpcMode.IsHost() {
+		if err := daemon.setupIpcDirs(container); err != nil {
+			return err
+		}
+	}
+
+	mounts, err := daemon.setupMounts(container)
+	if err != nil {
+		return err
+	}
+	mounts = append(mounts, container.ipcMounts()...)
+
+	container.command.Mounts = mounts
+	return daemon.waitForRestore(container, opts, forceRestore)
+}
+
+func (daemon *Daemon) waitForRestore(container *Container, opts *runconfig.CriuConfig, forceRestore bool) error {
+	container.monitor = daemon.newContainerMonitor(container, container.hostConfig.RestartPolicy)
+
+	// After calling promise.Go() we'll have two goroutines:
+	// - The current goroutine that will block in the select
+	//   below until restore is done.
+	// - A new goroutine that will restore the container and
+	//   wait for it to exit.
+	select {
+	case <-container.monitor.restoreSignal:
+		if container.ExitCode != 0 {
+			return fmt.Errorf("restore process failed")
+		}
+	case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }):
+		return err
+	}
+
+	return nil
+}
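The two-goroutine handshake in `waitForRestore` is easy to get wrong, so here is a minimal, self-contained sketch of the same pattern, with `promise.Go` approximated by a buffered error channel. The names are illustrative, not the daemon's API:

    package main

    import "fmt"

    // waitFor blocks until either the restore signal fires (success) or the
    // monitor function returns first, which here always indicates failure.
    func waitFor(restoreSignal <-chan struct{}, monitor func() error) error {
    	errCh := make(chan error, 1)
    	go func() { errCh <- monitor() }() // the "promise" goroutine

    	select {
    	case <-restoreSignal:
    		// Restore finished; the monitor goroutine keeps supervising.
    		return nil
    	case err := <-errCh:
    		// The monitor returned before restore completed: propagate it.
    		return err
    	}
    }

    func main() {
    	sig := make(chan struct{})
    	go close(sig) // simulate restoreCallback signalling success
    	// The monitor blocks forever, standing in for a long-lived container.
    	fmt.Println(waitFor(sig, func() error { select {} })) // prints <nil>
    }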
diff --git a/daemon/start.go b/daemon/start.go
index 747c44e808539..d0a795af73917 100644
--- a/daemon/start.go
+++ b/daemon/start.go
@@ -96,7 +96,7 @@ func (daemon *Daemon) containerStart(container *Container) (err error) {
 	// backwards API compatibility.
 	container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig)
 
-	if err := daemon.initializeNetworking(container); err != nil {
+	if err := daemon.initializeNetworking(container, false); err != nil {
 		return err
 	}
 	linkedEnv, err := daemon.setupLinkedContainers(container)
@@ -146,7 +146,7 @@ func (daemon *Daemon) waitForStart(container *Container) error {
 
 func (daemon *Daemon) Cleanup(container *Container) {
 	if container.IsCheckpointed() {
-		log.CRDbg("not calling ReleaseNetwork() for checkpointed container %s", container.ID)
+		logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID)
 	} else {
 		daemon.releaseNetwork(container)
 	}
diff --git a/daemon/state.go b/daemon/state.go
index 102ef6cd57e75..5515dc4b92498 100644
--- a/daemon/state.go
+++ b/daemon/state.go
@@ -31,7 +31,6 @@ type State struct {
 	FinishedAt     time.Time
 	CheckpointedAt time.Time
 	waitChan       chan struct{}
-
 }
 
 // NewState creates a default state object with a fresh channel for state changes.
@@ -52,7 +51,9 @@ func (s *State) String() string {
 		}
 
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
-	} else if s.Checkpointed {
+	}
+
+	if s.Checkpointed {
 		return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt)))
 	}
 
@@ -87,6 +88,10 @@ func (s *State) StateString() string {
 		return "running"
 	}
 
+	if s.Checkpointed {
+		return "checkpointed"
+	}
+
 	if s.Dead {
 		return "dead"
 	}
@@ -268,10 +273,11 @@ func (s *State) setDead() {
 	s.Unlock()
 }
 
-func (s *State) SetCheckpointed() {
+// SetCheckpointed sets the container's status to indicate it has been checkpointed
+func (s *State) SetCheckpointed(leaveRunning bool) {
 	s.Lock()
 	s.CheckpointedAt = time.Now().UTC()
-	s.Checkpointed = true
+	s.Checkpointed = !leaveRunning
 	s.Running = false
 	s.Paused = false
 	s.Restarting = false
@@ -281,6 +287,12 @@ func (s *State) SetCheckpointed() {
 	s.Unlock()
 }
 
+// HasBeenCheckpointed indicates whether the container has ever been checkpointed
+func (s *State) HasBeenCheckpointed() bool {
+	return s.CheckpointedAt != time.Time{}
+}
+
+// IsCheckpointed indicates whether the container is currently checkpointed
 func (s *State) IsCheckpointed() bool {
 	return s.Checkpointed
 }
diff --git a/docker/docker.go b/docker/docker.go
index be1f51e5f81f7..c9e365f0a694c 100644
--- a/docker/docker.go
+++ b/docker/docker.go
@@ -36,7 +36,7 @@ func main() {
 
 	help := "\nCommands:\n"
 	for _, cmd := range dockerCommands {
-		help += fmt.Sprintf("    %-10.10s%s\n", cmd.Name, cmd.Description)
+		help += fmt.Sprintf("    %-11.11s%s\n", cmd.Name, cmd.Description)
 	}
 	help += "\nRun 'docker COMMAND --help' for more information on a command."
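The width bump from `%-10.10s` to `%-11.11s` is needed because `checkpoint` is exactly ten characters long: at width ten, the name fills the whole field and runs straight into its description. A quick standalone check:

    package main

    import "fmt"

    func main() {
    	// Old width: the 10-character command fills the field completely.
    	fmt.Printf("%-10.10s%s\n", "checkpoint", "Checkpoint one or more running containers")
    	// Output: checkpointCheckpoint one or more running containers

    	// New width: one column of padding separates name and description.
    	fmt.Printf("%-11.11s%s\n", "checkpoint", "Checkpoint one or more running containers")
    	// Output: checkpoint Checkpoint one or more running containers
    }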
diff --git a/docker/flags_experimental.go b/docker/flags_experimental.go
new file mode 100644
index 0000000000000..608865d4e37b9
--- /dev/null
+++ b/docker/flags_experimental.go
@@ -0,0 +1,21 @@
+// +build experimental
+
+package main
+
+import (
+	"sort"
+
+	"github.com/docker/docker/cli"
+)
+
+func init() {
+	experimentalCommands := []cli.Command{
+		{"checkpoint", "Checkpoint one or more running containers"},
+		{"restore", "Restore one or more checkpointed containers"},
+	}
+
+	dockerCommands = append(dockerCommands, experimentalCommands...)
+
+	// Sorting is required here so that the command sort test passes.
+	sort.Sort(byName(dockerCommands))
+}
diff --git a/experimental/README.md b/experimental/README.md
index d2eff37d8d4e6..42758298c7403 100644
--- a/experimental/README.md
+++ b/experimental/README.md
@@ -2,7 +2,7 @@
 
 This page contains a list of features in the Docker engine which are
 experimental. Experimental features are **not** ready for production. They are
-provided for test and evaluation in your sandbox environments.
+provided for test and evaluation in your sandbox environments.
 
 The information below describes each feature and the GitHub pull requests and
 issues associated with it. If necessary, links are provided to additional
@@ -73,9 +73,10 @@ to build a Docker binary with the experimental features enabled:
 
  * [External graphdriver plugins](plugins_graphdriver.md)
  * [User namespaces](userns.md)
+ * [Checkpoint & Restore](checkpoint_restore.md)
 
 ## How to comment on an experimental feature
 
-Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.
+Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.
 
-Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or in on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user).
+Issues or problems with a feature? Ask for help on the `#docker` IRC channel or on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user).
diff --git a/experimental/checkpoint_restore.md b/experimental/checkpoint_restore.md
new file mode 100644
index 0000000000000..f3ed0b5898e85
--- /dev/null
+++ b/experimental/checkpoint_restore.md
@@ -0,0 +1,154 @@
+# Docker Checkpoint & Restore
+
+Checkpoint & Restore is a new feature that allows you to freeze a running
+container by checkpointing it, which turns its state into a collection of files
+on disk. Later, the container can be restored from the point it was frozen.
+
+This is accomplished using a tool called [CRIU](http://criu.org), which is an
+external dependency of this feature. A good overview of the history of
+checkpoint and restore in Docker is available in this
+[Kubernetes blog post](http://blog.kubernetes.io/2015/07/how-did-quake-demo-from-dockercon-work.html).
+
+## Installing CRIU
+
+If you use a Debian-based system such as Ubuntu, you can add the CRIU PPA
+(https://launchpad.net/~criu/+archive/ubuntu/ppa) and install it with apt-get.
+
+Alternatively, you can [build CRIU from source](http://criu.org/Installation).
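+
+Once installed, CRIU's own `criu check` command is a quick way to verify that
+your kernel provides the features CRIU needs (a suggested sanity check; the
+exact output varies by CRIU version):
+
+    $ sudo criu check
+    Looks good.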
+
+## Use cases for checkpoint & restore
+
+This feature is currently focused on single-host use cases for checkpoint and
+restore. Here are a few:
+
+- Restarting / upgrading the docker daemon without stopping containers
+- Restarting the host machine without stopping/starting containers
+- Speeding up the start time of slow-start applications
+- "Rewinding" processes to an earlier point in time
+- "Forensic debugging" of running processes
+
+Another primary use case of checkpoint & restore outside of Docker is the live
+migration of a server from one machine to another. This is possible with the
+current implementation, but it is not currently a priority (and so the
+workflow is not optimized for the task).
+
+## Using Checkpoint & Restore
+
+Two new top-level commands are introduced in the CLI: `checkpoint` & `restore`.
+The options for checkpoint:
+
+    Usage: docker checkpoint [OPTIONS] CONTAINER [CONTAINER...]
+
+    Checkpoint one or more running containers
+
+      --allow-shell=false     allow checkpointing shell jobs
+      --image-dir=            directory for storing checkpoint image files
+      --leave-running=false   leave the container running after checkpoint
+      --work-dir=             directory for storing log file
+
+And for restore:
+
+    Usage: docker restore [OPTIONS] CONTAINER [CONTAINER...]
+
+    Restore one or more checkpointed containers
+
+      --allow-shell=false     allow restoring shell jobs
+      --force=false           bypass checks for current container state
+      --image-dir=            directory to restore image files from
+      --work-dir=             directory for restore log
+
+A simple example of using checkpoint & restore on a container:
+
+    $ docker run --name cr -d busybox /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done'
+    > abc0123
+
+    $ docker checkpoint cr
+    > abc0123
+
+    $ docker restore cr
+    > abc0123
+
+This process just logs an incrementing counter to stdout. If you run
+`docker logs` in between running/checkpointing/restoring, you should see that
+the counter increases while the process is running, stops while it's
+checkpointed, and resumes from the point it left off once you restore.
+
+### Same container checkpoint/restore
+
+The above example falls into the category of "same container" use cases for c/r.
+Restarting the daemon is an example of this kind of use case. There is only one
+container here at any point in time. That container's status, once it is
+checkpointed, will be "Checkpointed", and `docker inspect` will show that status
+as well as the time of the last checkpoint. The IP address and other container
+state do not change (see the known issues at the bottom of this document).
+
+### New container checkpoint/restore
+
+Here's an example of a "new container" use case for c/r:
+
+    $ docker run some_image
+    > abc789
+
+    ## the container runs for a while
+
+    $ docker checkpoint --image-dir=/some/path abc789
+    > abc789
+
+At this point, we've created a checkpoint image at `/some/path` that encodes a
+process at the exact state we want it to be. Now, at some later point in time,
+we can put a copy of that exact state into a new container (perhaps many times):
+
+    $ docker create some_image
+    > def123
+
+    $ docker restore --force=true --image-dir=/some/path def123
+    > def123
+
+We created a new container (but didn't start it), and then we restored our
+checkpointed process into that container.
+
+This is obviously more involved than the simple use case shown earlier. It
+requires starting subsequent containers with the same configuration (e.g.
+the same mounted volumes, the same base image, etc.).
+
+### Options
+
+Checkpoint & Restore:
+
+    --image-dir=            directory for storing checkpoint image files
+
+Allows you to specify the path for writing a checkpoint image, or the path of
+the image you want to restore.
+
+    --work-dir=             directory for storing log file
+
+Allows you to specify the path for writing the CRIU log.
+
+    --leave-running=false   leave the container running after checkpoint
+
+Normally, when a process is checkpointed, it is stopped afterwards. When this
+flag is enabled, the process keeps running after the checkpoint. This is
+useful if you want to capture a process at multiple points in time, for later
+use in debugging or rewinding the process. It's also useful for minimizing
+downtime when checkpointing processes with a large memory footprint.
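+
+For example, capturing the same container at two points in time might look
+like this (a sketch; the container name and image directories are
+hypothetical):
+
+    $ docker checkpoint --leave-running=true --image-dir=/tmp/cr1 mycontainer
+    > mycontainer
+
+    $ docker checkpoint --leave-running=true --image-dir=/tmp/cr2 mycontainer
+    > mycontainer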
+
+Restore Only:
+
+    --force=false           force restoring into a container
+
+As shown in the "new container" example, this flag allows you to restore a
+checkpoint image into a container that was not previously checkpointed.
+Normally, docker returns an error when restoring into a container that has
+not previously been checkpointed.
+
+## Known Issues
+
+- Currently, networking is broken in this PR. Although it's implemented at the
+  libcontainer level, the method used no longer works since the introduction
+  of libnetwork. See:
+  - https://github.com/docker/libnetwork/pull/465
+  - https://github.com/boucher/docker/pull/15
+- There are likely several networking related issues to work out, like:
+  - ensuring IPs are reserved across daemon restarts
+  - ensuring port maps are reserved
+  - deciding how to deal with network resources in the "new container" model
diff --git a/integration-cli/docker_cli_checkpoint_test.go b/integration-cli/docker_cli_checkpoint_test.go
new file mode 100644
index 0000000000000..09ec47a9a0d54
--- /dev/null
+++ b/integration-cli/docker_cli_checkpoint_test.go
@@ -0,0 +1,41 @@
+// +build experimental
+
+package main
+
+import (
+	"os/exec"
+	"strings"
+
+	"github.com/go-check/check"
+)
+
+func (s *DockerSuite) TestCheckpointAndRestore(c *check.C) {
+	defer unpauseAllContainers()
+
+	runCmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
+	out, _, err := runCommandWithOutput(runCmd)
+	if err != nil {
+		c.Fatalf("failed to run container: %v, output: %q", err, out)
+	}
+
+	containerID := strings.TrimSpace(out)
+
+	checkpointCmd := exec.Command(dockerBinary, "checkpoint", containerID)
+	out, _, err = runCommandWithOutput(checkpointCmd)
+	if err != nil {
+		c.Fatalf("failed to checkpoint container: %v, output: %q", err, out)
+	}
+
+	out, err = inspectField(containerID, "State.Checkpointed")
+	c.Assert(err, check.IsNil)
+	c.Assert(out, check.Equals, "true")
+
+	restoreCmd := exec.Command(dockerBinary, "restore", containerID)
+	out, _, _, err = runCommandWithStdoutStderr(restoreCmd)
+	if err != nil {
+		c.Fatalf("failed to restore container: %v, output: %q", err, out)
+	}
+
+	out, err = inspectField(containerID, "State.Checkpointed")
+	c.Assert(err, check.IsNil)
+	c.Assert(out, check.Equals, "false")
+}
diff --git a/integration-cli/docker_cli_help_test.go b/integration-cli/docker_cli_help_test.go
index 5c184393164b8..1a0557621af92 100644
--- a/integration-cli/docker_cli_help_test.go
+++ b/integration-cli/docker_cli_help_test.go
@@ -227,7 +227,7 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) {
 
 	// Number of commands for standard release and experimental release
 	standard := 40
-	experimental := 1
+	experimental := 3
 	expected := standard + experimental
 	if isLocalDaemon {
 		expected++ // for the daemon command
diff --git a/project/PACKAGERS.md b/project/PACKAGERS.md
index 5ea659fa44e2a..535e49b822fda 100644
--- a/project/PACKAGERS.md
+++ b/project/PACKAGERS.md
@@ -303,6 +303,9 @@ by having support for them in the kernel or userspace. A few examples include:
   least the "auplink" utility from aufs-tools)
 * BTRFS graph driver (requires BTRFS support enabled in the kernel)
 * ZFS graph driver (requires userspace zfs-utils and a corresponding kernel module)
+* Checkpoint/Restore containers:
+  - requires criu version 1.6 or later (criu.org)
+  - requires kernel version 3.19 or later if using overlay-fs
 
 ## Daemon Init Script
 
diff --git a/runconfig/restore.go b/runconfig/restore.go
new file mode 100644
index 0000000000000..8993294411a96
--- /dev/null
+++ b/runconfig/restore.go
@@ -0,0 +1,18 @@
+package runconfig
+
+// CriuConfig holds configuration options passed down to libcontainer and CRIU
+type CriuConfig struct {
+	ImagesDirectory         string
+	WorkDirectory           string
+	LeaveRunning            bool
+	TCPEstablished          bool
+	ExternalUnixConnections bool
+	ShellJob                bool
+	FileLocks               bool
+}
+
+// RestoreConfig holds the restore command options, which are a superset of the CRIU options
+type RestoreConfig struct {
+	CriuOpts     CriuConfig
+	ForceRestore bool
+}
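To make the flag-to-field mapping concrete, here is a small self-contained sketch of how the documented CLI options line up with `CriuConfig`. The struct is reproduced locally so the example stands alone, and the values are hypothetical:

    package main

    import "fmt"

    // CriuConfig mirrors the struct introduced in runconfig/restore.go above.
    type CriuConfig struct {
    	ImagesDirectory         string
    	WorkDirectory           string
    	LeaveRunning            bool
    	TCPEstablished          bool
    	ExternalUnixConnections bool
    	ShellJob                bool
    	FileLocks               bool
    }

    func main() {
    	// docker checkpoint --image-dir=/some/path --work-dir=/tmp/criu.work \
    	//                   --leave-running=true --allow-shell=true CONTAINER
    	opts := CriuConfig{
    		ImagesDirectory: "/some/path",     // --image-dir
    		WorkDirectory:   "/tmp/criu.work", // --work-dir
    		LeaveRunning:    true,             // --leave-running
    		ShellJob:        true,             // --allow-shell
    	}
    	fmt.Printf("%+v\n", opts)
    }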
From 984913d30a6df2bcfc48dc08ae0522e9f0ccb698 Mon Sep 17 00:00:00 2001
From: Hui Kang
Date: Sun, 22 Nov 2015 15:46:46 -0500
Subject: [PATCH 4/4] Commit the filesystem layer during checkpoint

- The aufs layer is committed during checkpoint
- The criu image path and image ID are persisted to the container config file

Signed-off-by: Hui Kang
---
 daemon/checkpoint.go     | 15 +++++++++++++++
 daemon/container_unix.go |  2 ++
 daemon/daemon_unix.go    |  5 +++--
 daemon/restore.go        |  4 ++++
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go
index 674ce620cfbe5..81c3a0d327dac 100644
--- a/daemon/checkpoint.go
+++ b/daemon/checkpoint.go
@@ -42,6 +42,21 @@ func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfi
 		daemon.Cleanup(container)
 	}
 
+	// Commit the filesystem layer as well (currently supports AUFS only)
+	commitCfg := &ContainerCommitConfig{
+		Pause:  true,
+		Config: container.Config,
+	}
+	img, err := daemon.Commit(name, commitCfg)
+	if err != nil {
+		return err
+	}
+	// Update the criu image path and image ID of the container
+	criuImagePath := opts.ImagesDirectory
+	container.CriuimagePaths[criuImagePath] = img.ID
+	// Update the image layer of the committed container
+	container.ImageID = img.ID
+
 	if err := container.toDisk(); err != nil {
 		return fmt.Errorf("Cannot update config for container: %s", err)
 	}
diff --git a/daemon/container_unix.go b/daemon/container_unix.go
index faf1698b807c0..ba36cd2c098db 100644
--- a/daemon/container_unix.go
+++ b/daemon/container_unix.go
@@ -61,6 +61,8 @@ type Container struct {
 
 	Volumes   map[string]string // Deprecated since 1.7, kept for backwards compatibility
 	VolumesRW map[string]bool   // Deprecated since 1.7, kept for backwards compatibility
+
+	CriuimagePaths map[string]string // Format:
 }
 
 func killProcessDirectly(container *Container) error {
diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go
index e6a6c0f05ec92..bf4e06a54a210 100644
--- a/daemon/daemon_unix.go
+++ b/daemon/daemon_unix.go
@@ -615,8 +615,9 @@ func (daemon *Daemon) newBaseContainer(id string) *Container {
 			root:        daemon.containerRoot(id),
 			MountPoints: make(map[string]*volume.MountPoint),
 		},
-		Volumes:   make(map[string]string),
-		VolumesRW: make(map[string]bool),
+		Volumes:        make(map[string]string),
+		VolumesRW:      make(map[string]bool),
+		CriuimagePaths: make(map[string]string),
 	}
 }
 
diff --git a/daemon/restore.go b/daemon/restore.go
index ae66b5478ef75..16393883a7531 100644
--- a/daemon/restore.go
+++ b/daemon/restore.go
@@ -108,6 +108,10 @@ func (daemon *Daemon) containerRestore(container *Container, opts *runconfig.Cri
 		}
 	}
 
+	if err := daemon.createRootfs(container); err != nil {
+		return err
+	}
+
 	mounts, err := daemon.setupMounts(container)
 	if err != nil {
 		return err