diff --git a/lib/instances/network_test.go b/lib/instances/network_test.go index 70ac861c..1de9921e 100644 --- a/lib/instances/network_test.go +++ b/lib/instances/network_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "os" + "os/exec" "strings" "testing" "time" @@ -59,6 +60,48 @@ func TestCreateInstanceWithNetwork(t *testing.T) { require.NoError(t, err) t.Log("Network initialized") + // Verify that ensureDockerForwardJump restores Docker's FORWARD chain + // when it gets wiped (e.g., by a hypervisor firewall rebuild). + // Note: no extra privilege guard needed — make test-linux runs the entire + // suite under sudo, so iptables commands have the required permissions. + t.Run("DockerForwardChainRestored", func(t *testing.T) { + // Check if DOCKER-FORWARD chain exists (Docker must be running on host) + checkChain := exec.Command("iptables", "-L", "DOCKER-FORWARD", "-n") + if checkChain.Run() != nil { + t.Skip("DOCKER-FORWARD chain not present (Docker not running), skipping") + } + + // Verify jump currently exists + checkJump := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD") + require.NoError(t, checkJump.Run(), "DOCKER-FORWARD jump should exist before test") + + // Safety net: restore the jump if the test fails or aborts after we delete it, + // so we don't leave the host's Docker networking broken. + t.Cleanup(func() { + check := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD") + if check.Run() != nil { + restore := exec.Command("iptables", "-A", "FORWARD", "-j", "DOCKER-FORWARD") + _ = restore.Run() + } + }) + + // Simulate the hypervisor flush: delete the jump + delJump := exec.Command("iptables", "-D", "FORWARD", "-j", "DOCKER-FORWARD") + require.NoError(t, delJump.Run(), "should be able to delete DOCKER-FORWARD jump") + + // Confirm it's gone + checkGone := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD") + require.Error(t, checkGone.Run(), "DOCKER-FORWARD jump should be gone after delete") + + // Re-initialize network — this should restore the jump + err := manager.networkManager.Initialize(ctx, nil) + require.NoError(t, err) + + // Verify jump is restored + checkRestored := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD") + require.NoError(t, checkRestored.Run(), "ensureDockerForwardJump should have restored the DOCKER-FORWARD jump") + }) + // Create instance with nginx:alpine and default network t.Log("Creating instance with default network...") inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ diff --git a/lib/network/bridge_linux.go b/lib/network/bridge_linux.go index c3cf3304..c315737e 100644 --- a/lib/network/bridge_linux.go +++ b/lib/network/bridge_linux.go @@ -251,6 +251,13 @@ func (m *manager) setupIPTablesRules(ctx context.Context, subnet, bridgeName str log.InfoContext(ctx, "iptables FORWARD ready", "outbound", fwdOutStatus, "inbound", fwdInStatus) + // Restore Docker's FORWARD chain jumps if they were lost. + // On systems where an external tool (e.g., hypervisor firewall management) periodically + // rebuilds the FORWARD chain, Docker's jump rules can be wiped out. Docker only inserts + // them at daemon start, so they stay missing until Docker is restarted. Since hypeman + // already re-ensures its own rules here, we also restore Docker's if needed. + m.ensureDockerForwardJump(ctx) + return nil } @@ -409,6 +416,76 @@ func (m *manager) deleteForwardRuleByComment(comment string) { } } +// ensureDockerForwardJump checks if Docker's DOCKER-FORWARD chain exists but is +// unreachable from the FORWARD chain, and restores the jump if missing. +// This is a no-op if Docker is not installed or the jump already exists. +// +// Note: this cannot mis-order DOCKER-FORWARD vs DOCKER-USER because it only acts +// when the jump is completely absent (chain was flushed). If DOCKER-USER's jump +// still exists, DOCKER-FORWARD's jump is almost certainly still there too — they +// get wiped together — and the early -C check returns before we insert anything. +func (m *manager) ensureDockerForwardJump(ctx context.Context) { + log := logger.FromContext(ctx) + + // Check if DOCKER-FORWARD chain exists (Docker is installed and configured) + checkChain := exec.Command("iptables", "-L", "DOCKER-FORWARD", "-n") + checkChain.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + if checkChain.Run() != nil { + return // Chain doesn't exist — Docker not installed or not configured + } + + // Check if jump already exists in FORWARD + checkJump := exec.Command("iptables", "-C", "FORWARD", "-j", "DOCKER-FORWARD") + checkJump.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + if checkJump.Run() == nil { + return // Jump already present + } + + // DOCKER-FORWARD chain exists but the jump from FORWARD is missing — restore it. + // Insert right after hypeman's last rule so the jump is evaluated before any + // explicit DROP/REJECT rules that an external firewall tool may have added. + insertPos := m.lastHypemanForwardRulePosition() + 1 + addJump := exec.Command("iptables", "-I", "FORWARD", fmt.Sprintf("%d", insertPos), "-j", "DOCKER-FORWARD") + addJump.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + if err := addJump.Run(); err != nil { + log.WarnContext(ctx, "failed to restore Docker FORWARD chain jump", "error", err) + return + } + + log.WarnContext(ctx, "restored missing jump to DOCKER-FORWARD in FORWARD chain", "position", insertPos) +} + +// lastHypemanForwardRulePosition returns the line number of the last hypeman-managed +// rule in the FORWARD chain, or 0 if none are found. +func (m *manager) lastHypemanForwardRulePosition() int { + cmd := exec.Command("iptables", "-L", "FORWARD", "--line-numbers", "-n", "-v") + cmd.SysProcAttr = &syscall.SysProcAttr{ + AmbientCaps: []uintptr{unix.CAP_NET_ADMIN}, + } + output, err := cmd.Output() + if err != nil { + return 0 + } + + lastPos := 0 + for _, line := range strings.Split(string(output), "\n") { + if !strings.Contains(line, "hypeman-") { + continue + } + var pos int + if _, err := fmt.Sscanf(line, "%d", &pos); err == nil && pos > lastPos { + lastPos = pos + } + } + return lastPos +} + // createTAPDevice creates TAP device and attaches to bridge. // downloadBps: rate limit for download (external→VM), applied as TBF on TAP egress // uploadBps/uploadCeilBps: rate limit for upload (VM→external), applied as HTB class on bridge