Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions worker/src/components/core/mcp-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,18 @@ export async function startMcpDockerServer(
kind: 'docker' as const,
image: input.image,
command: [...(input.command ?? []), ...(input.args ?? [])],
env: { ...input.env, PORT: String(port), ENDPOINT: endpoint },
env: {
...input.env,
PORT: String(port),
ENDPOINT: endpoint,
// Add runId to env for container identification
STUDIO_RUN_ID: input.context.runId || 'unknown',
},
network: 'bridge' as const,
detached: true,
autoRemove: input.autoRemove,
// Explicitly disable autoRemove to ensure containers persist for manual cleanup
// This prevents race conditions where containers are removed before cleanup runs
autoRemove: false,
containerName,
// Bind to 0.0.0.0 so all interfaces can reach it (both localhost and Docker network)
ports: { [`0.0.0.0:${port}`]: port },
Expand Down
75 changes: 54 additions & 21 deletions worker/src/temporal/activities/mcp.activity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
ServiceError,
} from '@shipsec/component-sdk';
import {
CleanupLocalMcpActivityInput,
CleanupRunResourcesActivityInput,
RegisterComponentToolActivityInput,
RegisterLocalMcpActivityInput,
RegisterRemoteMcpActivityInput,
Expand Down Expand Up @@ -102,7 +102,9 @@ export async function registerLocalMcpActivity(
// const SKIP_CLEANUP = true;
const SKIP_CONTAINER_CLEANUP = process.env.SKIP_CONTAINER_CLEANUP === 'true';

export async function cleanupLocalMcpActivity(input: CleanupLocalMcpActivityInput): Promise<void> {
export async function cleanupRunResourcesActivity(
input: CleanupRunResourcesActivityInput,
): Promise<void> {
// DEBUG: Skip cleanup to inspect Docker logs
if (SKIP_CONTAINER_CLEANUP) {
console.log(
Expand All @@ -114,34 +116,65 @@ export async function cleanupLocalMcpActivity(input: CleanupLocalMcpActivityInpu
return;
}

const { exec } = await import('node:child_process');
const { promisify } = await import('node:util');
const execAsync = promisify(exec);

// Get container IDs from tool registry (primary method)
const response = (await callInternalApi('cleanup', { runId: input.runId })) as {
containerIds?: string[];
};
const containerIds = Array.isArray(response?.containerIds) ? response.containerIds : [];
const registryContainerIds = Array.isArray(response?.containerIds) ? response.containerIds : [];

if (containerIds.length === 0) {
return;
// Fallback: Find containers by name pattern (catches orphaned containers)
// MCP containers follow the pattern: mcp-server-{image}-{timestamp}
let namePatternContainerIds: string[] = [];
try {
const { stdout } = await execAsync(
`docker ps -a --filter "name=mcp-server-" --format "{{.Names}}"`,
);
namePatternContainerIds = stdout
.split('\n')
.map((line) => line.trim())
.filter(Boolean);

console.log(
`[MCP Cleanup] Found ${namePatternContainerIds.length} containers matching name pattern`,
);
} catch (error) {
console.warn(`[MCP Cleanup] Failed to list containers by name pattern:`, error);
}

const { exec } = await import('node:child_process');
const { promisify } = await import('node:util');
const execAsync = promisify(exec);
// Combine both sources and deduplicate
const allContainerIds = Array.from(
new Set([...registryContainerIds, ...namePatternContainerIds]),
);

await Promise.all(
containerIds.map(async (containerId: string) => {
if (!containerId || typeof containerId !== 'string') return;
if (!/^[a-zA-Z0-9_.-]+$/.test(containerId)) {
console.warn(`[MCP Cleanup] Skipping container with unsafe id: ${containerId}`);
return;
}
try {
await execAsync(`docker rm -f ${containerId}`);
} catch (error) {
console.warn(`[MCP Cleanup] Failed to remove container ${containerId}:`, error);
}
}),
console.log(
`[MCP Cleanup] Cleaning up ${allContainerIds.length} containers for run ${input.runId} ` +
`(${registryContainerIds.length} from registry, ${namePatternContainerIds.length} from name pattern)`,
);

if (allContainerIds.length === 0) {
console.log(`[MCP Cleanup] No containers to clean up for run ${input.runId}`);
} else {
await Promise.all(
allContainerIds.map(async (containerId: string) => {
if (!containerId || typeof containerId !== 'string') return;
if (!/^[a-zA-Z0-9_.-]+$/.test(containerId)) {
console.warn(`[MCP Cleanup] Skipping container with unsafe id: ${containerId}`);
return;
}
try {
await execAsync(`docker rm -f ${containerId}`);
console.log(`[MCP Cleanup] Removed container: ${containerId}`);
} catch (error) {
console.warn(`[MCP Cleanup] Failed to remove container ${containerId}:`, error);
}
}),
);
}

if (!/^[a-zA-Z0-9_.-]+$/.test(input.runId)) {
console.warn(`[MCP Cleanup] Skipping volume cleanup with unsafe runId: ${input.runId}`);
return;
Expand Down
2 changes: 1 addition & 1 deletion worker/src/temporal/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ export interface PrepareAndRegisterToolActivityInput {
params: Record<string, unknown>;
}

export interface CleanupLocalMcpActivityInput {
export interface CleanupRunResourcesActivityInput {
runId: string;
}

Expand Down
6 changes: 3 additions & 3 deletions worker/src/temporal/workers/dev.worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import {
registerComponentToolActivity,
registerLocalMcpActivity,
registerRemoteMcpActivity,
cleanupLocalMcpActivity,
cleanupRunResourcesActivity,
prepareAndRegisterToolActivity,
areAllToolsReadyActivity,
} from '../activities/mcp.activity';
Expand Down Expand Up @@ -243,7 +243,7 @@ async function main() {
registerComponentToolActivity,
registerLocalMcpActivity,
registerRemoteMcpActivity,
cleanupLocalMcpActivity,
cleanupRunResourcesActivity,
discoverMcpToolsActivity,
discoverMcpGroupToolsActivity,
cacheDiscoveryResultActivity,
Expand Down Expand Up @@ -283,7 +283,7 @@ async function main() {
registerComponentToolActivity,
registerLocalMcpActivity,
registerRemoteMcpActivity,
cleanupLocalMcpActivity,
cleanupRunResourcesActivity,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Register the old cleanup activity name for compatibility

Renaming cleanupLocalMcpActivity to cleanupRunResourcesActivity changes the Temporal activity type and removes the old handler from worker registration. For workflows that were already running before this deploy, any pending/retried cleanupLocalMcpActivity task will now be polled by a worker that no longer serves that type, leading to repeated activity failures/timeouts for those executions. Keep a temporary alias for the legacy activity name (or gate with workflow versioning) until pre-rename runs are drained.

Useful? React with 👍 / 👎.

prepareAndRegisterToolActivity,
areAllToolsReadyActivity,
discoverMcpToolsActivity,
Expand Down
10 changes: 5 additions & 5 deletions worker/src/temporal/workflows/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import type {
WorkflowAction,
PrepareRunPayloadActivityInput,
RegisterComponentToolActivityInput,
CleanupLocalMcpActivityInput,
CleanupRunResourcesActivityInput,
RegisterLocalMcpActivityInput,
PrepareAndRegisterToolActivityInput,
} from '../types';
Expand All @@ -40,7 +40,7 @@ const {
createHumanInputRequestActivity,
expireHumanInputRequestActivity,
registerLocalMcpActivity,
cleanupLocalMcpActivity,
cleanupRunResourcesActivity,
prepareAndRegisterToolActivity,
areAllToolsReadyActivity,
} = proxyActivities<{
Expand Down Expand Up @@ -70,7 +70,7 @@ const {
expireHumanInputRequestActivity(requestId: string): Promise<void>;
registerComponentToolActivity(input: RegisterComponentToolActivityInput): Promise<void>;
registerLocalMcpActivity(input: RegisterLocalMcpActivityInput): Promise<void>;
cleanupLocalMcpActivity(input: CleanupLocalMcpActivityInput): Promise<void>;
cleanupRunResourcesActivity(input: CleanupRunResourcesActivityInput): Promise<void>;
prepareAndRegisterToolActivity(input: PrepareAndRegisterToolActivityInput): Promise<void>;
areAllToolsReadyActivity(input: {
runId: string;
Expand Down Expand Up @@ -760,7 +760,7 @@ export async function shipsecWorkflowRun(
`[Workflow] Cleaning up MCP container ${startedContainerId} after registration failure`,
);
try {
await cleanupLocalMcpActivity({ runId: input.runId });
await cleanupRunResourcesActivity({ runId: input.runId });
} catch (cleanupError) {
console.error(`[Workflow] Failed to cleanup MCP container: ${cleanupError}`);
}
Expand Down Expand Up @@ -1059,7 +1059,7 @@ export async function shipsecWorkflowRun(
console.log(
`[Workflow] Cleaning up MCP containers for run ${input.runId} (success=${workflowCompletedSuccessfully})`,
);
await cleanupLocalMcpActivity({ runId: input.runId }).catch((err) => {
await cleanupRunResourcesActivity({ runId: input.runId }).catch((err) => {
console.error(`[Workflow] Failed to cleanup MCP containers for run ${input.runId}`, err);
});
await finalizeRunActivity({ runId: input.runId }).catch((err) => {
Expand Down