diff --git a/ts/packages/agents/browser/src/agent/commerce/translator.mts b/ts/packages/agents/browser/src/agent/commerce/translator.mts index dbf9004ad..bcbcfff64 100644 --- a/ts/packages/agents/browser/src/agent/commerce/translator.mts +++ b/ts/packages/agents/browser/src/agent/commerce/translator.mts @@ -162,9 +162,12 @@ export class ECommerceSiteAgent { undefined, fastModelName, ); - this.model = ai.createChatModel(apiSettings, undefined, undefined, [ - "commerce", - ]); + this.model = ai.createChatModel( + apiSettings, + { temperature: 1 }, + undefined, + ["commerce"], + ); const validator = createTypeScriptJsonValidator( this.schema, schemaName, diff --git a/ts/packages/agents/browser/src/agent/crossword/translator.mts b/ts/packages/agents/browser/src/agent/crossword/translator.mts index 541e36761..7d42c936a 100644 --- a/ts/packages/agents/browser/src/agent/crossword/translator.mts +++ b/ts/packages/agents/browser/src/agent/crossword/translator.mts @@ -204,9 +204,12 @@ export class CrosswordPageTranslator { undefined, fastModelName, ); - this.model = ai.createChatModel(apiSettings, undefined, undefined, [ - "crossword", - ]); + this.model = ai.createChatModel( + apiSettings, + { temperature: 1 }, + undefined, + ["crossword"], + ); const validator = createTypeScriptJsonValidator( this.schema, diff --git a/ts/packages/agents/browser/src/agent/discovery/translator.mts b/ts/packages/agents/browser/src/agent/discovery/translator.mts index f0a1cd895..752e5b9cb 100644 --- a/ts/packages/agents/browser/src/agent/discovery/translator.mts +++ b/ts/packages/agents/browser/src/agent/discovery/translator.mts @@ -175,9 +175,12 @@ export class SchemaDiscoveryAgent { undefined, fastModelName, ); - this.model = ai.createChatModel(apiSettings, undefined, undefined, [ - "schemaDiscovery", - ]); + this.model = ai.createChatModel( + apiSettings, + { temperature: 1 }, + undefined, + ["schemaDiscovery"], + ); const validator = createTypeScriptJsonValidator( this.userActionsPoolSchema, schemaName, diff --git a/ts/packages/dispatcher/dispatcher/src/reasoning/claude.ts b/ts/packages/dispatcher/dispatcher/src/reasoning/claude.ts index 73a64cda1..a8ef4196f 100644 --- a/ts/packages/dispatcher/dispatcher/src/reasoning/claude.ts +++ b/ts/packages/dispatcher/dispatcher/src/reasoning/claude.ts @@ -399,16 +399,83 @@ async function executeReasoningWithTracing( ); if (plan && planGenerator.validatePlan(plan)) { - await planLibrary.savePlan(plan); - debug( - `Generated and saved workflow plan: ${plan.planId} (${plan.intent})`, + // Check for duplicate plans before saving + const existingPlans = await planLibrary.findMatchingPlans( + originalRequest, + plan.intent, ); - // Notify user that a plan was created - context.actionIO.appendDisplay({ - type: "text", - content: `\n✓ Created reusable workflow plan: ${plan.description}`, - }); + let isDuplicate = false; + let duplicatePlanId: string | undefined; + + if (existingPlans.length > 0) { + // Use PlanMatcher to check if this plan is essentially a duplicate + const planMatcher = new PlanMatcher(planLibrary); + + for (const existingPlan of existingPlans) { + // Check if existing plan is user-approved + if (existingPlan.approval?.status === "approved") { + debug( + `Found user-approved plan: ${existingPlan.planId}, skipping new plan creation`, + ); + + // Update usage of approved plan instead + await planLibrary.updatePlanUsage( + existingPlan.planId, + true, + tracer.getTrace().metrics.duration, + ); + + isDuplicate = true; + duplicatePlanId = existingPlan.planId; + break; + } + + // Check if the descriptions are very similar + const similarity = + await planMatcher.computeSimilarity( + plan.description, + existingPlan.description, + ); + + if (similarity >= 0.8) { + isDuplicate = true; + duplicatePlanId = existingPlan.planId; + debug( + `Detected duplicate plan (similarity: ${similarity}): ${existingPlan.planId}`, + ); + + // Update the existing plan's usage count + await planLibrary.updatePlanUsage( + existingPlan.planId, + true, + tracer.getTrace().metrics.duration, + ); + break; + } + } + } + + if (isDuplicate) { + debug( + `Skipped creating duplicate plan, updated existing: ${duplicatePlanId}`, + ); + context.actionIO.appendDisplay({ + type: "text", + content: `\n✓ Updated existing workflow plan usage (prevented duplicate)`, + }); + } else { + await planLibrary.savePlan(plan); + debug( + `Generated and saved workflow plan: ${plan.planId} (${plan.intent})`, + ); + + // Notify user that a plan was created + context.actionIO.appendDisplay({ + type: "text", + content: `\n✓ Created reusable workflow plan: ${plan.description}`, + }); + } } } catch (error) { // Don't fail the request if plan generation fails @@ -494,6 +561,14 @@ async function executeReasoningWithPlanning( content: `\n✓ Workflow completed successfully`, }); + // Prompt for review if plan is pending + if (match.plan.approval?.status === "pending_review") { + context.actionIO.appendDisplay({ + type: "text", + content: `\n💡 This workflow is ready for review.`, + }); + } + return executionResult.finalOutput ? createActionResultNoDisplay(executionResult.finalOutput) : undefined; @@ -518,6 +593,10 @@ async function executeReasoningWithPlanning( } } else { debug("No matching plan found, using reasoning"); + displayStatus( + "No matching workflow found, using reasoning...", + context, + ); } } catch (error) { debug("Plan matching/execution failed:", error); diff --git a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planGenerator.ts b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planGenerator.ts index 95dd9d0ea..5406b15c7 100644 --- a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planGenerator.ts +++ b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planGenerator.ts @@ -55,6 +55,10 @@ export class PlanGenerator { lastUsed: new Date().toISOString(), avgDuration: trace.metrics.duration, }, + approval: { + status: "auto", + reviewHistory: [], + }, }; debug(`Generated plan: ${plan.planId} (${plan.intent})`); diff --git a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planLibrary.ts b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planLibrary.ts index 6533ffcff..03620736b 100644 --- a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planLibrary.ts +++ b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planLibrary.ts @@ -62,41 +62,61 @@ export class PlanLibrary { } /** - * Find matching plans by intent or keywords + * Find matching plans by intent or keywords (with scores) */ - async findMatchingPlans( + async findMatchingPlansWithScores( request: string, intent?: string, - ): Promise { + ): Promise> { try { // Load index const index = await this.loadIndex(); if (index.plans.length === 0) { + debug("No plans in index"); return []; } + debug(`Total plans in index: ${index.plans.length}`); + // Filter by intent if provided let candidatePlans = intent ? index.plans.filter((p) => p.intent === intent) : index.plans; if (candidatePlans.length === 0) { + debug(`No plans found with intent: ${intent}`); return []; } + debug( + `Candidate plans after intent filter: ${candidatePlans.length}`, + ); + // Rank by keyword match and usage stats const ranked = this.rankPlans(candidatePlans, request); - // Load full plan data for top matches (up to 3) - const matches: WorkflowPlan[] = []; + debug(`Top 3 ranked plans:`); + for (let i = 0; i < Math.min(3, ranked.length); i++) { + const entry = ranked[i] as any; + debug( + ` ${i + 1}. ${entry.planId} (${entry.intent}) - score: ${entry.score?.toFixed(3)}`, + ); + } + + // Load full plan data for top matches (up to 3) and include scores + const matches: Array<{ plan: WorkflowPlan; score: number }> = []; for (const entry of ranked.slice(0, 3)) { const plan = await this.loadPlan(entry.planId); if (plan) { - matches.push(plan); + matches.push({ + plan, + score: (entry as any).score || 0, + }); } } + debug(`Returning ${matches.length} candidate plans for validation`); return matches; } catch (error) { debug(`Failed to find matching plans:`, error); @@ -104,6 +124,17 @@ export class PlanLibrary { } } + /** + * Find matching plans by intent or keywords + */ + async findMatchingPlans( + request: string, + intent?: string, + ): Promise { + const results = await this.findMatchingPlansWithScores(request, intent); + return results.map((r) => r.plan); + } + /** * Update plan usage stats */ @@ -116,6 +147,14 @@ export class PlanLibrary { const plan = await this.loadPlan(planId); if (!plan) return; + // Check if plan is user-approved (immutable structure) + if (plan.approval?.status === "approved") { + debug( + `Plan ${planId} is user-approved, only updating usage stats`, + ); + } + + // Initialize usage if not exists if (!plan.usage) { plan.usage = { successCount: 0, @@ -125,6 +164,15 @@ export class PlanLibrary { }; } + // Initialize approval if not exists + if (!plan.approval) { + plan.approval = { + status: "auto", + reviewHistory: [], + }; + } + + // Update usage stats if (success) { plan.usage.successCount++; } else { @@ -139,6 +187,16 @@ export class PlanLibrary { (plan.usage.avgDuration * (totalExecutions - 1) + duration) / totalExecutions; + // Mark for review after 3+ successful executions (if still auto) + if ( + plan.approval.status === "auto" && + plan.usage.successCount >= 3 && + success + ) { + plan.approval.status = "pending_review"; + debug(`Plan ${planId} marked for user review`); + } + await this.savePlan(plan); debug( @@ -274,6 +332,7 @@ export class PlanLibrary { : 0, lastUsed: plan.usage?.lastUsed || plan.createdAt, executionCount: totalExecutions, + approvalStatus: plan.approval?.status || "auto", }); // Save updated index @@ -322,6 +381,7 @@ export class PlanLibrary { : 0, lastUsed: plan.usage?.lastUsed || plan.createdAt, executionCount: totalExecutions, + approvalStatus: plan.approval?.status || "auto", }); // Save to instance storage @@ -368,6 +428,7 @@ export class PlanLibrary { return text .toLowerCase() + .replace(/[^\w\s]/g, " ") // Remove punctuation .split(/\s+/) .filter((w) => w.length > 3 && !commonWords.has(w)) .slice(0, 10); @@ -383,6 +444,7 @@ export class PlanLibrary { const requestWords = new Set( request .toLowerCase() + .replace(/[^\w\s]/g, " ") // Remove punctuation .split(/\s+/) .filter((w) => w.length > 3), ); @@ -406,11 +468,29 @@ export class PlanLibrary { (1000 * 60 * 60 * 24); const recencyScore = Math.exp(-daysSinceUse / 30); // 30-day half-life - // Combined score + // Approval boost + let approvalBoost = 0; + switch (plan.approvalStatus) { + case "approved": + approvalBoost = 0.3; // Significant boost for user-approved + break; + case "reviewed": + approvalBoost = 0.1; // Small boost for reviewed + break; + case "pending_review": + approvalBoost = 0.05; // Tiny boost for pending + break; + case "auto": + default: + approvalBoost = 0; + } + + // Combined score (keyword: 40%, success: 25%, recency: 15%, approval: 20%) const score = - keywordScore * 0.5 + - successWeight * 0.3 + - recencyScore * 0.2; + keywordScore * 0.4 + + successWeight * 0.25 + + recencyScore * 0.15 + + approvalBoost; return { ...plan, score }; }) diff --git a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planMatcher.ts b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planMatcher.ts index fdbe4f27c..268da52cb 100644 --- a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planMatcher.ts +++ b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/planMatcher.ts @@ -20,7 +20,10 @@ export interface PlanMatchResult { * Matches user requests to saved workflow plans */ export class PlanMatcher { - constructor(private planLibrary: PlanLibrary) {} + constructor( + private planLibrary: PlanLibrary, + private useLLMValidation: boolean = false, // Disable by default due to process spawning issues + ) {} /** * Find the best matching plan for a user request @@ -33,49 +36,62 @@ export class PlanMatcher { minConfidence: number = 0.7, ): Promise { debug(`Finding matching plan for: "${request}"`); + debug(`Minimum confidence threshold: ${minConfidence}`); - // Step 1: Get candidate plans from library (keyword-based) - const candidates = await this.planLibrary.findMatchingPlans(request); + // Step 1: Get candidate plans with scores from library + const candidatesWithScores = + await this.planLibrary.findMatchingPlansWithScores(request); - if (candidates.length === 0) { + if (candidatesWithScores.length === 0) { debug("No candidate plans found"); return null; } - debug(`Found ${candidates.length} candidate plans`); + debug( + `Found ${candidatesWithScores.length} candidate plans for validation`, + ); - // Step 2: If only one candidate, validate it - if (candidates.length === 1) { - const validated = await this.validateMatch(request, candidates[0]); + // Step 2: Use ranking scores as confidence (unless LLM validation enabled) + const matches: PlanMatchResult[] = []; - if (validated && validated.confidence >= minConfidence) { + for (const { plan, score } of candidatesWithScores) { + if (this.useLLMValidation) { + // Use LLM validation if enabled + const validated = await this.validateMatch(request, plan); + if (validated) { + matches.push(validated); + } + } else { + // Use ranking score directly as confidence + const confidence = score; debug( - `Single candidate validated: ${candidates[0].planId} (confidence: ${validated.confidence})`, + `Using ranking score as confidence for ${plan.planId}: ${confidence.toFixed(3)}`, ); - return validated; - } - debug( - `Single candidate rejected (confidence: ${validated?.confidence || 0})`, - ); - return null; + if (confidence >= minConfidence) { + matches.push({ + plan, + confidence, + reason: "Keyword-based match from ranking", + }); + } else { + debug( + `Plan ${plan.planId} below threshold: ${confidence.toFixed(3)} < ${minConfidence}`, + ); + } + } } - // Step 3: If multiple candidates, rank them - const rankedMatches = await this.rankCandidates(request, candidates); - - // Return best match if it meets confidence threshold - const bestMatch = rankedMatches[0]; - if (bestMatch && bestMatch.confidence >= minConfidence) { + // Return best match + if (matches.length > 0) { + const bestMatch = matches[0]; debug( `Best match: ${bestMatch.plan.planId} (confidence: ${bestMatch.confidence})`, ); return bestMatch; } - debug( - `No match meets confidence threshold (best: ${bestMatch?.confidence || 0})`, - ); + debug(`No match meets confidence threshold`); return null; } @@ -86,6 +102,23 @@ export class PlanMatcher { request: string, plan: WorkflowPlan, ): Promise { + // Use keyword-based confidence if LLM validation is disabled + if (!this.useLLMValidation) { + debug( + `Using keyword-based confidence for plan: ${plan.planId} (LLM validation disabled)`, + ); + const confidence = await this.computeKeywordConfidence( + request, + plan, + ); + return { + plan, + confidence, + reason: "Keyword-based match", + }; + } + + // LLM-based validation try { const prompt = this.buildValidationPrompt(request, plan); @@ -142,33 +175,6 @@ export class PlanMatcher { } } - /** - * Rank multiple candidate plans - */ - private async rankCandidates( - request: string, - candidates: WorkflowPlan[], - ): Promise { - const results: PlanMatchResult[] = []; - - // Validate each candidate in parallel - const validations = await Promise.all( - candidates.map((plan) => this.validateMatch(request, plan)), - ); - - // Collect valid matches - for (const validation of validations) { - if (validation) { - results.push(validation); - } - } - - // Sort by confidence (descending) - results.sort((a, b) => b.confidence - a.confidence); - - return results; - } - /** * Build validation prompt */ @@ -211,4 +217,109 @@ Return a JSON object: Analyze now:`; } + + /** + * Compute similarity between two descriptions (0-1 score) + * Uses simple keyword overlap for fast duplicate detection + */ + async computeSimilarity( + description1: string, + description2: string, + ): Promise { + // Normalize and extract keywords + const extractKeywords = (text: string): Set => { + return new Set( + text + .toLowerCase() + .replace(/[^\w\s]/g, " ") // Remove punctuation + .split(/\s+/) + .filter((w) => w.length > 3), // Filter out short words + ); + }; + + const keywords1 = extractKeywords(description1); + const keywords2 = extractKeywords(description2); + + if (keywords1.size === 0 || keywords2.size === 0) { + return 0; + } + + // Compute Jaccard similarity + const intersection = new Set( + [...keywords1].filter((k) => keywords2.has(k)), + ); + const union = new Set([...keywords1, ...keywords2]); + + return intersection.size / union.size; + } + + /** + * Compute keyword-based confidence for a plan match + * Returns confidence score 0-1 based on keyword overlap + */ + private async computeKeywordConfidence( + request: string, + plan: WorkflowPlan, + ): Promise { + // Extract keywords from request and plan + const extractKeywords = (text: string): Set => { + return new Set( + text + .toLowerCase() + .replace(/[^\w\s]/g, " ") + .split(/\s+/) + .filter((w) => w.length > 3), + ); + }; + + const requestKeywords = extractKeywords(request); + const planKeywords = extractKeywords( + `${plan.description} ${plan.intent}`, + ); + + if (requestKeywords.size === 0 || planKeywords.size === 0) { + return 0; + } + + // Count matching keywords + const matchingKeywords = [...requestKeywords].filter((k) => + planKeywords.has(k), + ); + const matches = matchingKeywords.length; + + // Calculate confidence as percentage of request keywords that match + // Higher weight for matching more of the user's intent + const confidence = matches / requestKeywords.size; + + // Boost confidence if intent-related keywords match + const intentKeywords = new Set([ + "search", + "find", + "list", + "get", + "add", + "create", + "delete", + "update", + "buy", + "purchase", + "shopping", + "cart", + ]); + + const intentMatches = [...requestKeywords].filter( + (k) => intentKeywords.has(k) && planKeywords.has(k), + ).length; + + // Add bonus for intent keyword matches (up to 0.2) + const bonus = Math.min(intentMatches * 0.1, 0.2); + + const finalConfidence = Math.min(confidence + bonus, 1.0); + + debug( + `Keyword confidence for ${plan.planId}: ${matches}/${requestKeywords.size} keywords match = ${confidence.toFixed(3)} + intent bonus ${bonus.toFixed(3)} = ${finalConfidence.toFixed(3)}`, + ); + + return finalConfidence; + } } diff --git a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/types.ts b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/types.ts index b5a05f632..71d13acc8 100644 --- a/ts/packages/dispatcher/dispatcher/src/reasoning/planning/types.ts +++ b/ts/packages/dispatcher/dispatcher/src/reasoning/planning/types.ts @@ -30,6 +30,28 @@ export interface WorkflowPlan { lastUsed: string; avgDuration: number; }; + + // User approval tracking + approval?: PlanApproval; +} + +export interface PlanApproval { + status: "auto" | "pending_review" | "reviewed" | "approved"; + + // Review tracking + reviewedBy?: string; // User identifier + reviewedAt?: string; // ISO timestamp + approvedAt?: string; // ISO timestamp + + // User feedback + userComments?: string; + + // Review history + reviewHistory?: Array<{ + action: "reviewed" | "approved" | "rejected"; + timestamp: string; + comments?: string; + }>; } export interface PlanStep { @@ -88,6 +110,7 @@ export interface PlanIndexEntry { successRate: number; lastUsed: string; executionCount: number; + approvalStatus?: "auto" | "pending_review" | "reviewed" | "approved"; } export interface PlanGenerationOptions {