From 32dfc85f87ec500c56478fa6adea9e667453275a Mon Sep 17 00:00:00 2001 From: nioasoft Date: Thu, 29 Jan 2026 12:27:45 +0200 Subject: [PATCH 1/4] feat(mcp): optimize token consumption in MCP responses - Add to_minimal_dict() and to_cycle_check_dict() to Feature model - Use minimal serialization for cycle detection (~95% token reduction) - Add minimal parameter to feature_get_ready/blocked (default True) - Optimize feature_get_graph to query only needed columns - Add spec_get_summary MCP tool (~800 tokens vs 12,500 full) - Implement progressive history summarization in assistant chat - Update coding prompt to recommend new token-efficient tools Co-Authored-By: Claude Opus 4.5 --- .claude/templates/coding_prompt.template.md | 18 ++- api/database.py | 26 ++++ mcp_server/feature_mcp.py | 126 ++++++++++++++++++-- server/services/assistant_chat_session.py | 27 +++-- 4 files changed, 180 insertions(+), 17 deletions(-) diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md index 93224044..8dcd7c18 100644 --- a/.claude/templates/coding_prompt.template.md +++ b/.claude/templates/coding_prompt.template.md @@ -31,8 +31,7 @@ Then use MCP tools to check feature status: Use the feature_get_stats tool ``` -Understanding the `app_spec.txt` is critical - it contains the full requirements -for the application you're building. +**NOTE:** Do NOT read `app_spec.txt` directly (12,500+ tokens). If you need project context, use `spec_get_summary` tool (~800 tokens) which returns project name, tech stack, ports, and overview. ### STEP 2: START SERVERS (IF NOT RUNNING) @@ -363,6 +362,9 @@ feature_skip with feature_id={id} # 7. Clear in-progress status (when abandoning a feature) feature_clear_in_progress with feature_id={id} + +# 8. Get condensed project spec (~800 tokens vs 12,500 full) +spec_get_summary ``` ### RULES: @@ -396,6 +398,18 @@ This allows you to fully test email-dependent flows without needing external ema --- +## TOKEN EFFICIENCY + +To maximize context window usage: + +- **Don't read files unnecessarily** - Feature details from `feature_get_by_id` contain everything you need +- **Be concise** - Short, focused responses save tokens for actual work +- **Use `feature_get_summary`** for status checks (lighter than `feature_get_by_id`) +- **Use `spec_get_summary`** for project context (~800 tokens vs 12,500 for full app_spec.txt) +- **Avoid re-reading large files** - Read once, remember the content + +--- + **Remember:** One feature per session. Zero console errors. All data from real database. Leave codebase clean before ending session. --- diff --git a/api/database.py b/api/database.py index 90dc49af..f43ae3bc 100644 --- a/api/database.py +++ b/api/database.py @@ -82,6 +82,32 @@ def get_dependencies_safe(self) -> list[int]: return [d for d in self.dependencies if isinstance(d, int)] return [] + def to_minimal_dict(self) -> dict: + """Return minimal feature info for token-efficient responses. + + Use this instead of to_dict() when you only need status/dependency info, + not the full description and steps. Reduces response size by ~80%. + """ + return { + "id": self.id, + "name": self.name, + "priority": self.priority, + "passes": self.passes if self.passes is not None else False, + "in_progress": self.in_progress if self.in_progress is not None else False, + "dependencies": self.dependencies if self.dependencies else [], + } + + def to_cycle_check_dict(self) -> dict: + """Return only fields needed for cycle detection. 
+ + Use this for circular dependency validation - drastically reduces + token usage compared to to_dict() (~95% reduction). + """ + return { + "id": self.id, + "dependencies": self.dependencies if self.dependencies else [], + } + class Schedule(Base): """Time-based schedule for automated agent start/stop.""" diff --git a/mcp_server/feature_mcp.py b/mcp_server/feature_mcp.py index a394f1e9..7579746b 100755 --- a/mcp_server/feature_mcp.py +++ b/mcp_server/feature_mcp.py @@ -686,7 +686,8 @@ def feature_add_dependency( # Security: Circular dependency check # would_create_circular_dependency(features, source_id, target_id) # source_id = feature gaining the dependency, target_id = feature being depended upon - all_features = [f.to_dict() for f in session.query(Feature).all()] + # Use to_cycle_check_dict() for minimal token usage (~95% reduction) + all_features = [f.to_cycle_check_dict() for f in session.query(Feature).all()] if would_create_circular_dependency(all_features, feature_id, dependency_id): return json.dumps({"error": "Cannot add: would create circular dependency"}) @@ -749,7 +750,8 @@ def feature_remove_dependency( @mcp.tool() def feature_get_ready( - limit: Annotated[int, Field(default=10, ge=1, le=50, description="Max features to return")] = 10 + limit: Annotated[int, Field(default=10, ge=1, le=50, description="Max features to return")] = 10, + minimal: Annotated[bool, Field(default=True, description="Return minimal fields (id, name, priority, status, deps) to reduce tokens")] = True ) -> str: """Get all features ready to start (dependencies satisfied, not in progress). @@ -758,6 +760,7 @@ def feature_get_ready( Args: limit: Maximum number of features to return (1-50, default 10) + minimal: If True (default), return only essential fields. Set False for full details. Returns: JSON with: features (list), count (int), total_ready (int) @@ -774,7 +777,8 @@ def feature_get_ready( continue deps = f.dependencies or [] if all(dep_id in passing_ids for dep_id in deps): - ready.append(f.to_dict()) + # Use minimal or full serialization based on parameter + ready.append(f.to_minimal_dict() if minimal else f.to_dict()) # Sort by scheduling score (higher = first), then priority, then id scores = compute_scheduling_scores(all_dicts) @@ -791,7 +795,8 @@ def feature_get_ready( @mcp.tool() def feature_get_blocked( - limit: Annotated[int, Field(default=20, ge=1, le=100, description="Max features to return")] = 20 + limit: Annotated[int, Field(default=20, ge=1, le=100, description="Max features to return")] = 20, + minimal: Annotated[bool, Field(default=True, description="Return minimal fields (id, name, priority, status, deps) to reduce tokens")] = True ) -> str: """Get features that are blocked by unmet dependencies. @@ -800,6 +805,7 @@ def feature_get_blocked( Args: limit: Maximum number of features to return (1-100, default 20) + minimal: If True (default), return only essential fields. Set False for full details. 
Returns:
         JSON with: features (list with blocked_by field), count (int), total_blocked (int)
     """
     session = get_session()
     try:
@@ -816,8 +822,10 @@
         deps = f.dependencies or []
         blocking = [d for d in deps if d not in passing_ids]
         if blocking:
+            # Use minimal or full serialization based on parameter
+            base_dict = f.to_minimal_dict() if minimal else f.to_dict()
             blocked.append({
-                **f.to_dict(),
+                **base_dict,
                 "blocked_by": blocking
             })
@@ -842,7 +850,17 @@ def feature_get_graph() -> str:
     """
     session = get_session()
     try:
-        all_features = session.query(Feature).all()
+        # Optimized: Query only columns needed for graph visualization
+        # Avoids loading description, steps, timestamps, last_error
+        all_features = session.query(
+            Feature.id,
+            Feature.name,
+            Feature.category,
+            Feature.priority,
+            Feature.passes,
+            Feature.in_progress,
+            Feature.dependencies
+        ).all()
         passing_ids = {f.id for f in all_features if f.passes}
 
         nodes = []
@@ -922,7 +940,8 @@ def feature_set_dependencies(
             return json.dumps({"error": f"Dependencies not found: {missing}"})
 
         # Check for circular dependencies
-        all_features = [f.to_dict() for f in session.query(Feature).all()]
+        # Use to_cycle_check_dict() for minimal token usage (~95% reduction)
+        all_features = [f.to_cycle_check_dict() for f in session.query(Feature).all()]
         # Temporarily update the feature's dependencies for cycle check
         test_features = []
         for f in all_features:
@@ -952,5 +971,98 @@ def feature_set_dependencies(
         session.close()
 
 
+@mcp.tool()
+def spec_get_summary() -> str:
+    """Get condensed project specification summary (~800 tokens vs ~12,500 full).
+
+    Returns only essential project info:
+    - project_name: Name of the project
+    - overview: First 200 chars of project overview
+    - technology_stack: Tech stack summary
+    - ports: Development server ports
+    - feature_count: Target number of features
+
+    Use this instead of reading the full app_spec.txt to save tokens.
+    For full details, read prompts/app_spec.txt directly.
+
+    Returns:
+        JSON with condensed project spec, or error if not found.
+    """
+    import re
+
+    spec_path = PROJECT_DIR / "prompts" / "app_spec.txt"
+    if not spec_path.exists():
+        return json.dumps({"error": "No app_spec.txt found in prompts directory"})
+
+    try:
+        content = spec_path.read_text(encoding="utf-8")
+    except Exception as e:
+        return json.dumps({"error": f"Failed to read app_spec.txt: {str(e)}"})
+
+    result: dict = {}
+
+    # Extract project_name (look for <project_name> tag or "Project:" header)
+    project_name_match = re.search(r"<project_name>\s*(.+?)\s*</project_name>", content, re.IGNORECASE)
+    if project_name_match:
+        result["project_name"] = project_name_match.group(1).strip()
+    else:
+        # Try alternative formats
+        alt_match = re.search(r"(?:Project|Name):\s*(.+?)(?:\n|$)", content, re.IGNORECASE)
+        result["project_name"] = alt_match.group(1).strip() if alt_match else "Unknown"
+
+    # Extract overview (first 200 chars)
+    overview_match = re.search(r"<overview>\s*(.+?)\s*</overview>", content, re.DOTALL | re.IGNORECASE)
+    if overview_match:
+        overview = overview_match.group(1).strip()
+        result["overview"] = overview[:200] + ("..." if len(overview) > 200 else "")
+    else:
+        # Try alternative formats
+        alt_match = re.search(r"(?:Overview|Description):\s*(.+?)(?:\n\n|$)", content, re.DOTALL | re.IGNORECASE)
+        if alt_match:
+            overview = alt_match.group(1).strip()
+            result["overview"] = overview[:200] + ("..." if len(overview) > 200 else "")
+        else:
+            result["overview"] = None
+
+    # Extract technology_stack
+    tech_match = re.search(r"<technology_stack>\s*(.+?)\s*</technology_stack>", content, re.DOTALL | re.IGNORECASE)
+    if tech_match:
+        # Parse tech stack lines into a list
+        tech_text = tech_match.group(1).strip()
+        tech_items = [line.strip().lstrip("- ") for line in tech_text.split("\n") if line.strip() and not line.strip().startswith("#")]
+        result["technology_stack"] = tech_items[:10]  # Cap at 10 items
+    else:
+        result["technology_stack"] = None
+
+    # Extract ports
+    ports_match = re.search(r"<ports>\s*(.+?)\s*</ports>", content, re.DOTALL | re.IGNORECASE)
+    if ports_match:
+        ports_text = ports_match.group(1).strip()
+        ports = {}
+        for line in ports_text.split("\n"):
+            if ":" in line:
+                key, val = line.split(":", 1)
+                key = key.strip().lstrip("- ")
+                val = val.strip()
+                # Try to extract port number
+                port_num = re.search(r"\d+", val)
+                if port_num:
+                    ports[key] = int(port_num.group())
+        result["ports"] = ports if ports else None
+    else:
+        result["ports"] = None
+
+    # Extract feature_count
+    feature_count_match = re.search(r"<feature_count>\s*(\d+)\s*</feature_count>", content, re.IGNORECASE)
+    if feature_count_match:
+        result["feature_count"] = int(feature_count_match.group(1))
+    else:
+        # Try alternative formats
+        alt_match = re.search(r"feature[_\s]*count[:\s]*(\d+)", content, re.IGNORECASE)
+        result["feature_count"] = int(alt_match.group(1)) if alt_match else None
+
+    return json.dumps(result)
+
+
 if __name__ == "__main__":
     mcp.run()

diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py
index f15eee8a..209d5df7 100755
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -347,22 +347,33 @@ async def send_message(self, user_message: str) -> AsyncGenerator[dict, None]:
         history = get_messages(self.project_dir, self.conversation_id)
         # Exclude the message we just added (last one)
         history = history[:-1] if history else []
-        # Cap history to last 35 messages to prevent context overload
-        history = history[-35:] if len(history) > 35 else history
+        # Cap history to last 20 messages to prevent context overload
+        history = history[-20:] if len(history) > 20 else history
 
         if history:
-            # Format history as context for Claude
+            # Progressive summarization for token efficiency:
+            # - Recent messages (last 5): up to 1500 chars each
+            # - Older messages (6-20): 100-char summaries
+            # This reduces token usage by ~50% compared to uniform truncation
             history_lines = ["[Previous conversation history for context:]"]
-            for msg in history:
+            num_messages = len(history)
+            for i, msg in enumerate(history):
                 role = "User" if msg["role"] == "user" else "Assistant"
                 content = msg["content"]
-                # Truncate very long messages
-                if len(content) > 500:
-                    content = content[:500] + "..."
+                # Calculate position from end (0 = most recent)
+                position_from_end = num_messages - 1 - i
+                if position_from_end < 5:
+                    # Recent messages (last 5): allow up to 1500 chars
+                    if len(content) > 1500:
+                        content = content[:1500] + "..."
+                else:
+                    # Older messages (6-20): 100-char summaries only
+                    if len(content) > 100:
+                        content = content[:100] + "..."
                 history_lines.append(f"{role}: {content}")
             history_lines.append("[End of history. 
Continue the conversation:]") history_lines.append(f"User: {user_message}") message_to_send = "\n".join(history_lines) - logger.info(f"Loaded {len(history)} messages from conversation history") + logger.info(f"Loaded {len(history)} messages from conversation history (progressive summarization)") try: async for chunk in self._query_claude(message_to_send): From 904dbeb003d75845634172fb793eebc9eedb748e Mon Sep 17 00:00:00 2001 From: nioasoft Date: Thu, 29 Jan 2026 13:07:57 +0200 Subject: [PATCH 2/4] fix: clarify token efficiency tool recommendations - Use feature_get_stats for progress checks - Use feature_get_summary for single feature status Co-Authored-By: Claude Opus 4.5 --- .claude/templates/coding_prompt.template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md index 8dcd7c18..339e9ea3 100644 --- a/.claude/templates/coding_prompt.template.md +++ b/.claude/templates/coding_prompt.template.md @@ -404,7 +404,7 @@ To maximize context window usage: - **Don't read files unnecessarily** - Feature details from `feature_get_by_id` contain everything you need - **Be concise** - Short, focused responses save tokens for actual work -- **Use `feature_get_summary`** for status checks (lighter than `feature_get_by_id`) +- **Use `feature_get_stats`** for progress checks, `feature_get_summary` for single feature status - **Use `spec_get_summary`** for project context (~800 tokens vs 12,500 for full app_spec.txt) - **Avoid re-reading large files** - Read once, remember the content From 3f1b906d81e1388f5c49ecd353bfb3016db86a25 Mon Sep 17 00:00:00 2001 From: nioasoft Date: Fri, 30 Jan 2026 08:17:31 +0200 Subject: [PATCH 3/4] fix: remove dead feature_release_testing references The testing architecture was simplified - testing agents now work concurrently without claim/release coordination. - Remove feature_release_testing from allowed tools in client.py - Update testing_prompt.template.md to remove claim/release workflow Co-Authored-By: Claude Opus 4.5 --- .claude/templates/testing_prompt.template.md | 28 ++++---------------- client.py | 1 - 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md index a7e2bbe0..b73ed176 100644 --- a/.claude/templates/testing_prompt.template.md +++ b/.claude/templates/testing_prompt.template.md @@ -48,9 +48,7 @@ Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` Use the feature_get_by_id tool with feature_id={your_assigned_id} ``` -The orchestrator has already claimed this feature for testing (set `testing_in_progress=true`). - -**CRITICAL:** You MUST call `feature_release_testing` when done, regardless of pass/fail. +The orchestrator has assigned this feature for you to test. ### STEP 4: VERIFY THE FEATURE @@ -85,12 +83,9 @@ Use browser automation tools: #### If the feature PASSES: -The feature still works correctly. Release the claim and end your session: - -``` -# Release the testing claim (tested_ok=true) -Use the feature_release_testing tool with feature_id={id} and tested_ok=true +The feature still works correctly. Log the result and end your session: +```bash # Log the successful verification echo "[Testing] Feature #{id} verified - still passing" >> claude-progress.txt ``` @@ -125,13 +120,7 @@ A regression has been introduced. You MUST fix it: Use the feature_mark_passing tool with feature_id={id} ``` -6. 
**Release the testing claim:**
-   ```
-   Use the feature_release_testing tool with feature_id={id} and tested_ok=false
-   ```
-   Note: tested_ok=false because we found a regression (even though we fixed it).
-
-7. **Commit the fix:**
+6. **Commit the fix:**
    ```bash
    git add .
    git commit -m "Fix regression in [feature name]
@@ -156,7 +145,6 @@ echo "[Testing] Session complete - verified/fixed feature #{id}" >> claude-progress.txt
 
 ### Feature Management
 - `feature_get_stats` - Get progress overview (passing/in_progress/total counts)
 - `feature_get_by_id` - Get your assigned feature details
-- `feature_release_testing` - **REQUIRED** - Release claim after testing (pass tested_ok=true/false)
 - `feature_mark_failing` - Mark a feature as failing (when you find a regression)
 - `feature_mark_passing` - Mark a feature as passing (after fixing a regression)
 
@@ -188,18 +176,12 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed.
 - Visual appearance correct
 - API calls succeed
 
-**CRITICAL - Always release your claim:**
-- Call `feature_release_testing` when done, whether pass or fail
-- Pass `tested_ok=true` if the feature passed
-- Pass `tested_ok=false` if you found a regression
-
 **If you find a regression:**
 1. Mark the feature as failing immediately
 2. Fix the issue
 3. Verify the fix with browser automation
 4. Mark as passing only after thorough verification
-5. Release the testing claim with `tested_ok=false`
-6. Commit the fix
+5. Commit the fix
 
 **You have one iteration.** Focus on testing ONE feature thoroughly.
 
diff --git a/client.py b/client.py
index 423845d7..26bb5022 100644
--- a/client.py
+++ b/client.py
@@ -189,7 +189,6 @@ def get_extra_read_paths() -> list[Path]:
     "mcp__features__feature_create_bulk",
     "mcp__features__feature_create",
     "mcp__features__feature_clear_in_progress",
-    "mcp__features__feature_release_testing",  # Release testing claim
     # Dependency management
     "mcp__features__feature_add_dependency",
     "mcp__features__feature_remove_dependency",

From 899ea4bcdd0e3ae2782e50eeb753bbdfaaad2e30 Mon Sep 17 00:00:00 2001
From: nioasoft
Date: Fri, 30 Jan 2026 08:36:43 +0200
Subject: [PATCH 4/4] docs: clarify testing agent session scope and termination

- Add explicit note that session auto-terminates after logging
- Clarify that 'one iteration' means one feature per session, not one fix attempt

Co-Authored-By: Claude Opus 4.5
---
 .claude/templates/testing_prompt.template.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md
index b73ed176..7cbf5761 100644
--- a/.claude/templates/testing_prompt.template.md
+++ b/.claude/templates/testing_prompt.template.md
@@ -83,7 +83,7 @@ Use browser automation tools:
 
 #### If the feature PASSES:
 
-The feature still works correctly. Log the result and end your session:
+The feature still works correctly. Log the result:
 
 ```bash
 # Log the successful verification
@@ -92,6 +92,8 @@ echo "[Testing] Feature #{id} verified - still passing" >> claude-progress.txt
 
 **DO NOT** call feature_mark_passing again - it's already passing.
 
+**Session will auto-terminate** after you complete the logging step. No explicit exit action needed.
+
 #### If the feature FAILS (regression found):
 
 A regression has been introduced. You MUST fix it:
@@ -183,7 +185,7 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed.
 3. Verify the fix with browser automation
 4. Mark as passing only after thorough verification
 5. 
Commit the fix -**You have one iteration.** Focus on testing ONE feature thoroughly. +**Your session is scoped to ONE feature.** Complete all verification and any necessary fixes for that feature. You may iterate on fixes until it passes. ---
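A quick way to sanity-check the serialization savings behind `to_minimal_dict()` and `to_cycle_check_dict()` from patch 1 is to compare JSON payload sizes for the three field sets. The feature record below is invented for illustration, and the printed byte counts are indicative only, not the measured ~80% and ~95% figures cited in the commit:

```python
import json

# Invented feature record approximating a full to_dict() payload.
full = {
    "id": 7, "name": "Login flow", "category": "auth", "priority": 2,
    "description": "x" * 800,       # long prose field dropped by minimal dicts
    "steps": ["step detail"] * 12,  # per-step instructions, also dropped
    "passes": False, "in_progress": False,
    "dependencies": [3, 5], "last_error": None,
}
# Field subsets matching to_minimal_dict() and to_cycle_check_dict() in patch 1.
minimal = {k: full[k] for k in ("id", "name", "priority", "passes", "in_progress", "dependencies")}
cycle = {k: full[k] for k in ("id", "dependencies")}

for label, payload in (("full", full), ("minimal", minimal), ("cycle", cycle)):
    print(f"{label:>8}: {len(json.dumps(payload))} bytes")
```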
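The switch to `to_cycle_check_dict()` is safe because cycle detection needs only `id` and `dependencies`. A sketch of such a check over that minimal shape, assuming the call convention stated in the `feature_add_dependency` comment (source gains the dependency, target is depended upon); the repo's actual `would_create_circular_dependency` may be implemented differently:

```python
# Illustrative cycle check over minimal feature dicts of the shape
# {"id": ..., "dependencies": [...]} that to_cycle_check_dict() returns.
# Not the repo's implementation; the signature mirrors the usage above.

def would_create_circular_dependency(features: list[dict], source_id: int, target_id: int) -> bool:
    """True if adding edge source_id -> target_id would close a cycle."""
    deps = {f["id"]: list(f.get("dependencies") or []) for f in features}
    # The new edge closes a cycle iff source_id is already reachable
    # from target_id through existing dependencies.
    stack, seen = [target_id], set()
    while stack:
        node = stack.pop()
        if node == source_id:
            return True
        if node in seen:
            continue
        seen.add(node)
        stack.extend(deps.get(node, []))
    return False


# Feature 1 already depends on 2, so adding 2 -> 1 would close a cycle.
features = [{"id": 1, "dependencies": [2]}, {"id": 2, "dependencies": []}]
assert would_create_circular_dependency(features, 2, 1)
assert not would_create_circular_dependency(features, 1, 2)
```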
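For reference, the progressive truncation policy patch 1 adds to `assistant_chat_session.py` can be exercised in isolation. A minimal sketch, assuming messages are dicts with `role` and `content` keys; the helper name `truncate_history` and the demo data are illustrative, not part of the patch:

```python
# Sketch of the progressive truncation policy from assistant_chat_session.py.
# Assumes messages are dicts with "role" and "content" keys; the name
# truncate_history is hypothetical and does not appear in the patch.

HISTORY_CAP = 20    # only the last 20 messages are considered at all
RECENT_COUNT = 5    # the last 5 messages keep up to RECENT_LIMIT chars
RECENT_LIMIT = 1500
OLDER_LIMIT = 100   # everything older is cut to a 100-char preview


def truncate_history(history: list[dict]) -> list[str]:
    """Format capped history lines, trimming older messages harder."""
    history = history[-HISTORY_CAP:]
    lines = []
    num_messages = len(history)
    for i, msg in enumerate(history):
        role = "User" if msg["role"] == "user" else "Assistant"
        content = msg["content"]
        position_from_end = num_messages - 1 - i  # 0 = most recent
        limit = RECENT_LIMIT if position_from_end < RECENT_COUNT else OLDER_LIMIT
        if len(content) > limit:
            content = content[:limit] + "..."
        lines.append(f"{role}: {content}")
    return lines


if __name__ == "__main__":
    demo = [{"role": "user", "content": "x" * 2000} for _ in range(25)]
    for line in truncate_history(demo):
        print(len(line))  # 15 older lines stay short, the last 5 stay long
```

Keeping the most recent messages nearly intact while cutting older ones to short previews preserves the immediate conversational context, which is where most of the signal for the next reply sits.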