From 32dfc85f87ec500c56478fa6adea9e667453275a Mon Sep 17 00:00:00 2001 From: nioasoft Date: Thu, 29 Jan 2026 12:27:45 +0200 Subject: [PATCH 1/4] feat(mcp): optimize token consumption in MCP responses - Add to_minimal_dict() and to_cycle_check_dict() to Feature model - Use minimal serialization for cycle detection (~95% token reduction) - Add minimal parameter to feature_get_ready/blocked (default True) - Optimize feature_get_graph to query only needed columns - Add spec_get_summary MCP tool (~800 tokens vs 12,500 full) - Implement progressive history summarization in assistant chat - Update coding prompt to recommend new token-efficient tools Co-Authored-By: Claude Opus 4.5 --- .claude/templates/coding_prompt.template.md | 18 ++- api/database.py | 26 ++++ mcp_server/feature_mcp.py | 126 ++++++++++++++++++-- server/services/assistant_chat_session.py | 27 +++-- 4 files changed, 180 insertions(+), 17 deletions(-) diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md index 93224044..8dcd7c18 100644 --- a/.claude/templates/coding_prompt.template.md +++ b/.claude/templates/coding_prompt.template.md @@ -31,8 +31,7 @@ Then use MCP tools to check feature status: Use the feature_get_stats tool ``` -Understanding the `app_spec.txt` is critical - it contains the full requirements -for the application you're building. +**NOTE:** Do NOT read `app_spec.txt` directly (12,500+ tokens). If you need project context, use `spec_get_summary` tool (~800 tokens) which returns project name, tech stack, ports, and overview. ### STEP 2: START SERVERS (IF NOT RUNNING) @@ -363,6 +362,9 @@ feature_skip with feature_id={id} # 7. Clear in-progress status (when abandoning a feature) feature_clear_in_progress with feature_id={id} + +# 8. Get condensed project spec (~800 tokens vs 12,500 full) +spec_get_summary ``` ### RULES: @@ -396,6 +398,18 @@ This allows you to fully test email-dependent flows without needing external ema --- +## TOKEN EFFICIENCY + +To maximize context window usage: + +- **Don't read files unnecessarily** - Feature details from `feature_get_by_id` contain everything you need +- **Be concise** - Short, focused responses save tokens for actual work +- **Use `feature_get_summary`** for status checks (lighter than `feature_get_by_id`) +- **Use `spec_get_summary`** for project context (~800 tokens vs 12,500 for full app_spec.txt) +- **Avoid re-reading large files** - Read once, remember the content + +--- + **Remember:** One feature per session. Zero console errors. All data from real database. Leave codebase clean before ending session. --- diff --git a/api/database.py b/api/database.py index 90dc49af..f43ae3bc 100644 --- a/api/database.py +++ b/api/database.py @@ -82,6 +82,32 @@ def get_dependencies_safe(self) -> list[int]: return [d for d in self.dependencies if isinstance(d, int)] return [] + def to_minimal_dict(self) -> dict: + """Return minimal feature info for token-efficient responses. + + Use this instead of to_dict() when you only need status/dependency info, + not the full description and steps. Reduces response size by ~80%. + """ + return { + "id": self.id, + "name": self.name, + "priority": self.priority, + "passes": self.passes if self.passes is not None else False, + "in_progress": self.in_progress if self.in_progress is not None else False, + "dependencies": self.dependencies if self.dependencies else [], + } + + def to_cycle_check_dict(self) -> dict: + """Return only fields needed for cycle detection. 
+ + Use this for circular dependency validation - drastically reduces + token usage compared to to_dict() (~95% reduction). + """ + return { + "id": self.id, + "dependencies": self.dependencies if self.dependencies else [], + } + class Schedule(Base): """Time-based schedule for automated agent start/stop.""" diff --git a/mcp_server/feature_mcp.py b/mcp_server/feature_mcp.py index a394f1e9..7579746b 100755 --- a/mcp_server/feature_mcp.py +++ b/mcp_server/feature_mcp.py @@ -686,7 +686,8 @@ def feature_add_dependency( # Security: Circular dependency check # would_create_circular_dependency(features, source_id, target_id) # source_id = feature gaining the dependency, target_id = feature being depended upon - all_features = [f.to_dict() for f in session.query(Feature).all()] + # Use to_cycle_check_dict() for minimal token usage (~95% reduction) + all_features = [f.to_cycle_check_dict() for f in session.query(Feature).all()] if would_create_circular_dependency(all_features, feature_id, dependency_id): return json.dumps({"error": "Cannot add: would create circular dependency"}) @@ -749,7 +750,8 @@ def feature_remove_dependency( @mcp.tool() def feature_get_ready( - limit: Annotated[int, Field(default=10, ge=1, le=50, description="Max features to return")] = 10 + limit: Annotated[int, Field(default=10, ge=1, le=50, description="Max features to return")] = 10, + minimal: Annotated[bool, Field(default=True, description="Return minimal fields (id, name, priority, status, deps) to reduce tokens")] = True ) -> str: """Get all features ready to start (dependencies satisfied, not in progress). @@ -758,6 +760,7 @@ def feature_get_ready( Args: limit: Maximum number of features to return (1-50, default 10) + minimal: If True (default), return only essential fields. Set False for full details. Returns: JSON with: features (list), count (int), total_ready (int) @@ -774,7 +777,8 @@ def feature_get_ready( continue deps = f.dependencies or [] if all(dep_id in passing_ids for dep_id in deps): - ready.append(f.to_dict()) + # Use minimal or full serialization based on parameter + ready.append(f.to_minimal_dict() if minimal else f.to_dict()) # Sort by scheduling score (higher = first), then priority, then id scores = compute_scheduling_scores(all_dicts) @@ -791,7 +795,8 @@ def feature_get_ready( @mcp.tool() def feature_get_blocked( - limit: Annotated[int, Field(default=20, ge=1, le=100, description="Max features to return")] = 20 + limit: Annotated[int, Field(default=20, ge=1, le=100, description="Max features to return")] = 20, + minimal: Annotated[bool, Field(default=True, description="Return minimal fields (id, name, priority, status, deps) to reduce tokens")] = True ) -> str: """Get features that are blocked by unmet dependencies. @@ -800,6 +805,7 @@ def feature_get_blocked( Args: limit: Maximum number of features to return (1-100, default 20) + minimal: If True (default), return only essential fields. Set False for full details. 
Returns:
         JSON with: features (list with blocked_by field), count (int), total_blocked (int)
     """
     session = get_session()
     try:
@@ -816,8 +822,10 @@
         deps = f.dependencies or []
         blocking = [d for d in deps if d not in passing_ids]
         if blocking:
+            # Use minimal or full serialization based on parameter
+            base_dict = f.to_minimal_dict() if minimal else f.to_dict()
             blocked.append({
-                **f.to_dict(),
+                **base_dict,
                 "blocked_by": blocking
             })
@@ -842,7 +850,17 @@ def feature_get_graph() -> str:
     """
     session = get_session()
     try:
-        all_features = session.query(Feature).all()
+        # Optimized: Query only columns needed for graph visualization
+        # Avoids loading description, steps, timestamps, last_error
+        all_features = session.query(
+            Feature.id,
+            Feature.name,
+            Feature.category,
+            Feature.priority,
+            Feature.passes,
+            Feature.in_progress,
+            Feature.dependencies
+        ).all()
         passing_ids = {f.id for f in all_features if f.passes}
 
         nodes = []
@@ -922,7 +940,8 @@ def feature_set_dependencies(
             return json.dumps({"error": f"Dependencies not found: {missing}"})
 
         # Check for circular dependencies
-        all_features = [f.to_dict() for f in session.query(Feature).all()]
+        # Use to_cycle_check_dict() for minimal token usage (~95% reduction)
+        all_features = [f.to_cycle_check_dict() for f in session.query(Feature).all()]
         # Temporarily update the feature's dependencies for cycle check
         test_features = []
         for f in all_features:
@@ -952,5 +971,98 @@ def feature_set_dependencies(
         session.close()
 
 
+@mcp.tool()
+def spec_get_summary() -> str:
+    """Get condensed project specification summary (~800 tokens vs ~12,500 full).
+
+    Returns only essential project info:
+    - project_name: Name of the project
+    - overview: First 200 chars of project overview
+    - technology_stack: Tech stack summary
+    - ports: Development server ports
+    - feature_count: Target number of features
+
+    Use this instead of reading the full app_spec.txt to save tokens.
+    For full details, read prompts/app_spec.txt directly.
+
+    Returns:
+        JSON with condensed project spec, or error if not found.
+    """
+    import re
+
+    spec_path = PROJECT_DIR / "prompts" / "app_spec.txt"
+    if not spec_path.exists():
+        return json.dumps({"error": "No app_spec.txt found in prompts directory"})
+
+    try:
+        content = spec_path.read_text(encoding="utf-8")
+    except Exception as e:
+        return json.dumps({"error": f"Failed to read app_spec.txt: {str(e)}"})
+
+    result: dict = {}
+
+    # Extract project_name (look for <project_name> tag or "Project:" header)
+    project_name_match = re.search(r"<project_name>\s*(.+?)\s*</project_name>", content, re.IGNORECASE)
+    if project_name_match:
+        result["project_name"] = project_name_match.group(1).strip()
+    else:
+        # Try alternative formats
+        alt_match = re.search(r"(?:Project|Name):\s*(.+?)(?:\n|$)", content, re.IGNORECASE)
+        result["project_name"] = alt_match.group(1).strip() if alt_match else "Unknown"
+
+    # Extract overview (first 200 chars)
+    overview_match = re.search(r"<overview>\s*(.+?)\s*</overview>", content, re.DOTALL | re.IGNORECASE)
+    if overview_match:
+        overview = overview_match.group(1).strip()
+        result["overview"] = overview[:200] + ("..." if len(overview) > 200 else "")
+    else:
+        # Try alternative formats
+        alt_match = re.search(r"(?:Overview|Description):\s*(.+?)(?:\n\n|$)", content, re.DOTALL | re.IGNORECASE)
+        if alt_match:
+            overview = alt_match.group(1).strip()
+            result["overview"] = overview[:200] + ("..." if len(overview) > 200 else "")
+        else:
+            result["overview"] = None
+
+    # Extract technology_stack
+    tech_match = re.search(r"<technology_stack>\s*(.+?)\s*</technology_stack>", content, re.DOTALL | re.IGNORECASE)
+    if tech_match:
+        # Parse tech stack lines into a list
+        tech_text = tech_match.group(1).strip()
+        tech_items = [line.strip().lstrip("- ") for line in tech_text.split("\n") if line.strip() and not line.strip().startswith("#")]
+        result["technology_stack"] = tech_items[:10]  # Cap at 10 items
+    else:
+        result["technology_stack"] = None
+
+    # Extract ports
+    ports_match = re.search(r"<ports>\s*(.+?)\s*</ports>", content, re.DOTALL | re.IGNORECASE)
+    if ports_match:
+        ports_text = ports_match.group(1).strip()
+        ports = {}
+        for line in ports_text.split("\n"):
+            if ":" in line:
+                key, val = line.split(":", 1)
+                key = key.strip().lstrip("- ")
+                val = val.strip()
+                # Try to extract port number
+                port_num = re.search(r"\d+", val)
+                if port_num:
+                    ports[key] = int(port_num.group())
+        result["ports"] = ports if ports else None
+    else:
+        result["ports"] = None
+
+    # Extract feature_count
+    feature_count_match = re.search(r"<feature_count>\s*(\d+)\s*</feature_count>", content, re.IGNORECASE)
+    if feature_count_match:
+        result["feature_count"] = int(feature_count_match.group(1))
+    else:
+        # Try alternative formats
+        alt_match = re.search(r"feature[_\s]*count[:\s]*(\d+)", content, re.IGNORECASE)
+        result["feature_count"] = int(alt_match.group(1)) if alt_match else None
+
+    return json.dumps(result)
+
+
 if __name__ == "__main__":
     mcp.run()

diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py
index f15eee8a..209d5df7 100755
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -347,22 +347,33 @@ async def send_message(self, user_message: str) -> AsyncGenerator[dict, None]:
         history = get_messages(self.project_dir, self.conversation_id)
         # Exclude the message we just added (last one)
         history = history[:-1] if history else []
-        # Cap history to last 35 messages to prevent context overload
-        history = history[-35:] if len(history) > 35 else history
+        # Cap history to last 20 messages to prevent context overload
+        history = history[-20:] if len(history) > 20 else history
 
         if history:
-            # Format history as context for Claude
+            # Progressive summarization for token efficiency:
+            # - Recent messages (last 5): up to 1500 chars each
+            # - Older messages (6-20): 100-char summaries
+            # This reduces token usage by ~50% compared to uniform truncation
             history_lines = ["[Previous conversation history for context:]"]
-            for msg in history:
+            num_messages = len(history)
+            for i, msg in enumerate(history):
                 role = "User" if msg["role"] == "user" else "Assistant"
                 content = msg["content"]
-                # Truncate very long messages
-                if len(content) > 500:
-                    content = content[:500] + "..."
+                # Calculate position from end (0 = most recent)
+                position_from_end = num_messages - 1 - i
+                if position_from_end < 5:
+                    # Recent messages (last 5): allow up to 1500 chars
+                    if len(content) > 1500:
+                        content = content[:1500] + "..."
+                else:
+                    # Older messages (6-20): 100-char summaries only
+                    if len(content) > 100:
+                        content = content[:100] + "..."
                 history_lines.append(f"{role}: {content}")
             history_lines.append("[End of history. 
Continue the conversation:]") history_lines.append(f"User: {user_message}") message_to_send = "\n".join(history_lines) - logger.info(f"Loaded {len(history)} messages from conversation history") + logger.info(f"Loaded {len(history)} messages from conversation history (progressive summarization)") try: async for chunk in self._query_claude(message_to_send): From 904dbeb003d75845634172fb793eebc9eedb748e Mon Sep 17 00:00:00 2001 From: nioasoft Date: Thu, 29 Jan 2026 13:07:57 +0200 Subject: [PATCH 2/4] fix: clarify token efficiency tool recommendations - Use feature_get_stats for progress checks - Use feature_get_summary for single feature status Co-Authored-By: Claude Opus 4.5 --- .claude/templates/coding_prompt.template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md index 8dcd7c18..339e9ea3 100644 --- a/.claude/templates/coding_prompt.template.md +++ b/.claude/templates/coding_prompt.template.md @@ -404,7 +404,7 @@ To maximize context window usage: - **Don't read files unnecessarily** - Feature details from `feature_get_by_id` contain everything you need - **Be concise** - Short, focused responses save tokens for actual work -- **Use `feature_get_summary`** for status checks (lighter than `feature_get_by_id`) +- **Use `feature_get_stats`** for progress checks, `feature_get_summary` for single feature status - **Use `spec_get_summary`** for project context (~800 tokens vs 12,500 for full app_spec.txt) - **Avoid re-reading large files** - Read once, remember the content From 3f1b906d81e1388f5c49ecd353bfb3016db86a25 Mon Sep 17 00:00:00 2001 From: nioasoft Date: Fri, 30 Jan 2026 08:17:31 +0200 Subject: [PATCH 3/4] fix: remove dead feature_release_testing references The testing architecture was simplified - testing agents now work concurrently without claim/release coordination. - Remove feature_release_testing from allowed tools in client.py - Update testing_prompt.template.md to remove claim/release workflow Co-Authored-By: Claude Opus 4.5 --- .claude/templates/testing_prompt.template.md | 28 ++++---------------- client.py | 1 - 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md index a7e2bbe0..b73ed176 100644 --- a/.claude/templates/testing_prompt.template.md +++ b/.claude/templates/testing_prompt.template.md @@ -48,9 +48,7 @@ Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` Use the feature_get_by_id tool with feature_id={your_assigned_id} ``` -The orchestrator has already claimed this feature for testing (set `testing_in_progress=true`). - -**CRITICAL:** You MUST call `feature_release_testing` when done, regardless of pass/fail. +The orchestrator has assigned this feature for you to test. ### STEP 4: VERIFY THE FEATURE @@ -85,12 +83,9 @@ Use browser automation tools: #### If the feature PASSES: -The feature still works correctly. Release the claim and end your session: - -``` -# Release the testing claim (tested_ok=true) -Use the feature_release_testing tool with feature_id={id} and tested_ok=true +The feature still works correctly. Log the result and end your session: +```bash # Log the successful verification echo "[Testing] Feature #{id} verified - still passing" >> claude-progress.txt ``` @@ -125,13 +120,7 @@ A regression has been introduced. You MUST fix it: Use the feature_mark_passing tool with feature_id={id} ``` -6. 
**Release the testing claim:**
-   ```
-   Use the feature_release_testing tool with feature_id={id} and tested_ok=false
-   ```
-   Note: tested_ok=false because we found a regression (even though we fixed it).
-
-7. **Commit the fix:**
+6. **Commit the fix:**
    ```bash
    git add .
    git commit -m "Fix regression in [feature name]
@@ -156,7 +145,6 @@ echo "[Testing] Session complete - verified/fixed feature #{id}" >> claude-progress.txt
 
 ### Feature Management
 - `feature_get_stats` - Get progress overview (passing/in_progress/total counts)
 - `feature_get_by_id` - Get your assigned feature details
-- `feature_release_testing` - **REQUIRED** - Release claim after testing (pass tested_ok=true/false)
 - `feature_mark_failing` - Mark a feature as failing (when you find a regression)
 - `feature_mark_passing` - Mark a feature as passing (after fixing a regression)
 
@@ -188,18 +176,12 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed.
 - Visual appearance correct
 - API calls succeed
 
-**CRITICAL - Always release your claim:**
-- Call `feature_release_testing` when done, whether pass or fail
-- Pass `tested_ok=true` if the feature passed
-- Pass `tested_ok=false` if you found a regression
-
 **If you find a regression:**
 1. Mark the feature as failing immediately
 2. Fix the issue
 3. Verify the fix with browser automation
 4. Mark as passing only after thorough verification
-5. Release the testing claim with `tested_ok=false`
-6. Commit the fix
+5. Commit the fix
 
 **You have one iteration.** Focus on testing ONE feature thoroughly.
 
diff --git a/client.py b/client.py
index 423845d7..26bb5022 100644
--- a/client.py
+++ b/client.py
@@ -189,7 +189,6 @@ def get_extra_read_paths() -> list[Path]:
     "mcp__features__feature_create_bulk",
     "mcp__features__feature_create",
     "mcp__features__feature_clear_in_progress",
-    "mcp__features__feature_release_testing",  # Release testing claim
     # Dependency management
     "mcp__features__feature_add_dependency",
     "mcp__features__feature_remove_dependency",

From 899ea4bcdd0e3ae2782e50eeb753bbdfaaad2e30 Mon Sep 17 00:00:00 2001
From: nioasoft
Date: Fri, 30 Jan 2026 08:36:43 +0200
Subject: [PATCH 4/4] docs: clarify testing agent session scope and termination

- Add explicit note that session auto-terminates after logging
- Clarify that 'one iteration' means one feature per session, not one fix attempt

Co-Authored-By: Claude Opus 4.5
---
 .claude/templates/testing_prompt.template.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md
index b73ed176..7cbf5761 100644
--- a/.claude/templates/testing_prompt.template.md
+++ b/.claude/templates/testing_prompt.template.md
@@ -83,7 +83,7 @@ Use browser automation tools:
 
 #### If the feature PASSES:
 
-The feature still works correctly. Log the result and end your session:
+The feature still works correctly. Log the result:
 
 ```bash
 # Log the successful verification
@@ -92,6 +92,8 @@ echo "[Testing] Feature #{id} verified - still passing" >> claude-progress.txt
 
 **DO NOT** call feature_mark_passing again - it's already passing.
 
+**Session will auto-terminate** after you complete the logging step. No explicit exit action needed.
+
 #### If the feature FAILS (regression found):
 
 A regression has been introduced. You MUST fix it:
@@ -183,7 +185,7 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed.
 3. Verify the fix with browser automation
 4. Mark as passing only after thorough verification
 5. 
Commit the fix -**You have one iteration.** Focus on testing ONE feature thoroughly. +**Your session is scoped to ONE feature.** Complete all verification and any necessary fixes for that feature. You may iterate on fixes until it passes. ---
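A quick way to sanity-check the serialization savings behind `to_minimal_dict()` and `to_cycle_check_dict()` from patch 1 is to compare JSON payload sizes for the three field sets. The feature record below is invented for illustration, and the printed byte counts are indicative only, not the measured ~80% and ~95% figures cited in the commit:

```python
import json

# Invented feature record approximating a full to_dict() payload.
full = {
    "id": 7, "name": "Login flow", "category": "auth", "priority": 2,
    "description": "x" * 800,       # long prose field dropped by minimal dicts
    "steps": ["step detail"] * 12,  # per-step instructions, also dropped
    "passes": False, "in_progress": False,
    "dependencies": [3, 5], "last_error": None,
}
# Field subsets matching to_minimal_dict() and to_cycle_check_dict() in patch 1.
minimal = {k: full[k] for k in ("id", "name", "priority", "passes", "in_progress", "dependencies")}
cycle = {k: full[k] for k in ("id", "dependencies")}

for label, payload in (("full", full), ("minimal", minimal), ("cycle", cycle)):
    print(f"{label:>8}: {len(json.dumps(payload))} bytes")
```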
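The switch to `to_cycle_check_dict()` is safe because cycle detection needs only `id` and `dependencies`. A sketch of such a check over that minimal shape, assuming the call convention stated in the `feature_add_dependency` comment (source gains the dependency, target is depended upon); the repo's actual `would_create_circular_dependency` may be implemented differently:

```python
# Illustrative cycle check over minimal feature dicts of the shape
# {"id": ..., "dependencies": [...]} that to_cycle_check_dict() returns.
# Not the repo's implementation; the signature mirrors the usage above.

def would_create_circular_dependency(features: list[dict], source_id: int, target_id: int) -> bool:
    """True if adding edge source_id -> target_id would close a cycle."""
    deps = {f["id"]: list(f.get("dependencies") or []) for f in features}
    # The new edge closes a cycle iff source_id is already reachable
    # from target_id through existing dependencies.
    stack, seen = [target_id], set()
    while stack:
        node = stack.pop()
        if node == source_id:
            return True
        if node in seen:
            continue
        seen.add(node)
        stack.extend(deps.get(node, []))
    return False


# Feature 1 already depends on 2, so adding 2 -> 1 would close a cycle.
features = [{"id": 1, "dependencies": [2]}, {"id": 2, "dependencies": []}]
assert would_create_circular_dependency(features, 2, 1)
assert not would_create_circular_dependency(features, 1, 2)
```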
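For reference, the progressive truncation policy patch 1 adds to `assistant_chat_session.py` can be exercised in isolation. A minimal sketch, assuming messages are dicts with `role` and `content` keys; the helper name `truncate_history` and the demo data are illustrative, not part of the patch:

```python
# Sketch of the progressive truncation policy from assistant_chat_session.py.
# Assumes messages are dicts with "role" and "content" keys; the name
# truncate_history is hypothetical and does not appear in the patch.

HISTORY_CAP = 20    # only the last 20 messages are considered at all
RECENT_COUNT = 5    # the last 5 messages keep up to RECENT_LIMIT chars
RECENT_LIMIT = 1500
OLDER_LIMIT = 100   # everything older is cut to a 100-char preview


def truncate_history(history: list[dict]) -> list[str]:
    """Format capped history lines, trimming older messages harder."""
    history = history[-HISTORY_CAP:]
    lines = []
    num_messages = len(history)
    for i, msg in enumerate(history):
        role = "User" if msg["role"] == "user" else "Assistant"
        content = msg["content"]
        position_from_end = num_messages - 1 - i  # 0 = most recent
        limit = RECENT_LIMIT if position_from_end < RECENT_COUNT else OLDER_LIMIT
        if len(content) > limit:
            content = content[:limit] + "..."
        lines.append(f"{role}: {content}")
    return lines


if __name__ == "__main__":
    demo = [{"role": "user", "content": "x" * 2000} for _ in range(25)]
    for line in truncate_history(demo):
        print(len(line))  # 15 older lines stay short, the last 5 stay long
```

Keeping the most recent messages nearly intact while cutting older ones to short previews preserves the immediate conversational context, which is where most of the signal for the next reply sits.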