diff --git a/docs/README.md b/docs/README.md index 3700151..87386eb 100644 --- a/docs/README.md +++ b/docs/README.md @@ -60,8 +60,8 @@ graph TB end subgraph LLM["LLM Layer"] - Client["LiteLLM Client"] - Provider["Provider (Chutes/OpenRouter)"] + Client["LLM Client"] + Provider["Chutes API"] end subgraph Tools["Tool System"] @@ -91,7 +91,7 @@ graph TB - **Prompt Caching** - 90%+ cache hit rate for significant cost reduction - **Context Management** - Intelligent pruning and compaction for long tasks - **Self-Verification** - Automatic validation before task completion -- **Multi-Provider** - Supports Chutes AI, OpenRouter, and litellm-compatible providers +- **Chutes API** - Uses Chutes AI with OpenAI-compatible API format --- @@ -105,7 +105,7 @@ baseagent/ │ │ ├── loop.py # Main agent loop │ │ └── compaction.py # Context management │ ├── llm/ -│ │ └── client.py # LLM client (litellm) +│ │ └── client.py # LLM client (httpx) │ ├── config/ │ │ └── defaults.py # Configuration │ ├── tools/ # Tool implementations diff --git a/docs/architecture.md b/docs/architecture.md index 772b5ee..4b0e916 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -18,7 +18,7 @@ graph TB end subgraph LLM["LLM Layer"] - client["client.py
LiteLLM Client"] + client["client.py
httpx Client"] end subgraph Config["Configuration"] @@ -71,7 +71,7 @@ classDiagram +log(msg) } - class LiteLLMClient { + class LLMClient { +model: str +temperature: float +max_tokens: int @@ -105,8 +105,8 @@ classDiagram +inject_content: Optional } - AgentContext --> LiteLLMClient : uses - LiteLLMClient --> LLMResponse : returns + AgentContext --> LLMClient : uses + LLMClient --> LLMResponse : returns LLMResponse --> FunctionCall : contains AgentContext --> ToolRegistry : uses ToolRegistry --> ToolResult : returns @@ -171,13 +171,13 @@ sequenceDiagram participant Loop as loop.py participant Context as compaction.py participant Cache as Prompt Cache - participant LLM as LiteLLM Client + participant LLM as httpx Client participant Provider as API Provider participant Tools as Tool Registry User->>Entry: --instruction "Create hello.txt" Entry->>Entry: Initialize AgentContext - Entry->>Entry: Initialize LiteLLMClient + Entry->>Entry: Initialize LLMClient Entry->>Loop: run_agent_loop() Loop->>Loop: Build messages [system, user, state] @@ -261,8 +261,8 @@ Intelligent context management that: ### `src/llm/client.py` - LLM Client -LiteLLM-based client that: -- Supports multiple providers (Chutes, OpenRouter, etc.) +httpx-based client that: +- Uses Chutes API (OpenAI-compatible) - Tracks token usage and costs - Handles tool/function calling format - Enforces cost limits diff --git a/docs/chutes-integration.md b/docs/chutes-integration.md index 75b4955..d7867c9 100644 --- a/docs/chutes-integration.md +++ b/docs/chutes-integration.md @@ -4,7 +4,7 @@ ## Overview -[Chutes AI](https://chutes.ai) provides access to advanced language models through a simple API. BaseAgent supports Chutes as a first-class provider, offering access to the **Kimi K2.5-TEE** model with its powerful thinking capabilities. +[Chutes AI](https://chutes.ai) provides access to advanced language models through a simple OpenAI-compatible API. BaseAgent uses Chutes as its primary provider with a direct httpx-based client, supporting models like **DeepSeek Chat** and **Kimi K2.5-TEE**. --- @@ -12,31 +12,30 @@ | Feature | Value | |---------|-------| -| **API Base URL** | `https://llm.chutes.ai/v1` | -| **Default Model** | `moonshotai/Kimi-K2.5-TEE` | -| **Model Parameters** | 1T total, 32B activated | -| **Context Window** | 256K tokens | -| **Thinking Mode** | Enabled by default | +| **API Base URL** | `https://api.chutes.ai/v1` | +| **Default Model** | `deepseek/deepseek-chat` | +| **API Format** | OpenAI-compatible | +| **Context Window** | Model-dependent (up to 256K) | +| **Client** | Direct httpx (no external LLM library) | --- ## Quick Setup -### Step 1: Get Your API Token +### Step 1: Get Your API Key 1. Visit [chutes.ai](https://chutes.ai) 2. Create an account or sign in 3. Navigate to API settings -4. Generate an API token +4. 
Generate an API key ### Step 2: Configure Environment ```bash -# Required: API token -export CHUTES_API_TOKEN="your-token-from-chutes.ai" +# Required: API key +export CHUTES_API_KEY="your-key-from-chutes.ai" -# Optional: Explicitly set provider and model -export LLM_PROVIDER="chutes" +# Optional: Specify a different model export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" ``` @@ -53,16 +52,16 @@ python3 agent.py --instruction "Your task description" ```mermaid sequenceDiagram participant Agent as BaseAgent - participant Client as LiteLLM Client + participant Client as httpx Client participant Chutes as Chutes API - Agent->>Client: Initialize with CHUTES_API_TOKEN - Client->>Client: Configure litellm + Agent->>Client: Initialize with CHUTES_API_KEY + Client->>Client: Configure httpx with auth header loop Each Request Agent->>Client: chat(messages, tools) Client->>Chutes: POST /v1/chat/completions - Note over Client,Chutes: Authorization: Bearer $CHUTES_API_TOKEN + Note over Client,Chutes: Authorization: Bearer $CHUTES_API_KEY Chutes-->>Client: Response with tokens Client-->>Agent: LLMResponse end @@ -70,45 +69,33 @@ sequenceDiagram --- -## Model Details: Kimi K2.5-TEE +## Supported Models -The **moonshotai/Kimi-K2.5-TEE** model offers: +### DeepSeek Chat (Default) -### Architecture -- **Total Parameters**: 1 Trillion (1T) -- **Activated Parameters**: 32 Billion (32B) -- **Architecture**: Mixture of Experts (MoE) -- **Context Length**: 256,000 tokens +The default model `deepseek/deepseek-chat` is well-suited for general coding tasks: -### Thinking Mode +- Fast response times +- Cost-effective +- Good reasoning capabilities -Kimi K2.5-TEE supports a "thinking mode" where the model shows its reasoning process: +### Kimi K2.5-TEE (Alternative) -```mermaid -sequenceDiagram - participant User - participant Model as Kimi K2.5-TEE - participant Response +The **moonshotai/Kimi-K2.5-TEE** model offers enhanced capabilities: - User->>Model: Complex task instruction - - rect rgb(230, 240, 255) - Note over Model: Thinking Mode Active - Model->>Model: Analyze problem - Model->>Model: Consider approaches - Model->>Model: Evaluate options - end - - Model->>Response: Reasoning process... 
-    Model->>Response: Final answer/action
-```
+- **Total Parameters**: 1 Trillion (1T)
+- **Activated Parameters**: 32 Billion (32B)
+- **Architecture**: Mixture of Experts (MoE)
+- **Context Length**: 256,000 tokens
+- **Thinking Mode**: Shows its reasoning process in dedicated thinking tags before the final answer
 
 ### Temperature Settings
 
 | Mode | Temperature | Top-p | Description |
 |------|-------------|-------|-------------|
+| **Deterministic** | 0.0 | - | Consistent, reproducible outputs |
 | **Thinking** | 1.0 | 0.95 | More exploratory reasoning |
-| **Instant** | 0.6 | 0.95 | Faster, more deterministic |
+| **Instant** | 0.6 | 0.95 | Faster, balanced responses |
 
 ---
 
@@ -119,9 +106,9 @@ sequenceDiagram
 ```python
 # src/config/defaults.py
 CONFIG = {
-    "model": os.environ.get("LLM_MODEL", "moonshotai/Kimi-K2.5-TEE"),
+    "model": os.environ.get("LLM_MODEL", "deepseek/deepseek-chat"),
     "provider": "chutes",
-    "temperature": 1.0,  # For thinking mode
+    "temperature": 0.0,  # Deterministic by default
     "max_tokens": 16384,
 }
 ```
@@ -130,10 +117,10 @@ CONFIG = {
 
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
-| `CHUTES_API_TOKEN` | Yes | - | API token from chutes.ai |
-| `LLM_PROVIDER` | No | `openrouter` | Set to `chutes` |
-| `LLM_MODEL` | No | `moonshotai/Kimi-K2.5-TEE` | Model identifier |
+| `CHUTES_API_KEY` | Yes | - | API key from chutes.ai |
+| `LLM_MODEL` | No | `deepseek/deepseek-chat` | Model identifier |
 | `LLM_COST_LIMIT` | No | `10.0` | Max cost in USD |
+| `CHUTES_BASE_URL` | No | `https://api.chutes.ai/v1` | API base URL |
 
 ---
 
@@ -183,65 +170,47 @@ def parse_thinking(response_text: str) -> tuple[str, str]:
 
 Chutes API follows OpenAI-compatible format:
 
 ```bash
-curl -X POST https://llm.chutes.ai/v1/chat/completions \
-  -H "Authorization: Bearer $CHUTES_API_TOKEN" \
+curl -X POST https://api.chutes.ai/v1/chat/completions \
+  -H "Authorization: Bearer $CHUTES_API_KEY" \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "moonshotai/Kimi-K2.5-TEE",
+    "model": "deepseek/deepseek-chat",
    "messages": [
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user", "content": "Hello!"}
    ],
    "max_tokens": 1024,
-    "temperature": 1.0,
-    "top_p": 0.95
+    "temperature": 0.0
  }'
 ```
 
 ---
 
-## Fallback to OpenRouter
+## Error Handling and Retry
 
-If Chutes is unavailable, BaseAgent can fall back to OpenRouter:
+BaseAgent includes built-in retry logic for transient failures:
 
 ```mermaid
 flowchart TB
-    Start[API Request] --> Check{Chutes Available?}
-    
-    Check -->|Yes| Chutes[Send to Chutes API]
-    Chutes --> Success{Success?}
+    Start[API Request] --> Send[Send to Chutes API]
+    Send --> Success{Success?}
 
     Success -->|Yes| Done[Return Response]
-    Success -->|No| Retry{Retry Count < 3?}
+    Success -->|No| Check{Retryable Error?}
 
-    Retry -->|Yes| Chutes
-    Retry -->|No| Fallback[Use OpenRouter]
+    Check -->|Yes| Retry{Retry Count < 3?}
+    Check -->|No| Fail[Raise LLMError]
 
-    Check -->|No| Fallback
-    Fallback --> Done
-```
-
-### Configuration for Fallback
-
-```bash
-# Primary: Chutes
-export CHUTES_API_TOKEN="..."
-export LLM_PROVIDER="chutes"
-
-# Fallback: OpenRouter
-export OPENROUTER_API_KEY="..."
+ Retry -->|Yes| Wait[Exponential Backoff] + Retry -->|No| Fail + + Wait --> Send ``` -### Switching Providers +### Retryable Errors -```bash -# Switch to OpenRouter -export LLM_PROVIDER="openrouter" -export LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514" - -# Switch back to Chutes -export LLM_PROVIDER="chutes" -export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" -``` +- `rate_limit` (HTTP 429) - Automatic retry with backoff +- `server_error` (HTTP 5xx) - Automatic retry +- `timeout` - Automatic retry --- @@ -284,11 +253,11 @@ if self._total_cost >= self.cost_limit: LLMError: authentication_error ``` -**Solution**: Verify your token is correct and exported: +**Solution**: Verify your API key is correct and exported: ```bash -echo $CHUTES_API_TOKEN # Should show your token -export CHUTES_API_TOKEN="correct-token" +echo $CHUTES_API_KEY # Should show your key +export CHUTES_API_KEY="correct-key" ``` ### Rate Limiting @@ -311,6 +280,10 @@ LLMError: Model 'xyz' not found **Solution**: Use the correct model identifier: ```bash +# Default model +export LLM_MODEL="deepseek/deepseek-chat" + +# Or alternative model export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" ``` @@ -322,52 +295,62 @@ LLMError: timeout **Solution**: BaseAgent retries automatically. If persistent: - Check your internet connection -- Verify Chutes API status -- Consider using OpenRouter as fallback +- Verify Chutes API status at [chutes.ai](https://chutes.ai) --- -## Integration with LiteLLM +## Implementation Details -BaseAgent uses [LiteLLM](https://docs.litellm.ai/) for provider abstraction: +BaseAgent uses a direct httpx-based client for Chutes API: ```python # src/llm/client.py -import litellm - -# For Chutes, configure base URL -litellm.api_base = "https://llm.chutes.ai/v1" +import httpx + +# Direct client with OpenAI-compatible format +client = httpx.Client( + base_url="https://api.chutes.ai/v1", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, +) # Make request -response = litellm.completion( - model="moonshotai/Kimi-K2.5-TEE", - messages=messages, - api_key=os.environ.get("CHUTES_API_TOKEN"), -) +response = client.post("/chat/completions", json={ + "model": "deepseek/deepseek-chat", + "messages": messages, + "max_tokens": 16384, +}) ``` +This approach provides: +- No external LLM library dependencies +- Direct control over request/response handling +- Simplified error handling and retry logic + --- ## Best Practices ### For Optimal Performance -1. **Enable thinking mode** for complex reasoning tasks -2. **Use appropriate temperature** (1.0 for exploration, 0.6 for precision) -3. **Leverage the 256K context** for large codebases +1. **Choose the right model** - Use `deepseek/deepseek-chat` for speed, `moonshotai/Kimi-K2.5-TEE` for complex reasoning +2. **Use appropriate temperature** (0.0 for deterministic, higher for creative tasks) +3. **Leverage context windows** - Models support large context for codebases 4. **Monitor costs** with `LLM_COST_LIMIT` ### For Reliability -1. **Set up fallback** to OpenRouter -2. **Handle rate limits** gracefully (automatic in BaseAgent) -3. **Log responses** for debugging complex tasks +1. **Handle rate limits** gracefully (automatic in BaseAgent with retry logic) +2. **Log responses** for debugging complex tasks +3. **Set appropriate timeouts** for long-running operations ### For Cost Efficiency -1. **Enable prompt caching** (reduces costs by 90%) -2. **Use context management** to avoid token waste -3. 
**Set reasonable cost limits** for testing +1. **Use context management** to avoid token waste +2. **Set reasonable cost limits** for testing +3. **Choose cost-effective models** for simple tasks --- diff --git a/docs/configuration.md b/docs/configuration.md index 492f074..c49be98 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -16,8 +16,8 @@ The main configuration is stored in the `CONFIG` dictionary: # src/config/defaults.py CONFIG = { # Model Settings - "model": "openrouter/anthropic/claude-sonnet-4-20250514", - "provider": "openrouter", + "model": "deepseek/deepseek-chat", + "provider": "chutes", "temperature": 0.0, "max_tokens": 16384, "reasoning_effort": "none", @@ -57,30 +57,24 @@ CONFIG = { | Variable | Default | Description | |----------|---------|-------------| -| `LLM_MODEL` | `openrouter/anthropic/claude-sonnet-4-20250514` | Model identifier | -| `LLM_PROVIDER` | `openrouter` | Provider name (`chutes`, `openrouter`, etc.) | +| `LLM_MODEL` | `deepseek/deepseek-chat` | Model identifier | | `LLM_COST_LIMIT` | `10.0` | Maximum cost in USD before aborting | +| `CHUTES_BASE_URL` | `https://api.chutes.ai/v1` | API base URL | ### API Keys | Variable | Provider | Description | |----------|----------|-------------| -| `CHUTES_API_TOKEN` | Chutes AI | Token from chutes.ai | -| `OPENROUTER_API_KEY` | OpenRouter | API key from openrouter.ai | -| `ANTHROPIC_API_KEY` | Anthropic | Direct Anthropic API key | -| `OPENAI_API_KEY` | OpenAI | OpenAI API key | +| `CHUTES_API_KEY` | Chutes AI | API key from chutes.ai | ### Example Setup ```bash -# For Chutes AI -export CHUTES_API_TOKEN="your-token" -export LLM_PROVIDER="chutes" -export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" +# For Chutes AI (default provider) +export CHUTES_API_KEY="your-key" -# For OpenRouter -export OPENROUTER_API_KEY="sk-or-v1-..." -export LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514" +# Optional: Specify a different model +export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" ``` --- @@ -102,8 +96,8 @@ graph LR | Setting | Type | Default | Description | |---------|------|---------|-------------| -| `model` | `str` | `openrouter/anthropic/claude-sonnet-4-20250514` | Full model identifier with provider prefix | -| `provider` | `str` | `openrouter` | LLM provider name | +| `model` | `str` | `deepseek/deepseek-chat` | Model identifier | +| `provider` | `str` | `chutes` | LLM provider name | | `temperature` | `float` | `0.0` | Response randomness (0 = deterministic) | | `max_tokens` | `int` | `16384` | Maximum tokens in LLM response | | `reasoning_effort` | `str` | `none` | Reasoning depth: `none`, `minimal`, `low`, `medium`, `high`, `xhigh` | @@ -174,42 +168,25 @@ graph TB --- -## Provider-Specific Configuration - -### Chutes AI - -```python -# Environment -CHUTES_API_TOKEN="your-token" -LLM_PROVIDER="chutes" -LLM_MODEL="moonshotai/Kimi-K2.5-TEE" - -# Model features -# - 1T parameters, 32B activated -# - 256K context window -# - Thinking mode enabled by default -# - Temperature: 1.0 (thinking), 0.6 (instant) -``` +## Provider Configuration -### OpenRouter +### Chutes AI (Default Provider) -```python +```bash # Environment -OPENROUTER_API_KEY="sk-or-v1-..." -LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514" +CHUTES_API_KEY="your-key" +LLM_MODEL="deepseek/deepseek-chat" # Default model -# Requires openrouter/ prefix for litellm +# Alternative models +LLM_MODEL="moonshotai/Kimi-K2.5-TEE" # For complex reasoning tasks ``` -### Direct Anthropic - -```python -# Environment -ANTHROPIC_API_KEY="sk-ant-..." 
-LLM_MODEL="claude-3-5-sonnet-20241022" +### Available Models -# No prefix needed for direct API -``` +| Model | Description | Context | Best For | +|-------|-------------|---------|----------| +| `deepseek/deepseek-chat` | Fast, cost-effective | Large | General tasks | +| `moonshotai/Kimi-K2.5-TEE` | 1T params, thinking mode | 256K | Complex reasoning | --- diff --git a/docs/installation.md b/docs/installation.md index 24d6700..a90b61b 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -66,9 +66,10 @@ pip install -e . BaseAgent requires these Python packages: ``` -litellm>=1.0.0 # LLM API abstraction -httpx>=0.24.0 # HTTP client -pydantic>=2.0.0 # Data validation +httpx>=0.27.0 # HTTP client for Chutes API +pydantic>=2.0 # Data validation +rich>=13.0 # Terminal output formatting +typer>=0.12.0 # CLI framework ``` These are automatically installed via pip. @@ -77,43 +78,19 @@ These are automatically installed via pip. ## Environment Setup -### 1. Choose Your LLM Provider +### 1. Configure Chutes API -BaseAgent supports multiple LLM providers. Choose one: - -#### Option A: Chutes AI (Recommended) +BaseAgent uses Chutes AI as its LLM provider: ```bash -# Set your Chutes API token -export CHUTES_API_TOKEN="your-token-from-chutes.ai" +# Set your Chutes API key +export CHUTES_API_KEY="your-key-from-chutes.ai" -# Configure provider -export LLM_PROVIDER="chutes" +# Optional: Specify a different model (default: deepseek/deepseek-chat) export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" ``` -Get your token at [chutes.ai](https://chutes.ai) - -#### Option B: OpenRouter - -```bash -# Set your OpenRouter API key -export OPENROUTER_API_KEY="sk-or-v1-..." - -# Model is auto-configured for OpenRouter -``` - -Get your key at [openrouter.ai](https://openrouter.ai) - -#### Option C: Direct Provider APIs - -```bash -# For Anthropic -export ANTHROPIC_API_KEY="sk-ant-..." - -# For OpenAI -export OPENAI_API_KEY="sk-..." -``` +Get your API key at [chutes.ai](https://chutes.ai) ### 2. Create a Configuration File (Optional) @@ -121,9 +98,8 @@ Create `.env` in the project root: ```bash # .env file -CHUTES_API_TOKEN=your-token-here -LLM_PROVIDER=chutes -LLM_MODEL=moonshotai/Kimi-K2.5-TEE +CHUTES_API_KEY=your-key-here +LLM_MODEL=deepseek/deepseek-chat LLM_COST_LIMIT=10.0 ``` @@ -141,9 +117,9 @@ python3 --version ### Step 2: Verify Dependencies ```bash -python3 -c "import litellm; print('litellm:', litellm.__version__)" python3 -c "import httpx; print('httpx:', httpx.__version__)" python3 -c "import pydantic; print('pydantic:', pydantic.__version__)" +python3 -c "import rich; print('rich:', rich.__version__)" ``` ### Step 3: Verify BaseAgent Installation @@ -191,14 +167,14 @@ baseagent/ ## Troubleshooting -### Issue: `ModuleNotFoundError: No module named 'litellm'` +### Issue: `ModuleNotFoundError: No module named 'httpx'` **Solution**: Install dependencies ```bash pip install -r requirements.txt # or -pip install litellm httpx pydantic +pip install httpx pydantic rich typer ``` ### Issue: `ImportError: cannot import name 'run_agent_loop'` @@ -212,15 +188,14 @@ python3 agent.py --instruction "..." 
### Issue: API Key Errors -**Solution**: Verify your environment variables are set +**Solution**: Verify your environment variable is set ```bash -# Check if variables are set -echo $CHUTES_API_TOKEN -echo $OPENROUTER_API_KEY +# Check if variable is set +echo $CHUTES_API_KEY # Re-export if needed -export CHUTES_API_TOKEN="your-token" +export CHUTES_API_KEY="your-key" ``` ### Issue: `rg` (ripgrep) Not Found diff --git a/docs/overview.md b/docs/overview.md index c05a533..e67bd3e 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -80,8 +80,8 @@ graph TB end subgraph LLM["LLM Layer"] - Client["LiteLLM Client
(src/llm/client.py)"] - API["Provider API
(Chutes/OpenRouter)"] + Client["LLM Client
(src/llm/client.py)"] + API["Chutes API"] end subgraph Tools["Tool System"] @@ -152,7 +152,7 @@ sequenceDiagram participant User participant CLI as agent.py participant Loop as Agent Loop - participant LLM as LLM (Chutes/OpenRouter) + participant LLM as Chutes API participant Tools as Tool Registry User->>CLI: python agent.py --instruction "..." diff --git a/docs/quickstart.md b/docs/quickstart.md index f8a9326..f10ec41 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -6,21 +6,18 @@ Before starting, ensure you have: - Python 3.9+ installed -- An LLM API key (Chutes, OpenRouter, or Anthropic) +- A Chutes API key (get one at [chutes.ai](https://chutes.ai)) - BaseAgent installed (see [Installation](./installation.md)) --- ## Step 1: Set Up Your API Key -Choose your provider and set the environment variable: +Set the Chutes API key environment variable: ```bash -# For Chutes AI (recommended) -export CHUTES_API_TOKEN="your-token-from-chutes.ai" - -# OR for OpenRouter -export OPENROUTER_API_KEY="sk-or-v1-..." +# For Chutes AI +export CHUTES_API_KEY="your-key-from-chutes.ai" ``` --- @@ -124,7 +121,7 @@ sequenceDiagram python3 agent.py --instruction "Your task description" # With environment variables inline -CHUTES_API_TOKEN="..." python3 agent.py --instruction "..." +CHUTES_API_KEY="..." python3 agent.py --instruction "..." # Redirect output to file python3 agent.py --instruction "..." > output.jsonl 2>&1 diff --git a/docs/usage.md b/docs/usage.md index d234c54..2a48440 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -53,14 +53,11 @@ python3 agent.py --instruction "Find the bug causing the TypeError in the test o Configure the agent's behavior with environment variables: ```bash -# LLM Provider (Chutes) -export CHUTES_API_TOKEN="your-token" -export LLM_PROVIDER="chutes" -export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" +# LLM Provider (Chutes API) +export CHUTES_API_KEY="your-key" -# LLM Provider (OpenRouter) -export OPENROUTER_API_KEY="sk-or-v1-..." -export LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514" +# Optional: Specify a different model (default: deepseek/deepseek-chat) +export LLM_MODEL="moonshotai/Kimi-K2.5-TEE" # Cost management export LLM_COST_LIMIT="10.0" @@ -123,9 +120,9 @@ Agent logs go to stderr: ``` [14:30:15] [superagent] ============================================================ -[14:30:15] [superagent] SuperAgent Starting (SDK 3.0 - litellm) +[14:30:15] [superagent] SuperAgent Starting (SDK 3.0 - httpx) [14:30:15] [superagent] ============================================================ -[14:30:15] [superagent] Model: openrouter/anthropic/claude-sonnet-4-20250514 +[14:30:15] [superagent] Model: deepseek/deepseek-chat [14:30:15] [superagent] Instruction: Create hello.txt with 'Hello World'... [14:30:15] [loop] Getting initial state... [14:30:16] [loop] Iteration 1/200 diff --git a/src/tools/base.py b/src/tools/base.py index 2bc8e64..96b2acb 100644 --- a/src/tools/base.py +++ b/src/tools/base.py @@ -79,18 +79,35 @@ def execute(self, **kwargs: Any) -> ToolResult: pass def resolve_path(self, path: str) -> Path: - """Resolve a path relative to the working directory. + """Resolve a path relative to the working directory with containment validation. Args: path: Path string (absolute or relative) Returns: - Resolved absolute Path + Resolved absolute Path contained within the working directory + + Raises: + ValueError: If the resolved path escapes the working directory """ p = Path(path) + + # Resolve the path (handles .. 
and symlinks) if p.is_absolute(): - return p - return (self.cwd / p).resolve() + resolved = p.resolve() + else: + resolved = (self.cwd / p).resolve() + + # Validate path containment - resolved path must be within or equal to cwd + try: + resolved.relative_to(self.cwd.resolve()) + except ValueError: + raise ValueError( + f"Path '{path}' resolves to '{resolved}' which is outside " + f"the working directory '{self.cwd}'. Path traversal is not allowed." + ) + + return resolved @classmethod def get_spec(cls) -> dict[str, Any]: diff --git a/src/tools/shell.py b/src/tools/shell.py index 8240826..acc9995 100644 --- a/src/tools/shell.py +++ b/src/tools/shell.py @@ -47,6 +47,53 @@ def _get_shell(self) -> tuple[str, list[str]]: shell = os.environ.get("SHELL", "/bin/bash") return shell, ["-lc"] + def _build_safe_env(self) -> dict: + """Build a safe environment for command execution. + + Returns a minimal environment that excludes sensitive variables + while preserving necessary ones for command execution. + """ + # Allowlist of safe environment variables to pass through + safe_vars = { + # Essential system variables + "PATH", + "HOME", + "USER", + "SHELL", + "LANG", + "LC_ALL", + "LC_CTYPE", + "TERM", + "PWD", + "TMPDIR", + "TMP", + "TEMP", + # Development tools + "EDITOR", + "VISUAL", + "PAGER", + # Python-related + "PYTHONPATH", + "PYTHONHOME", + "VIRTUAL_ENV", + # Git-related (non-sensitive) + "GIT_AUTHOR_NAME", + "GIT_AUTHOR_EMAIL", + "GIT_COMMITTER_NAME", + "GIT_COMMITTER_EMAIL", + } + + # Build environment with only safe variables + safe_env = {} + for var in safe_vars: + if var in os.environ: + safe_env[var] = os.environ[var] + + # Override TERM to disable color codes + safe_env["TERM"] = "dumb" + + return safe_env + def execute( self, command: str, @@ -79,14 +126,14 @@ def execute( shell, shell_args = self._get_shell() try: - # Run the command + # Run the command with a safe environment (no sensitive variables) result = subprocess.run( [shell, *shell_args, command], cwd=str(work_path), capture_output=True, text=True, timeout=timeout_s, - env={**os.environ, "TERM": "dumb"}, # Disable color codes + env=self._build_safe_env(), ) # Combine stdout and stderr diff --git a/src/utils/files.py b/src/utils/files.py index 141e918..fdfd697 100644 --- a/src/utils/files.py +++ b/src/utils/files.py @@ -3,27 +3,43 @@ from __future__ import annotations from pathlib import Path -from typing import Union +from typing import Optional, Union -def resolve_path(path: Union[str, Path], cwd: Optional[Path] = None) -> Path: - """Resolve a path relative to CWD. +def resolve_path(path: Union[str, Path], cwd: Optional[Path] = None, allow_escape: bool = False) -> Path: + """Resolve a path relative to CWD with optional containment validation. Args: path: Path to resolve cwd: Current working directory (defaults to os.getcwd()) + allow_escape: If False, raises ValueError when resolved path is outside cwd Returns: Resolved absolute path + + Raises: + ValueError: If allow_escape is False and path escapes the cwd """ if cwd is None: cwd = Path.cwd() p = Path(path) if p.is_absolute(): - return p.resolve() - - return (cwd / p).resolve() + resolved = p.resolve() + else: + resolved = (cwd / p).resolve() + + # Validate containment if not explicitly allowed to escape + if not allow_escape: + try: + resolved.relative_to(cwd.resolve()) + except ValueError: + raise ValueError( + f"Path '{path}' resolves to '{resolved}' which is outside " + f"the working directory '{cwd}'. Path traversal is not allowed." 
+ ) + + return resolved def is_binary_file(path: Path) -> bool: