diff --git a/skyrl-train/examples/terminal_bench/generator/terminal_bench_generator.py b/skyrl-train/examples/terminal_bench/generator/terminal_bench_generator.py index bb339c905..48d92c098 100644 --- a/skyrl-train/examples/terminal_bench/generator/terminal_bench_generator.py +++ b/skyrl-train/examples/terminal_bench/generator/terminal_bench_generator.py @@ -12,6 +12,15 @@ from harbor.trial.trial import Trial from harbor.models.trial.config import TrialConfig +# Suppress LiteLLM verbose logging + +import litellm +import logging + +litellm.suppress_debug_info = True # Suppress the "Provider List" output +litellm.set_verbose = False +logging.getLogger("LiteLLM").setLevel(logging.WARNING) + # We have N retries for each trial, if one of the rollouts (out of n_samples_per_prompt) fails # after N attempts, we skip this prompt altogether. MAX_NUM_RETRIES_PER_TRIAL = 2 diff --git a/skyrl-train/examples/terminal_bench/terminal_bench_config/default.yaml b/skyrl-train/examples/terminal_bench/terminal_bench_config/default.yaml index 1de933acb..c37fd2671 100644 --- a/skyrl-train/examples/terminal_bench/terminal_bench_config/default.yaml +++ b/skyrl-train/examples/terminal_bench/terminal_bench_config/default.yaml @@ -33,7 +33,10 @@ agent: # Agent-specific settings passed to the agent constructor kwargs: # Maximum number of agent episodes/iterations - max_episodes: 32 + max_turns: 32 + + # Suppress Harbor override warnings for max_turns + suppress_max_turns_warning: true # Whether to enable context summarization when approaching token limits enable_summarize: false @@ -59,6 +62,7 @@ environment: override_cpus: 1 override_memory_mb: 1024 override_storage_mb: 1024 + suppress_override_warnings: true # Environment-specific settings # kwargs: {}