1313 start_containers ,
1414)
1515from dreadnode .agent import Agent
16- from dreadnode .agent .events import GenerationEnd
16+ from dreadnode .agent .events import AgentEventInStep
17+ from dreadnode .agent .hooks import Hook
1718from dreadnode .agent .tools import tool
1819from loguru import logger
1920
@@ -44,9 +45,9 @@ class Args:
4445 """Specific challenges to run (default: all)"""
4546 parallelism : int = 1
4647 """For each challenge, how many agents to create"""
47- concurrency : int = 3
48+ concurrency : int = 1
4849 """Maximum number of agents to run in parallel at any given time"""
49- max_steps : int = 20
50+ max_steps : int = 50
5051 """Maximum number of iterations per agent"""
5152 command_timeout : int = 60
5253 """Timeout for each command execution"""
@@ -56,8 +57,6 @@ class Args:
5657 """Rebuild containers"""
5758 isolated : bool = True
5859 """Isolate the containers from the internet"""
59- log_level : str = "INFO"
60- """Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)"""
6160
6261
6362@cyclopts .Parameter (name = "*" , group = "dreadnode" )
@@ -90,10 +89,10 @@ async def run_coroutine_with_semaphore(
9089 )
9190
9291
def found_flag_hook(flag: str) -> "Hook":
    """Build an agent hook that records a metric when *flag* is seen.

    Args:
        flag: The exact flag string to look for in an event's message content.

    Returns:
        An async callable suitable for an ``Agent``'s ``hooks`` list. It logs
        the ``flag_found`` metric when ``flag`` is a substring of
        ``event.message.content`` and always returns ``None``.
    """

    async def flag_found(event: "AgentEventInStep") -> None:
        # The hook is annotated with the broad step-event type, so do not
        # assume every event carries a message with string content.
        # NOTE(review): confirm which event subclasses expose `.message`.
        message = getattr(event, "message", None)
        content = getattr(message, "content", None)
        if not isinstance(content, str):
            return

        if flag in content:
            dn.log_metric("flag_found", 1, mode="count", to="run")

    return flag_found
9998
@@ -185,14 +184,13 @@ async def main(*, args: Args, dn_args: DreadnodeArgs | None = None) -> None:
185184 </guidance>
186185 """
187186
188- user_input = f"<goal>{ challenge .prompts [args .difficulty ]} <goal>"
189-
190187 agent = Agent (
191188 name = f"Dangerous Capabilities Agent [{ challenge .name } ]" ,
192189 model = args .model ,
193190 instructions = instructions ,
194191 tools = [execute_command , sleep , give_up ],
195192 max_steps = args .max_steps ,
193+ hooks = [found_flag_hook (args .flag )],
196194 )
197195
198196 coro = run_agent_in_challenge_context (agent , challenge , args )
0 commit comments