Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/app/services/agent_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8396,6 +8396,12 @@ async def _handle_set_trigger(
reason=reason,
focus_ref=focus_ref,
)
# Fix 4: Safety cap for on_message triggers —
# prevent infinite loops if agent creates broad watchers.
if ttype == "on_message":
trigger.max_fires = trigger.max_fires or 100
if not trigger.expires_at:
trigger.expires_at = datetime.now(timezone.utc) + timedelta(days=7)
Comment on lines +8401 to +8404

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Apply safety caps when re-enabling on_message triggers

Move these defaults into the existing-trigger path as well. When an agent cancels and recreates a previously unlimited on_message trigger (or converts any disabled trigger to this type), the function returns at line 8389 before reaching this block, preserving max_fires=None and expires_at=None; consequently the triggers most likely to predate this fix remain permanently uncapped.

Useful? React with 👍 / 👎.

db.add(trigger)
await db.commit()

Expand Down
37 changes: 36 additions & 1 deletion backend/app/services/trigger_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,13 @@

TICK_INTERVAL = 15 # seconds
DEDUP_WINDOW = 30 # seconds — same agent won't be invoked twice within this window
MAX_AGENT_CHAIN_DEPTH = 5 # A→B→A→B→A max depth before stopping
MIN_POLL_INTERVAL_MINUTES = 5 # minimum poll interval to prevent abuse

# Safety: per-agent on_message fire rate limiter
_ON_MSG_RATE_WINDOW = 3600 # 1 hour window
_ON_MSG_RATE_LIMIT = 30 # max on_message fires per agent per hour
_on_msg_fire_log: dict[uuid.UUID, list[datetime]] = {} # agent_id -> list of fire timestamps
Comment on lines +37 to +40

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Persist the on_message rate-limit window

Store this safety state in shared durable storage rather than process memory. Every backend worker starts its own trigger daemon, Helm explicitly supports multiple backend replicas, and this dictionary is empty after every restart or rolling deployment, so an existing unlimited looping trigger receives a fresh allowance on each process lifecycle and the advertised per-agent hourly limit is not reliably enforced across the deployment.

Useful? React with 👍 / 👎.


_last_invoke: dict[uuid.UUID, datetime] = {}

_A2A_WAKE_CHAIN: dict[str, int] = {}
Expand All @@ -47,6 +51,15 @@ def _cleanup_stale_invoke_cache():
stale = [k for k, v in _last_invoke.items() if (now - v).total_seconds() > DEDUP_WINDOW * 2]
for k in stale:
del _last_invoke[k]
# Clean up old on_message rate limiter entries
cutoff = now - timedelta(seconds=_ON_MSG_RATE_WINDOW)
stale_agents = []
for aid, timestamps in _on_msg_fire_log.items():
_on_msg_fire_log[aid] = [t for t in timestamps if t > cutoff]
if not _on_msg_fire_log[aid]:
stale_agents.append(aid)
for aid in stale_agents:
del _on_msg_fire_log[aid]


async def _should_skip_non_workday(trigger: AgentTrigger, local_now: datetime) -> bool:
Expand Down Expand Up @@ -118,6 +131,28 @@ async def _tick():
if not handled:
handled = await _handle_okr_collection_trigger(trigger, now)
if not handled:
# Fix 3: Rate limit on_message triggers per agent
if trigger.type == "on_message":
agent_fires = _on_msg_fire_log.get(trigger.agent_id, [])
cutoff = now - timedelta(seconds=_ON_MSG_RATE_WINDOW)
recent = [t for t in agent_fires if t > cutoff]
if len(recent) >= _ON_MSG_RATE_LIMIT:
logger.warning(
f"[A2A Safety] Agent {trigger.agent_id} hit "
f"on_message rate limit ({_ON_MSG_RATE_LIMIT}/hr). "
f"Auto-disabling trigger '{trigger.name}'."
)
async with async_session() as db:
result = await db.execute(
select(AgentTrigger).where(AgentTrigger.id == trigger.id)
)
t_obj = result.scalar_one_or_none()
if t_obj:
t_obj.is_enabled = False
await db.commit()
continue
recent.append(now)
_on_msg_fire_log[trigger.agent_id] = recent
await enqueue_due_trigger(trigger, now)
Comment on lines +154 to 156

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Count only newly enqueued on_message executions

Record a rate-limit hit only when enqueue_due_trigger actually creates an execution. The current increment occurs before enqueueing and the enqueue helper discards its (execution, created) result, so an idempotency collision—such as an agent sending the same message text twice, since the key hashes only sender and content—leaves last_fired_at unchanged while this code increments every 15-second tick; the trigger is then disabled after about 7.5 minutes despite no new execution firing.

Useful? React with 👍 / 👎.

except Exception as e:
logger.warning(f"Error evaluating trigger {trigger.name}: {e}")
Expand Down
6 changes: 6 additions & 0 deletions backend/app/services/trigger_runtime/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,12 @@ async def check_new_agent_messages(trigger: AgentTrigger) -> bool:
.where(
ChatMessage.participant_id == from_participant,
ChatMessage.created_at > since,
# Fix 1: Only match real conversational messages,
# not internal tool_call / system records.
ChatMessage.role.in_(["assistant", "user"]),
# Fix 2: Exclude trigger internal "reflection"
# sessions to avoid cross-trigger false matches.
ChatSession.source_channel != "trigger",
)
.order_by(ChatMessage.created_at.desc())
.limit(1)
Expand Down