diff --git a/posthog/exception_capture.py b/posthog/exception_capture.py
index 9d055351..60de0399 100644
--- a/posthog/exception_capture.py
+++ b/posthog/exception_capture.py
@@ -7,7 +7,9 @@
 import logging
 import sys
 import threading
+import random
 from typing import TYPE_CHECKING
+from posthog.rate_limiter import ExceptionRateLimiter
 
 if TYPE_CHECKING:
     from posthog.client import Client
@@ -24,6 +26,22 @@ def __init__(self, client: "Client"):
 
         sys.excepthook = self.exception_handler
         threading.excepthook = self.thread_exception_handler
+        # client side rate limiting to prevent spamming the server with exceptions
+
+        # Pull configurations dynamically from user-facing Client setups
+        max_exceptions = getattr(client, "exception_capture_max_per_window", 100)
+        window_seconds = getattr(client, "exception_capture_window_seconds", 60.0)
+        post_limit_every = getattr(client, "exception_capture_post_limit_every", 10)
+
+        self._sample_rate = getattr(client, "exception_capture_sample_rate", 1.0)
+
+        # Initialize the rate limiter engine
+        self._rate_limiter = ExceptionRateLimiter(
+            max_exceptions=max_exceptions,
+            window_seconds=window_seconds,
+            post_limit_every=post_limit_every,
+        )
+
 
     def close(self):
         sys.excepthook = self.original_excepthook
@@ -43,6 +61,12 @@ def exception_receiver(self, exc_info, extra_properties):
         self.capture_exception((exc_info[0], exc_info[1], exc_info[2]), metadata)
 
     def capture_exception(self, exception, metadata=None):
+        if not self._rate_limiter.should_capture():
+            return
+
+        if self._sample_rate < 1.0 and random.random() > self._sample_rate:
+            return
+
         try:
             distinct_id = metadata.get("distinct_id") if metadata else None
             self.client.capture_exception(exception, distinct_id=distinct_id)
diff --git a/posthog/rate_limiter.py b/posthog/rate_limiter.py
new file mode 100644
index 00000000..d0945827
--- /dev/null
+++ b/posthog/rate_limiter.py
@@ -0,0 +1,88 @@
+import threading
+import time
+from typing import Callable
+
+
+class ExceptionRateLimiter:
+    """Fixed-window rate limiter used for exception capture.
+
+    Behavior:
+    - Counts events in a fixed time window (default 60s).
+    - Allows up to ``max_exceptions`` events per window.
+    - After the limit is reached, allows one event every ``post_limit_every``
+      events to avoid completely starving signals in tight crash loops.
+
+    The implementation is intentionally simple (O(1) memory) and thread-safe.
+
+    Parameters
+    - max_exceptions: non-negative int, number of allowed events per window.
+    - window_seconds: positive float, window length in seconds.
+    - post_limit_every: positive int, after the limit, allow 1 in ``post_limit_every``.
+    - clock: callable returning a monotonic timestamp (in seconds). Useful for tests.
+
+    Raises
+    - ValueError: if ``max_exceptions`` is negative, ``window_seconds`` is not a
+      positive number, or ``post_limit_every`` is below 1 once truncated to int.
+    """
+
+    __slots__ = (
+        "_max",
+        "_window",
+        "_count",
+        "_window_start",
+        "_lock",
+        "_post_every",
+        "_clock",
+    )
+
+    def __init__(
+        self,
+        max_exceptions: int = 100,
+        window_seconds: float = 60.0,
+        post_limit_every: int = 10,
+        clock: Callable[[], float] = time.monotonic,
+    ):
+        if max_exceptions < 0:
+            raise ValueError("max_exceptions must be >= 0")
+
+        # ``not (x > 0)`` also rejects NaN, which would never let a window expire.
+        window = float(window_seconds)
+        if not window > 0:
+            raise ValueError("window_seconds must be > 0")
+
+        # Validate the truncated value: a fraction such as 0.5 is > 0 but becomes
+        # 0 as an int, and ``count % 0`` in should_capture() would raise
+        # ZeroDivisionError on the first post-limit event.
+        post_every = int(post_limit_every)
+        if post_every <= 0:
+            raise ValueError("post_limit_every must be > 0")
+
+        self._max = int(max_exceptions)
+        self._window = window
+        self._post_every = post_every
+        self._count = 0
+        self._clock = clock
+        self._window_start = self._clock()
+        self._lock = threading.Lock()
+
+    def should_capture(self) -> bool:
+        """Return True if the current event should be captured.
+
+        Every call counts as one event. The counter restarts once
+        ``window_seconds`` have elapsed since the current window began.
+
+        This method is thread-safe.
+        """
+        with self._lock:
+            now = self._clock()
+            if now - self._window_start >= self._window:
+                self._count = 0
+                self._window_start = now
+
+            self._count += 1
+
+            if self._count <= self._max:
+                return True
+
+            # post-limit: capture every Nth event to keep occasional signal
+            return self._count % self._post_every == 0
diff --git a/posthog/test/test_rate_limiter.py b/posthog/test/test_rate_limiter.py
new file mode 100644
index 00000000..ac58234d
--- /dev/null
+++ b/posthog/test/test_rate_limiter.py
@@ -0,0 +1,111 @@
+import pytest
+from posthog.rate_limiter import ExceptionRateLimiter
+
+
+class FakeClock:
+    """A clean, predictable clock mock for simulating time progression without sleeps."""
+
+    def __init__(self, start: float = 0.0):
+        self.now = float(start)
+
+    def advance(self, seconds: float):
+        self.now += float(seconds)
+
+    def __call__(self) -> float:
+        return self.now
+
+
+def test_allows_within_limit_and_handles_heartbeat():
+    """Verify that the first N events pass and later events are throttled.
+
+    After the limit, only events whose running count is a multiple of
+    ``post_limit_every`` are let through as a heartbeat.
+    """
+    clock = FakeClock(0.0)
+    # Allow 5 events per window, then allow every 10th event thereafter
+    rl = ExceptionRateLimiter(
+        max_exceptions=5, window_seconds=60.0, post_limit_every=10, clock=clock
+    )
+
+    # 1. First 5 events must be allowed through cleanly
+    for i in range(5):
+        assert rl.should_capture() is True, (
+            f"Event {i + 1} should be captured within max limits"
+        )
+
+    # 2. Events 6 through 9 must be completely blocked by the emergency brake
+    for i in range(4):
+        assert rl.should_capture() is False, (
+            f"Event {i + 6} should be blocked after max limits"
+        )
+
+    # 3. The 10th total event triggers the heartbeat check (10 % 10 == 0) and passes
+    assert rl.should_capture() is True, (
+        "The 10th event should act as a heartbeat signal"
+    )
+
+    # 4. The next 9 events (11 through 19) are dropped
+    for i in range(9):
+        assert rl.should_capture() is False, (
+            f"Event {i + 11} should be blocked during heartbeat cooldown"
+        )
+
+    # 5. The 20th total event triggers the next heartbeat check (20 % 10 == 0) and passes
+    assert rl.should_capture() is True, (
+        "The 20th event should act as a heartbeat signal"
+    )
+
+
+def test_window_resets_counters_cleanly():
+    """Verify that crossing the window boundary clears the counter.
+
+    A fresh window must grant the full ``max_exceptions`` allowance again.
+    """
+    clock = FakeClock(0.0)
+    rl = ExceptionRateLimiter(
+        max_exceptions=2, window_seconds=10.0, post_limit_every=10, clock=clock
+    )
+
+    # Fill up the current window capacity
+    assert rl.should_capture() is True  # Count = 1 (Allowed)
+    assert rl.should_capture() is True  # Count = 2 (Allowed)
+    assert rl.should_capture() is False  # Count = 3 (Blocked hard!)
+
+    # Advance time past the 10.0-second configuration limit
+    clock.advance(10.1)
+
+    # The rate limiter must reset internal tracking counters to 0
+    assert rl.should_capture() is True, "First event in fresh window should pass"
+    assert rl.should_capture() is True, "Second event in fresh window should pass"
+    assert rl.should_capture() is False, "Third event in fresh window should block"
+
+
+@pytest.mark.parametrize(
+    "kwargs, match_msg",
+    [
+        ({"max_exceptions": -1}, "max_exceptions must be >= 0"),
+        ({"window_seconds": 0}, "window_seconds must be > 0"),
+        ({"window_seconds": -5.5}, "window_seconds must be > 0"),
+        ({"window_seconds": float("nan")}, "window_seconds must be > 0"),
+        ({"post_limit_every": 0}, "post_limit_every must be > 0"),
+        # Fractions in (0, 1) truncate to 0 and must be rejected up front.
+        ({"post_limit_every": 0.5}, "post_limit_every must be > 0"),
+    ],
+)
+def test_invalid_parameters_raise_value_errors(kwargs, match_msg):
+    """Verify that the class initialization cleanly blocks invalid configuration parameter boundaries."""
+    with pytest.raises(ValueError, match=match_msg):
+        ExceptionRateLimiter(**kwargs)
+
+
+def test_post_every_one_allows_all_events_after_limit():
+    """Verify that ``post_limit_every=1`` lets every post-limit event through."""
+    clock = FakeClock(0.0)
+    rl = ExceptionRateLimiter(
+        max_exceptions=0, window_seconds=10.0, post_limit_every=1, clock=clock
+    )
+
+    # Because max=0, all events are post-limit, but since post_limit_every=1, everything passes (N % 1 == 0)
+    assert rl.should_capture() is True
+    assert rl.should_capture() is True
+    assert rl.should_capture() is True