From 6fb3775cb8e675e52ff0fdb116422c0461c09049 Mon Sep 17 00:00:00 2001 From: SaSteMi Date: Sat, 13 Jun 2026 22:36:50 +0200 Subject: [PATCH] Add ContextLengthExceededError and validator Introduce a ContextLengthExceededError exception and validate_context_requirements() helper to enforce prompt/context length constraints. The validator raises when context_len exceeds max_seq_len or when context_len + max_tokens would exceed an optional cache_max_num_tokens; additional optional parameters (max_rq_tokens, allocation_boundary) are present for future checks. This fixes [issue #426](https://github.com/theroyallab/tabbyAPI/issues/426). I'm new to this too; this fix was generated by Antigravity, but I thought I'd share because it works for me now. --- common/errors.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 common/errors.py diff --git a/common/errors.py b/common/errors.py new file mode 100644 index 00000000..502591c3 --- /dev/null +++ b/common/errors.py @@ -0,0 +1,19 @@ +class ContextLengthExceededError(Exception): + pass + +def validate_context_requirements( + context_len: int, + max_seq_len: int, + max_tokens: int, + cache_max_num_tokens: int = None, + max_rq_tokens: int = None, + allocation_boundary: int = None, +): + if context_len > max_seq_len: + raise ContextLengthExceededError( + f"Prompt length {context_len} is greater than max_seq_len {max_seq_len}" + ) + if cache_max_num_tokens is not None and context_len + max_tokens > cache_max_num_tokens: + raise ContextLengthExceededError( + f"Prompt length {context_len} + max_tokens {max_tokens} is greater than cache size {cache_max_num_tokens}" + )