Skip to content

Commit ffb2fcb

Browse files
committed
fix: CheckLLMResponseStream QPS is too high
1 parent 7066c72 commit ffb2fcb

3 files changed

Lines changed: 11 additions & 6 deletions

File tree

volcenginesdkwafruntime/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
from volcenginesdkwaf import *
1616
from volcenginesdkwafruntime.api.waf_runtime_api import WAFRuntimeApi
1717
from volcenginesdkwafruntime.models.llm_stream_session import LLMStreamSession
18-
19-
__all__ = ["WAFRuntimeApi", "LLMStreamSession"]
18+
from volcenginesdkwafruntime.models.llm_stream_session import LLM_STREAM_SEND_EXPONENT
19+
from volcenginesdkwafruntime.models.llm_stream_session import LLM_STREAM_SEND_BASE_WINDOW
20+
__all__ = ["WAFRuntimeApi", "LLMStreamSession" , "LLM_STREAM_SEND_EXPONENT" , "LLM_STREAM_SEND_BASE_WINDOW"]

volcenginesdkwafruntime/api/waf_runtime_api.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
from volcenginesdkwaf import WAFApi, CheckLLMResponseStreamRequest
3-
from volcenginesdkwafruntime.models.llm_stream_session import LLMStreamSession
3+
from volcenginesdkwafruntime.models.llm_stream_session import LLMStreamSession,LLM_STREAM_SEND_EXPONENT,LLM_STREAM_SEND_BASE_WINDOW
44

55
global_llm_send_len = 10
66

@@ -52,13 +52,15 @@ def check_llm_response_stream(
5252

5353
# 重置流缓冲区和发送长度
5454
session.set_stream_send_len(0)
55+
session.CurrentSendWindow = session.CurrentSendWindow * LLM_STREAM_SEND_EXPONENT
5556

5657
return response
5758

5859
# 3. 处理 use_stream 为其他值的情况(累计长度,超过阈值才发送)
5960
else:
6061
# Call the API once the unsent length reaches the session's current send window (CurrentSendWindow)
61-
if session.get_stream_send_len() > global_llm_send_len:
62+
if session.get_stream_send_len() >= session.CurrentSendWindow:
63+
session.CurrentSendWindow = session.CurrentSendWindow * LLM_STREAM_SEND_EXPONENT
6264
# 准备请求体,使用 session 中的完整流内容
6365
body.content = session.get_stream_buf()
6466
body.msg_id = session.get_msg_id()

volcenginesdkwafruntime/models/llm_stream_session.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22

33
from volcenginesdkwaf import CheckLLMResponseStreamResponse
44

5+
LLM_STREAM_SEND_BASE_WINDOW = 10
6+
LLM_STREAM_SEND_EXPONENT = 2
57

6-
class LLMStreamSession:
7-
"""对应 Java 中的 LLMStreamSession 类"""
88

9+
class LLMStreamSession:
910
def __init__(
1011
self,
1112
stream_buf: str = "",
@@ -17,6 +18,7 @@ def __init__(
1718
self.stream_send_len = stream_send_len
1819
self.msg_id = msg_id
1920
self.default_body = default_body
21+
self.CurrentSendWindow = LLM_STREAM_SEND_BASE_WINDOW
2022

2123
def get_stream_buf(self) -> str:
2224
return self.stream_buf

0 commit comments

Comments
 (0)