11import math
2- from tracemalloc import stop
32import uuid
43from typing import Any , List , Optional
54import asyncio
@@ -55,12 +54,6 @@ def _build_inputs(self, prompt: str, history: Optional[List[dict]] = None) -> An
5554 add_generation_prompt = True
5655 )
5756
58- async def _consume_generator (self , generator ):
59- final_output = None
60- async for request_output in generator :
61- final_output = request_output
62- return final_output
63-
6457 async def generate_answer (
6558 self , text : str , history : Optional [List [str ]] = None , ** extra : Any
6659 ) -> str :
@@ -76,18 +69,20 @@ async def generate_answer(
7669
7770 try :
7871 result_generator = self .engine .generate (full_prompt , sp , request_id = request_id )
79- final_output = await asyncio .wait_for (
80- self ._consume_generator (result_generator ),
81- timeout = self .timeout
82- )
72+ final_output = None
73+ async with asyncio .timeout (self .timeout ):
74+ async for request_output in result_generator :
75+ if request_output .finished :
76+ final_output = request_output
77+ break
8378
8479 if not final_output or not final_output .outputs :
8580 return ""
8681
8782 result_text = final_output .outputs [0 ].text
8883 return result_text
8984
90- except (Exception , asyncio .CancelledError ):
85+ except (Exception , asyncio .CancelledError , asyncio . TimeoutError ):
9186 await self .engine .abort (request_id )
9287 raise
9388
@@ -105,10 +100,12 @@ async def generate_topk_per_token(
105100
106101 try :
107102 result_generator = self .engine .generate (full_prompt , sp , request_id = request_id )
108- final_output = await asyncio .wait_for (
109- self ._consume_generator (result_generator ),
110- timeout = self .timeout
111- )
103+ final_output = None
104+ async with asyncio .timeout (self .timeout ):
105+ async for request_output in result_generator :
106+ if request_output .finished :
107+ final_output = request_output
108+ break
112109
113110 if (
114111 not final_output
@@ -138,7 +135,7 @@ async def generate_topk_per_token(
138135 return [main_token ]
139136 return []
140137
141- except (Exception , asyncio .CancelledError ):
138+ except (Exception , asyncio .CancelledError , asyncio . TimeoutError ):
142139 await self .engine .abort (request_id )
143140 raise
144141
0 commit comments