@@ -54,20 +54,28 @@ async def _get_client(self, request):
5454 try :
5555 await asyncio .wait_for (self ._pool_lock .acquire (), timeout = 30.0 )
5656
57- if self ._client_pool :
58- client = self ._client_pool .pop ()
59- self .logger .debug (f"Reusing client from pool | ID:{ id (client )} " )
60- return client
57+ start_time = time .time ()
58+ timeout = 30.0
59+ retry_interval = 3
6160
61+ while True :
62+ if self ._client_pool :
63+ client = self ._client_pool .pop ()
64+ self .logger .debug (f"Reusing client from pool | ID:{ id (client )} " )
65+ return client
66+
67+ if len (self ._active_clients ) < self .POOL_SIZE :
68+ break
69+
70+ self ._pool_lock .release ()
71+ await asyncio .sleep (retry_interval )
72+ await asyncio .wait_for (self ._pool_lock .acquire (), timeout = timeout - (time .time () - start_time ))
73+
6274 if "memory_required" in self .config :
6375 await self .memory_checker .check_memory (
6476 self .config ["memory_required" ]
6577 )
6678
67-
68- if len (self ._active_clients ) >= self .POOL_SIZE :
69- raise RuntimeError ("Connection pool exhausted" )
70-
7179 self .logger .debug ("Creating new LLM client" )
7280 client = LLMClient (
7381 host = self .config ["host" ],
@@ -99,6 +107,8 @@ async def _get_client(self, request):
99107 )
100108 )
101109 return client
110+ except asyncio .TimeoutError :
111+ raise RuntimeError ("Server busy, please try again later." )
102112 finally :
103113 self ._pool_lock .release ()
104114
0 commit comments