
Commit fec1b48

LittleMouse committed: [perf] Optimize model loading method
1 parent: 423fa93

File tree

1 file changed: +5 −2 lines changed


backend/llm_client_backend.py

Lines changed: 5 additions & 2 deletions
@@ -68,6 +68,10 @@ async def _get_client(self, request):
 
         if len(self._active_clients) < self.POOL_SIZE:
             break
+
+        for task in self._active_tasks:
+            task.cancel()
+        # Will interrupt the activated client inference
 
         self._pool_lock.release()
         await asyncio.sleep(retry_interval)
@@ -134,8 +138,7 @@ async def inference_stream(self, query: str, base64_images: list, request: ChatC
 
         loop = asyncio.get_event_loop()
         for i, image_data in enumerate(base64_images):
-            message = client.send_jpeg(image_data, object_type="vlm.jpeg.base64")
-            print(f"Sending JPEG data #{i+1}: {message[:20]}...")
+            client.send_jpeg(image_data, object_type="vlm.jpeg.base64")
 
         sync_gen = client.inference_stream(
             query,
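
The first hunk changes what _get_client does when the pool is saturated: rather than only releasing the lock and sleeping before the next retry, it now cancels every task in _active_tasks, deliberately interrupting in-flight inference so a client slot frees up sooner. A minimal sketch of that acquisition loop follows; only the names _active_clients, _active_tasks, POOL_SIZE, and _pool_lock appear in the diff, while the retry loop shape, the retry_interval default, the POOL_SIZE value, and the client registration after break are assumptions about the surrounding code.

import asyncio

class LLMClientBackend:
    POOL_SIZE = 2  # assumed value; not shown in the diff

    def __init__(self):
        self._active_clients = []  # clients currently handed out
        self._active_tasks = []    # tasks driving those clients
        self._pool_lock = asyncio.Lock()

    async def _get_client(self, request, retry_interval=0.5):
        while True:
            await self._pool_lock.acquire()

            if len(self._active_clients) < self.POOL_SIZE:
                break  # free slot; fall through still holding the lock

            # New in this commit: cancel the tasks holding clients so
            # their inference is interrupted and a slot opens up.
            for task in self._active_tasks:
                task.cancel()

            self._pool_lock.release()
            await asyncio.sleep(retry_interval)

        client = object()  # placeholder for the real client construction
        self._active_clients.append(client)
        self._pool_lock.release()
        return client

Cancelling every active task is a blunt tradeoff: it favors the incoming request over whatever is currently streaming, which fits the commit's stated goal of freeing a model slot quickly.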
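The second hunk drops debug bookkeeping in inference_stream: send_jpeg's return value is no longer captured and printed, and each frame is simply pushed to the client. Since the context lines show loop = asyncio.get_event_loop() and a sync_gen produced by client.inference_stream(...), the method presumably bridges a blocking generator into the async stream. A sketch of that pattern under those assumptions (the run_in_executor bridge and the stream_tokens wrapper are inferred, not shown in the diff):

import asyncio

_SENTINEL = object()  # marks generator exhaustion

async def stream_tokens(client, query, base64_images):
    loop = asyncio.get_event_loop()

    # After this commit: send each frame without keeping the return value.
    for image_data in base64_images:
        client.send_jpeg(image_data, object_type="vlm.jpeg.base64")

    sync_gen = client.inference_stream(query)

    # Drain the blocking generator on a worker thread so the event loop
    # stays responsive while tokens arrive.
    while True:
        chunk = await loop.run_in_executor(None, next, sync_gen, _SENTINEL)
        if chunk is _SENTINEL:
            break
        yield chunk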

0 commit comments