Skip to content

Commit 538b219

Browse files
author
LittleMouse
committed
[perf] Optimize different model loading methods
1 parent fec1b48 commit 538b219

File tree

2 files changed: +9 additions, −0 deletions

api_server.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@ async def get_backend(self, model_name):
7070
if model_config["type"] == "openai_proxy":
7171
self.backends[model_name] = OpenAIProxyBackend(model_config)
7272
elif model_config["type"] in ("llm", "vlm"):
73+
logger.debug(f"self.llm_models: {self.llm_models}")
74+
if self.llm_models and model_name not in self.llm_models:
75+
for old_model in self.llm_models:
76+
old_instance = self.backends.pop(old_model, None)
77+
if old_instance:
78+
await old_instance.close()
79+
self.llm_models.clear()
7380
count = model_config["pool_size"]
7481
while len(self.llm_models) >= count:
7582
oldest_model = self.llm_models.pop(0)

backend/llm_client_backend.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ async def _release_client(self, client):
123123

124124
async def close(self):
125125
async with self._pool_lock:
126+
for task in self._active_tasks:
127+
task.cancel()
126128
for client in self._client_pool:
127129
client.exit()
128130
self._client_pool.clear()

0 commit comments

Comments (0)