Skip to content

Commit 7e609b7

Browse files
author
LittleMouse
committed
[update] add cmm_size & precompute
1 parent bf62ede commit 7e609b7

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

backend/llm_client_backend.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -77,8 +77,8 @@ async def _get_client(self, request):
7777
await asyncio.sleep(retry_interval)
7878
await asyncio.wait_for(self._pool_lock.acquire(), timeout=timeout - (time.time() - start_time))
7979

80-
# if "memory_required" in self.config:
81-
# await self.memory_checker.check_memory(self.config["memory_required"])
80+
if "memory_required" in self.config:
81+
await self.memory_checker.check_memory(self.config["memory_required"])
8282

8383
self.logger.debug("Creating new LLM client")
8484
client = LLMClient(

services/model_list.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,13 @@ async def get_model_list(self, required_mem: int) -> None:
5656
"object": f"{model_type}.setup",
5757
"system_prompt": "You are a helpful assistant."
5858
})
59+
60+
mode_param = model_data.get("mode_param", {})
61+
precompute_len = None
62+
cmm_size = None
63+
if isinstance(mode_param, dict):
64+
precompute_len = mode_param.get("precompute_len")
65+
cmm_size = mode_param.get("cmm_size")
5966
if '-1.5B-' in mode:
6067
new_entry['memory_required'] = 1782579
6168
new_entry['pool_size'] = 1
@@ -68,6 +75,8 @@ async def get_model_list(self, required_mem: int) -> None:
6875
else:
6976
new_entry['memory_required'] = 1363148
7077
new_entry['pool_size'] = 2
78+
if cmm_size is not None:
79+
new_entry['memory_required'] = cmm_size
7180

7281
if '-p256-' in mode:
7382
new_entry['max_context_length'] = 256
@@ -81,6 +90,9 @@ async def get_model_list(self, required_mem: int) -> None:
8190
if '-ctx-' in mode:
8291
new_entry['max_context_length'] = 1024
8392

93+
if precompute_len is not None:
94+
new_entry['max_context_length'] = precompute_len
95+
8496
elif model_type == 'tts':
8597
if 'melotts' in mode.lower():
8698
obj = 'melotts.setup'

0 commit comments

Comments
 (0)