
Commit c64f974

[perf] Optimize llm_client parameter configuration method

1 parent 131c628 · commit c64f974
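In short: MAX_CONTEXT_LENGTH and POOL_SIZE move from hard-coded class attributes on LlmClientBackend to per-model values read from model_config (with the old values, 500 and 2, kept as defaults); _get_client now tries to reuse a pooled client before running the memory check; and config.yaml gains per-model max_context_length entries alongside a renamed deepseek entry and updated memory_required / pool_size values.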

File tree

2 files changed: +14 -10 lines

backend/llm_client_backend.py

Lines changed: 7 additions & 7 deletions
@@ -14,15 +14,14 @@
 from services.memory_check import MemoryChecker
 
 class LlmClientBackend(BaseModelBackend):
-    MAX_CONTEXT_LENGTH = 500
-    POOL_SIZE = 2
-
     def __init__(self, model_config):
         super().__init__(model_config)
         self._client_pool = []
         self._active_clients = {}
         self._pool_lock = asyncio.Lock()
         self.logger = logging.getLogger("api.llm")
+        self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 500)
+        self.POOL_SIZE = model_config.get("pool_size", 2)
         self._inference_executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
         self._active_tasks = weakref.WeakSet()
         self.memory_checker = MemoryChecker(
@@ -55,15 +54,16 @@ async def _get_client(self, request):
         try:
             await asyncio.wait_for(self._pool_lock.acquire(), timeout=30.0)
 
+            if self._client_pool:
+                client = self._client_pool.pop()
+                self.logger.debug(f"Reusing client from pool | ID:{id(client)}")
+                return client
+
             if "memory_required" in self.config:
                 await self.memory_checker.check_memory(
                     self.config["memory_required"]
                 )
 
-            if self._client_pool:
-                client = self._client_pool.pop()
-                self.logger.debug(f"Reusing client from pool | ID:{id(client)}")
-                return client
 
             if len(self._active_clients) >= self.POOL_SIZE:
                 raise RuntimeError("Connection pool exhausted")
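To illustrate the new pattern, here is a minimal sketch of how the per-model values resolve; the model_config dict below is a hypothetical stand-in for one parsed entry from config.yaml, with the previous hard-coded constants (500 and 2) retained as fallback defaults:

# Sketch: per-model overrides with the old class-attribute values
# (500 and 2) as fallbacks. The dict mimics one model entry from
# config.yaml; it is illustrative, not the project's loader output.
model_config = {
    "model_name": "qwen2.5-0.5B-prefill-20e",
    "pool_size": 2,
    "max_context_length": 128,
}

max_context_length = model_config.get("max_context_length", 500)  # -> 128
pool_size = model_config.get("pool_size", 2)                      # -> 2

# An entry that omits the new keys silently keeps the old behavior:
assert {}.get("max_context_length", 500) == 500
assert {}.get("pool_size", 2) == 2

Note that the second hunk also reorders _get_client: the pool-reuse check now runs before the memory_required check, so a client taken from the pool is returned immediately without triggering a memory check.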

config/config.yaml

Lines changed: 7 additions & 3 deletions
@@ -26,6 +26,7 @@ models:
     model_name: "qwen2.5-0.5B-prefill-20e"
     object: "llm.setup"
     pool_size: 2
+    max_context_length: 128
     response_format: "llm.utf-8.stream"
     input: "llm.utf-8"
     memory_required: 716800
@@ -39,22 +40,24 @@ models:
     model_name: "Qwen2.5-0.5B-w8a16"
     object: "llm.setup"
     pool_size: 2
+    max_context_length: 1024
     response_format: "llm.utf-8.stream"
     input: "llm.utf-8"
     memory_required: 716800
     system_prompt: |
       You are a helpful assistant.
 
-  deepseek-r1-1.5B-ax630c:
+  deepseek-r1-distill-qwen-1.5b:
     type: tcp_client
     host: "192.168.20.65"
     port: 10001
     model_name: "deepseek-r1-1.5B-ax630c"
     object: "llm.setup"
     pool_size: 1
+    max_context_length: 1024
     response_format: "llm.utf-8.stream"
     input: "llm.utf-8"
-    memory_required: 1572864
+    memory_required: 2097152
     system_prompt: |
       You are a helpful assistant.
 
@@ -64,7 +67,8 @@ models:
     port: 10001
     model_name: "internvl2.5-1B-ax630c"
     object: "vlm.setup"
-    pool_size: 1
+    pool_size: 2
+    max_context_length: 256
     response_format: "vlm.utf-8.stream"
     input: "vlm.utf-8"
     memory_required: 1048576
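For completeness, a minimal sketch of how these entries could reach the backend, assuming config.yaml is parsed with PyYAML and each per-model mapping is handed to the backend as model_config (the loading code here is an assumption, not the project's actual startup path):

# Hypothetical wiring: parse config.yaml and inspect each model entry.
# With this commit, max_context_length and pool_size flow from YAML
# into LlmClientBackend instead of being hard-coded class attributes.
import yaml  # PyYAML; assumed, the project may load config differently

with open("config/config.yaml") as f:
    config = yaml.safe_load(f)

for name, model_config in config["models"].items():
    print(name,
          model_config.get("max_context_length", 500),  # same fallbacks
          model_config.get("pool_size", 2))             # as the backend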
