Skip to content

Commit dcc871d

Browse files
Author: LittleMouse (committed)
[fix] Fix model list retrieval
1 parent ccbf41e commit dcc871d

File tree

3 files changed

+6
-121
lines changed

3 files changed

+6
-121
lines changed

api_server.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ async def get_backend(self, model_name):
6969
if model_config["type"] == "openai_proxy":
7070
self.backends[model_name] = OpenAIProxyBackend(model_config)
7171
elif model_config["type"] in ("llm", "vlm"):
72-
while len(self.llm_models) >= 2:
72+
count = model_config["pool_size"]
73+
while len(self.llm_models) >= count:
7374
oldest_model = self.llm_models.pop(0)
7475
old_instance = self.backends.pop(oldest_model, None)
7576
if old_instance:
@@ -307,7 +308,7 @@ async def create_translation(
307308
@app.get("/v1/models")
308309
async def list_models():
309310
models_info = []
310-
for model_name in _dispatcher.backends.keys():
311+
for model_name in config.data["models"].keys():
311312
model_config = config.data["models"].get(model_name, {})
312313
models_info.append({
313314
"id": model_name,

backend/llm_client_backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def __init__(self, model_config):
2020
self._active_clients = {}
2121
self._pool_lock = asyncio.Lock()
2222
self.logger = logging.getLogger("api.llm")
23-
self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 500)
23+
self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 200)
2424
self.POOL_SIZE = model_config.get("pool_size", 2)
2525
self._inference_executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
2626
self._active_tasks = weakref.WeakSet()

config/config.yaml

Lines changed: 2 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -1,119 +1,3 @@
1-
# config.yaml
21
server:
3-
host: 0.0.0.0
4-
port: 8000
5-
6-
models:
7-
llama2-7b:
8-
type: llama.cpp
9-
10-
gpt-3.5-turbo-proxy:
11-
type: openai_proxy
12-
api_key: sk-
13-
base_url: https://api.openai.com/v1
14-
model: gpt-3.5-turbo
15-
16-
deepseek-r1:
17-
type: openai_proxy
18-
api_key: sk-
19-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
20-
model: deepseek-r1
21-
22-
qwen2.5-0.5B-p256-ax630c:
23-
type: tcp_client
24-
host: "192.168.20.56"
25-
port: 10001
26-
model_name: "qwen2.5-0.5B-p256-ax630c"
27-
object: "llm.setup"
28-
pool_size: 2
29-
max_context_length: 128
30-
response_format: "llm.utf-8.stream"
31-
input: "llm.utf-8"
32-
memory_required: 560460
33-
system_prompt: |
34-
You are a helpful assistant.
35-
36-
qwen2.5-1.5B-p256-ax630c:
37-
type: tcp_client
38-
host: "192.168.20.56"
39-
port: 10001
40-
model_name: "qwen2.5-1.5B-p256-ax630c"
41-
object: "llm.setup"
42-
pool_size: 1
43-
max_context_length: 128
44-
response_format: "llm.utf-8.stream"
45-
input: "llm.utf-8"
46-
memory_required: 1686216
47-
system_prompt: |
48-
You are a helpful assistant.
49-
50-
deepseek-r1-1.5B-p256-ax630c:
51-
type: tcp_client
52-
host: "192.168.20.56"
53-
port: 10001
54-
model_name: "deepseek-r1-1.5B-p256-ax630c"
55-
object: "llm.setup"
56-
pool_size: 1
57-
max_context_length: 128
58-
response_format: "llm.utf-8.stream"
59-
input: "llm.utf-8"
60-
memory_required: 1686552
61-
system_prompt: |
62-
You are a helpful assistant.
63-
64-
llama3.2-1B-p256-ax630c:
65-
type: tcp_client
66-
host: "192.168.20.56"
67-
port: 10001
68-
model_name: "llama3.2-1B-p256-ax630c"
69-
object: "llm.setup"
70-
pool_size: 2
71-
max_context_length: 128
72-
response_format: "llm.utf-8.stream"
73-
input: "llm.utf-8"
74-
memory_required: 1336288
75-
system_prompt: |
76-
You are a helpful assistant.
77-
78-
internvl2.5-1B-ax630c:
79-
type: tcp_client
80-
host: "192.168.20.56"
81-
port: 10001
82-
model_name: "internvl2.5-1B-ax630c"
83-
object: "vlm.setup"
84-
pool_size: 2
85-
max_context_length: 256
86-
response_format: "vlm.utf-8.stream"
87-
input: "vlm.utf-8"
88-
memory_required: 905356
89-
system_prompt: |
90-
You are a helpful assistant.
91-
92-
qwen-vl-plus:
93-
type: vision_model
94-
api_key: sk-
95-
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
96-
model: qwen-vl-plus
97-
max_image_size: 4194304
98-
image_timeout: 20
99-
100-
melotts:
101-
type: tts
102-
host: "192.168.20.56"
103-
port: 10001
104-
model_name: "melotts_zh-cn"
105-
object: "melotts.setup"
106-
response_format: "wav.base64"
107-
memory_required: 59764
108-
input: "tts.utf-8"
109-
110-
whisper-tiny:
111-
type: asr
112-
host: "192.168.20.56"
113-
port: 10001
114-
model_name: "whisper-tiny"
115-
object: "whisper.setup"
116-
response_format: "asr.utf-8"
117-
memory_required: 289132
118-
language: "en"
119-
input: "pcm.base64"
2+
host: 127.0.0.1
3+
port: 10001

0 commit comments

Comments (0)