# config.yaml
# Network settings for the server itself.
# NOTE(review): presumably the address/port the service binds to — confirm
# against the code that loads this file.
server:
  host: "0.0.0.0"
  port: 8000

# Model registry: one mapping per model, keyed by the model's name.
# Each entry's `type` selects the kind of backend; the remaining keys are
# the settings for that backend.
#
# String values are written without surrounding whitespace. Hosts, model
# names and protocol identifiers are presumably compared exactly by the
# consumer, so a stray leading space would break them — verify if in doubt.
models:
  llama2-7b:
    type: llama.cpp

  # --- Entries that reference an HTTP `base_url` ---------------------------
  # `api_key: sk-` is only a prefix/placeholder in this file.
  # NOTE(review): the real key is presumably supplied out of band; do not
  # commit real secrets here.
  gpt-3.5-turbo-proxy:
    type: openai_proxy
    api_key: sk-
    base_url: https://api.openai.com/v1
    model: gpt-3.5-turbo

  deepseek-r1:
    type: openai_proxy
    api_key: sk-
    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
    model: deepseek-r1

  # --- tcp_client entries --------------------------------------------------
  # All of these target the same endpoint, 192.168.20.56:10001.
  # NOTE(review): units of `memory_required` and `max_context_length` are
  # not stated in this file — confirm with the consumer before changing.
  qwen2.5-0.5B-p256-ax630c:
    type: tcp_client
    host: "192.168.20.56"
    port: 10001
    model_name: "qwen2.5-0.5B-p256-ax630c"
    object: "llm.setup"
    pool_size: 2
    max_context_length: 128
    response_format: "llm.utf-8.stream"
    input: "llm.utf-8"
    memory_required: 560460
    system_prompt: |
      You are a helpful assistant.

  qwen2.5-1.5B-p256-ax630c:
    type: tcp_client
    host: "192.168.20.56"
    port: 10001
    model_name: "qwen2.5-1.5B-p256-ax630c"
    object: "llm.setup"
    pool_size: 1
    max_context_length: 128
    response_format: "llm.utf-8.stream"
    input: "llm.utf-8"
    memory_required: 1686216
    system_prompt: |
      You are a helpful assistant.

  deepseek-r1-1.5B-p256-ax630c:
    type: tcp_client
    host: "192.168.20.56"
    port: 10001
    model_name: "deepseek-r1-1.5B-p256-ax630c"
    object: "llm.setup"
    pool_size: 1
    max_context_length: 128
    response_format: "llm.utf-8.stream"
    input: "llm.utf-8"
    memory_required: 1686552
    system_prompt: |
      You are a helpful assistant.

  llama3.2-1B-p256-ax630c:
    type: tcp_client
    host: "192.168.20.56"
    port: 10001
    model_name: "llama3.2-1B-p256-ax630c"
    object: "llm.setup"
    pool_size: 2
    max_context_length: 128
    response_format: "llm.utf-8.stream"
    input: "llm.utf-8"
    memory_required: 1336288
    system_prompt: |
      You are a helpful assistant.

  # Same tcp_client shape as above, but uses the `vlm.*` identifiers and a
  # larger max_context_length (256) than the `llm.*` entries (128).
  internvl2.5-1B-ax630c:
    type: tcp_client
    host: "192.168.20.56"
    port: 10001
    model_name: "internvl2.5-1B-ax630c"
    object: "vlm.setup"
    pool_size: 2
    max_context_length: 256
    response_format: "vlm.utf-8.stream"
    input: "vlm.utf-8"
    memory_required: 905356
    system_prompt: |
      You are a helpful assistant.

  # --- vision_model entry --------------------------------------------------
  qwen-vl-plus:
    type: vision_model
    api_key: sk-
    base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
    model: qwen-vl-plus
    # 4194304 = 4 * 1024 * 1024 (presumably bytes — TODO confirm).
    max_image_size: 4194304
    # NOTE(review): presumably seconds — confirm.
    image_timeout: 20

  # --- Speech entries (tts / asr) ------------------------------------------
  # These use the same host and port as the tcp_client entries above.
  melotts:
    type: tts
    host: "192.168.20.56"
    port: 10001
    model_name: "melotts_zh-cn"
    object: "melotts.setup"
    response_format: "wav.base64"
    memory_required: 59764
    input: "tts.utf-8"

  whisper-tiny:
    type: asr
    host: "192.168.20.56"
    port: 10001
    model_name: "whisper-tiny"
    object: "whisper.setup"
    response_format: "asr.utf-8"
    memory_required: 289132
    language: "en"
    input: "pcm.base64"
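# NOTE(review): the captured diff also listed the two settings below as
# added lines (new-file lines 2-3), presumably under `server`. They are kept
# as a comment so the values are not lost and `server` keys are not
# duplicated — confirm which server settings are current.
#   host: 127.0.0.1
#   port: 10001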