
Commit 82997ac

Author: LittleMouse
[clean] Clean up the code
1 parent 1077efb · commit 82997ac

10 files changed, +16 −139 lines changed


README.md

Lines changed: 2 additions & 2 deletions
@@ -78,7 +78,7 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer YOUR_KEY" \
   -d '{
-    "model": "gpt-3.5-turbo",
+    "model": "qwen2.5-0.5B-p256-ax630c",
     "messages": [{"role": "user", "content": "Hello!"}],
     "temperature": 0.7
   }'
@@ -90,7 +90,7 @@ curl -X POST "http://localhost:8000/v1/audio/speech" \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer YOUR_KEY" \
   -d '{
-    "model": "tts-1",
+    "model": "melotts",
     "input": "Hello world!",
     "voice": "alloy"
   }'
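
For reference, the updated chat request can also be issued from Python. This is a minimal sketch, assuming the server from the README is running on localhost:8000 and that YOUR_KEY is a placeholder credential; it is not part of this commit.

# Minimal sketch: same chat request as the curl example above, via the requests library.
# Assumes the server is reachable at http://localhost:8000 and YOUR_KEY is a placeholder.
import requests

response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    headers={
        "Content-Type": "application/json",
        "Authorization": "Bearer YOUR_KEY",
    },
    json={
        "model": "qwen2.5-0.5B-p256-ax630c",
        "messages": [{"role": "user", "content": "Hello!"}],
        "temperature": 0.7,
    },
)
print(response.json())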

api_server.py

Lines changed: 3 additions & 11 deletions
@@ -1,17 +1,14 @@
 import os
 import uuid
 import yaml
-from fastapi import FastAPI, Request, HTTPException, File, Form, UploadFile
-from fastapi.responses import JSONResponse, StreamingResponse
 import logging
-from slowapi import Limiter
-from slowapi.util import get_remote_address
 import time
 import json
 import asyncio
 
+from fastapi import FastAPI, Request, HTTPException, File, Form, UploadFile
+from fastapi.responses import JSONResponse, StreamingResponse
 from backend import (
-    TestBackend,
     OpenAIProxyBackend,
     LlmClientBackend,
     VisionModelBackend,
@@ -34,7 +31,6 @@
 logger = logging.getLogger("api")
 
 app = FastAPI(title="OpenAI Compatible API Server")
-limiter = Limiter(key_func=get_remote_address)
 
 class Config:
     def __init__(self):
@@ -113,9 +109,7 @@ async def chat_completions(request: Request, body: ChatCompletionRequest):
             detail=f"Unsupported model: {body.model}"
         )
 
-    try:
-        print(f"Received request: {body.model_dump()}")
-
+    try:
         if body.stream:
             chunk_generator = await backend.generate(body)
             if not chunk_generator:
@@ -133,7 +127,6 @@ async def format_stream():
                     chunk_dict = chunk.model_dump()
 
                     json_chunk = json.dumps(chunk_dict, ensure_ascii=False)
-                    print(f"Sending chunk: {json_chunk}")
                     yield f"data: {json_chunk}\n\n"
             except asyncio.CancelledError:
                 logger.warning("Client disconnected early, terminating inference...")
@@ -150,7 +143,6 @@ async def format_stream():
             )
         else:
             response = await backend.generate(body)
-            print(f"Sending response: {response}")
             return JSONResponse(content=response)
 
     except HTTPException as he:
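
The streaming branch above emits Server-Sent-Events-style lines of the form data: {json}. A client could consume that stream as sketched below; this assumes the endpoint and key from the README, and the [DONE] sentinel check follows the usual OpenAI convention rather than anything confirmed by this diff.

# Minimal sketch of reading the "data: ..." stream produced by format_stream().
# The "[DONE]" check is an assumption (OpenAI convention), not confirmed by this commit.
import json
import requests

with requests.post(
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_KEY"},
    json={
        "model": "qwen2.5-0.5B-p256-ax630c",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": True,
    },
    stream=True,
) as response:
    for line in response.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload.strip() == "[DONE]":  # assumed sentinel, may not apply to this server
            break
        print(json.loads(payload))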

backend/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
-from .test_backend import TestBackend
 from .openai_proxy_backend import OpenAIProxyBackend
 from .llm_client_backend import LlmClientBackend
 from .tts_client_backend import TtsClientBackend

backend/llm_client_backend.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ def __init__(self, model_config):
         self._active_clients = {}
         self._pool_lock = asyncio.Lock()
         self.logger = logging.getLogger("api.llm")
-        self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 200)
+        self.MAX_CONTEXT_LENGTH = model_config.get("max_context_length", 128)
         self.POOL_SIZE = model_config.get("pool_size", 2)
         self._inference_executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
         self._active_tasks = weakref.WeakSet()
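
With this change the context window falls back to 128 tokens whenever the model config omits max_context_length (previously 200). A minimal sketch of the lookup; the config dicts here are hypothetical illustrations, not entries from the repository.

# Hypothetical model_config values illustrating the new default of 128 (previously 200).
defaulted = {}                              # no explicit value -> falls back to 128
explicit = {"max_context_length": 256}      # e.g. what services/model_list.py sets for -p256- models

print(defaulted.get("max_context_length", 128))   # 128
print(explicit.get("max_context_length", 128))    # 256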

backend/test_backend.py

Lines changed: 0 additions & 56 deletions
This file was deleted.

client/asr_client.py

Lines changed: 1 addition & 18 deletions
@@ -117,21 +117,4 @@ def create_transcription(self, audio_data: bytes, language: str = "zh") -> str:
         for chunk in self.inference_stream(audio_b64, object_type="asr.base64"):
             full_text += chunk
 
-        return full_text
-
-if __name__ == "__main__":
-    with ASRClient(host='192.168.20.183') as client:
-        setup_response = client.setup("whisper.setup", {
-            "model": "whisper-tiny",
-            "response_format": "asr.utf-8",
-            "input": "whisper.base64",
-            "language": "zh",
-            "enoutput": True,
-        })
-        print("Setup response:", setup_response)
-
-        for chunk in client.inference_stream("AAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwQACAAEAA8AGQAWABUAHQAnADQANwAzADEAJAAlAA=="):
-            print("Received chunk:", chunk)
-
-        exit_response = client.exit()
-        print("Exit response:", exit_response)
+        return full_text

client/llm_client.py

Lines changed: 1 addition & 20 deletions
@@ -109,23 +109,4 @@ def _wait_response(self, request_id: str) -> dict:
     def connect(self):
         with self._lock:
             if not self.sock:
-                self._connect()
-
-if __name__ == "__main__":
-    with LLMClient(host='192.168.20.183') as client:
-        setup_response = client.setup("llm.setup", {
-            "model": "Qwen2.5-0.5B-w8a16",
-            "response_format": "llm.utf-8.stream",
-            "input": "llm.utf-8",
-            "enoutput": True,
-            "max_token_len": 1023,
-            "prompt": "You are a helpful assistant"
-        })
-        print("Setup response:", setup_response)
-
-        for chunk in client.inference_stream("Tell me a story"):
-            print("Received chunk:", chunk)
-        client.stop_inference()
-
-        exit_response = client.exit()
-        print("Exit response:", exit_response)
+                self._connect()

client/sys_client.py

Lines changed: 1 addition & 10 deletions
@@ -137,13 +137,4 @@ def create_transcription(self, audio_data: bytes, language: str = "zh") -> str:
         for chunk in self.inference_stream(audio_b64, object_type="asr.base64"):
             full_text += chunk
 
-        return full_text
-
-if __name__ == "__main__":
-    with SYSClient(host='192.168.20.48') as client:
-        hw_response = client.hwinfo()
-        print("hwinfo response:", hw_response)
-        cmm_response = client.cmminfo()
-        print("cmm response:", cmm_response)
-        model_list_response = client.model_list()
-        print("model_list_response:", model_list_response)
+        return full_text

client/tts_client.py

Lines changed: 1 addition & 20 deletions
@@ -114,23 +114,4 @@ def _wait_response(self, request_id: str) -> dict:
     def connect(self):
         with self._lock:
             if not self.sock:
-                self._connect()
-
-if __name__ == "__main__":
-    with TTSClient(host='192.168.20.183') as client:
-        setup_response = client.setup("melotts.setup", {
-            "model": "melotts_zh-cn",
-            "response_format": "pcm.stream.base64",
-            "input": "tts.utf-8",
-            "enoutput": True,
-        })
-        print("Setup response:", setup_response)
-        time.sleep(1)
-        for chunk in client.inference_stream("好的,我来给你讲一个故事。", object_type="tts.utf-8"):
-            print("Received data chunk:", chunk)
-            with open('output_base64.txt', 'a') as f_base:
-                f_base.write(chunk + '\n')
-            with open('output.pcm', 'ab') as f_pcm:
-                f_pcm.write(base64.b64decode(chunk))
-        exit_response = client.exit()
-        print("Exit response:", exit_response)
+                self._connect()

services/model_list.py

Lines changed: 6 additions & 0 deletions
@@ -65,6 +65,12 @@ async def get_model_list(self, required_mem: int) -> None:
             elif '-0.5B-' in mode:
                 new_entry['memory_required'] = 560460
                 new_entry['pool_size'] = 2
+            else:
+                new_entry['memory_required'] = 1363148
+                new_entry['pool_size'] = 2
+
+            if '-p256-' in mode:
+                new_entry['max_context_length'] = 256
 
         elif model_type == 'tts':
             if 'melotts' in mode.lower():
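
The added branches derive memory and context defaults purely from the model name. The sketch below mirrors only the branches visible in this hunk as a standalone function; resolve_llm_defaults is a hypothetical name, and only the constants come from the diff.

# Illustrative helper mirroring the name-based defaults added above for LLM entries.
# Only the branches shown in this hunk are reproduced; the function name is hypothetical.
def resolve_llm_defaults(mode: str) -> dict:
    entry = {}
    if '-0.5B-' in mode:
        entry['memory_required'] = 560460
        entry['pool_size'] = 2
    else:
        entry['memory_required'] = 1363148
        entry['pool_size'] = 2

    if '-p256-' in mode:
        entry['max_context_length'] = 256
    return entry

print(resolve_llm_defaults("qwen2.5-0.5B-p256-ax630c"))
# -> {'memory_required': 560460, 'pool_size': 2, 'max_context_length': 256}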
