main.py (forked from LAION-AI/Open-Assistant)

# A FastAPI server to run the safety pipeline
import asyncio
from concurrent.futures import ThreadPoolExecutor

import fastapi
import uvicorn
from blade2blade import Blade2Blade
from loguru import logger
from oasst_shared.schemas import inference
from settings import settings

app = fastapi.FastAPI()


# Log any unhandled exception, then re-raise it so FastAPI's normal error
# handling still produces the response.
@app.middleware("http")
async def log_exceptions(request: fastapi.Request, call_next):
    try:
        response = await call_next(request)
    except Exception:
        logger.exception("Exception in request")
        raise
    return response


# Module-level pipeline state, populated by the startup hook below.
pipeline_loaded: bool = False
pipeline: Blade2Blade
executor = ThreadPoolExecutor()


@app.on_event("startup")
async def load_pipeline():
    global pipeline_loaded, pipeline
    pipeline = Blade2Blade(settings.safety_model_name)

    # warmup (the string follows the pipeline's |prompter|...|endoftext| format)
    warmup_input = "|prompter|Hey,how are you?|endoftext|"
    _ = pipeline.predict(warmup_input)

    pipeline_loaded = True
    logger.info("Pipeline loaded")


# Blade2Blade.predict is blocking, so run it in the thread pool to keep the
# event loop responsive.
async def async_predict(pipeline: Blade2Blade, inputs: str):
    return await asyncio.get_event_loop().run_in_executor(executor, pipeline.predict, inputs)


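# Note: on Python 3.9+ the same off-loading can also be written as
# `await asyncio.to_thread(pipeline.predict, inputs)`; the explicit
# ThreadPoolExecutor above keeps the worker pool under the module's control.

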
@app.post("/safety", response_model=inference.SafetyResponse)
async def safety(request: inference.SafetyRequest):
    global pipeline_loaded, pipeline

    # Wait until the startup hook has finished loading the model.
    while not pipeline_loaded:
        await asyncio.sleep(1)

    outputs = await async_predict(pipeline, request.inputs)
    return inference.SafetyResponse(outputs=outputs)


@app.get("/health")
async def health():
    if not pipeline_loaded:
        raise fastapi.HTTPException(status_code=503, detail="Server not fully loaded")
    return {"status": "ok"}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8008)

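# Example client call (a sketch, assuming the server is reachable on
# localhost:8008 and that SafetyRequest/SafetyResponse carry the
# `inputs`/`outputs` fields used above):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8008/safety",
#       json={"inputs": "|prompter|Hey,how are you?|endoftext|"},
#   )
#   resp.raise_for_status()
#   print(resp.json()["outputs"])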