forked from LAION-AI/Open-Assistant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhf_langchain_inference.py
52 lines (42 loc) · 1.57 KB
/
hf_langchain_inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import interface
import utils
from langchain.llms.base import LLM
class HFInference(LLM):
    """LangChain LLM implementation which uses the HF inference server configured in the worker settings."""

    # Generation settings; each one is forwarded unchanged to
    # ``interface.GenerateStreamParameters`` on every ``_call``.
    max_new_tokens: int = 512
    top_k: int | None = None
    top_p: float | None = None
    typical_p: float | None = None
    temperature: float = 0.8
    repetition_penalty: float | None = None
    # Stop sequences applied on every call, in addition to the per-call ``stop`` list.
    stop_sequences: list[str] = []
    seed: int = 42
    # NOTE(review): not read anywhere in this class -- presumably consumed
    # elsewhere or kept for configuration; confirm before relying on it.
    inference_server_url: str = ""

    @property
    def _llm_type(self) -> str:
        """Return the type identifier of this LLM implementation."""
        return "hf-inference"

    def _call(self, prompt: str, stop: list[str] | None = None) -> str:
        """Generate a completion for ``prompt`` via the inference server stream.

        Args:
            prompt: Text sent as the ``inputs`` of the generate request.
            stop: Optional per-call stop sequences. They are combined with
                ``self.stop_sequences``; the list passed in is not modified.

        Returns:
            The ``generated_text`` of the last stream event (empty string if
            it is ``None``), truncated at each stop sequence found in it.

        Raises:
            RuntimeError: If the inference server stream yields no events.
        """
        # Build a fresh list instead of ``stop += ...`` so that neither the
        # caller's list nor ``self.stop_sequences`` is mutated across calls.
        all_stops = [*(stop or []), *self.stop_sequences]

        request = interface.GenerateStreamRequest(
            inputs=prompt,
            parameters=interface.GenerateStreamParameters(
                stop=all_stops,
                max_new_tokens=self.max_new_tokens,
                top_k=self.top_k,
                top_p=self.top_p,
                typical_p=self.typical_p,
                temperature=self.temperature,
                repetition_penalty=self.repetition_penalty,
                seed=self.seed,
            ),
        )

        # Drain the stream; only the final event is used below.
        last_event = None
        for last_event in utils.get_inference_server_stream_events(request):
            pass

        if last_event is None:
            # Previously this surfaced as an opaque UnboundLocalError.
            raise RuntimeError("Inference server returned no stream events")

        generated_text = last_event.generated_text or ""

        # Cut the text at each stop sequence in turn; later sequences are
        # matched against the already-truncated text.
        for stop_seq in all_stops:
            cut_at = generated_text.find(stop_seq)
            if cut_at != -1:
                generated_text = generated_text[:cut_at]

        return generated_text