Skip to content

Commit 9d4a2d0

Browse files
authored
Merge pull request #4 from m5stack/dev
[perf] Optimize contextual conversations
2 parents 6f78344 + d5a2dc4 commit 9d4a2d0

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

backend/llm_client_backend.py

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -188,13 +188,15 @@ def _truncate_history(self, messages: List[Message]) -> List[Message]:
188188
total_length = 0
189189
keep_messages = []
190190

191+
for msg in reversed(messages):
192+
if msg.role == "system":
193+
total_length += self._count_tokens(msg.content)
194+
total_length += 16
195+
191196
# Process in reverse to keep latest messages
192197
for msg in reversed(messages):
193-
if msg.role == "system": # Always keep system messages
194-
keep_messages.insert(0, msg)
195-
continue
198+
msg_length = 0
196199
if isinstance(msg.content, list):
197-
msg_length = 0
198200
for item in msg.content:
199201
if item.type == "text":
200202
msg_length += self._count_tokens(item.text)
@@ -203,6 +205,10 @@ def _truncate_history(self, messages: List[Message]) -> List[Message]:
203205
break
204206
else:
205207
msg_length = self._count_tokens(msg.content)
208+
if msg.role == "user":
209+
msg_length += 3
210+
if msg.role == "assistant":
211+
msg_length += 3
206212
if total_length + msg_length > self.MAX_CONTEXT_LENGTH:
207213
break
208214
total_length += msg_length
@@ -227,6 +233,12 @@ async def generate(self, request: ChatCompletionRequest):
227233
try:
228234
truncated_messages = self._truncate_history(request.messages)
229235

236+
if not truncated_messages:
237+
raise HTTPException(
238+
status_code=400,
239+
detail="The input content exceeds the maximum length supported by the model."
240+
)
241+
230242
query_lines = []
231243
base64_images = []
232244
system_prompt = ""

0 commit comments

Comments
 (0)