
Commit c97940b

fix(llm-server): llm server timeout issue (#173)
Summary by CodeRabbit

New Features
- Chat requests now include a configurable timeout (default: 200 seconds) to ensure timely responses and provide clearer feedback when delays occur.

Chores
- Removed an outdated timeout limit in background operations to enhance flexibility for longer-running tasks.

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent ef243ae commit c97940b

File tree

3 files changed, +20 −6 lines changed

llm-server/src/llm-provider.ts

+18 −4

```diff
@@ -104,13 +104,27 @@ export class LLMProvider {
     return model;
   }
 
-  async chat(input: MessageInput): Promise<string> {
+  async chat(input: MessageInput, timeoutMs: number): Promise<string> {
     try {
       const model = this.getModelInstance(input.model);
-      const completion = await model.chat(input.messages);
-      return completion.choices[0].message.content || '';
+
+      // Set a timeout dynamically based on the provided value
+      const timeoutPromise = new Promise<string>((_, reject) =>
+        setTimeout(
+          () => reject(new Error('Chat request timed out')),
+          timeoutMs,
+        ),
+      );
+
+      // Race between the actual model call and the timeout
+      const completion = await Promise.race([
+        model.chat(input.messages),
+        timeoutPromise,
+      ]);
+
+      return (completion as any).choices[0].message.content || '';
     } catch (error) {
-      this.logger.error('Error in chat:', error);
+      this.logger.error(`Error in chat (Timeout: ${timeoutMs}ms):`, error);
       throw error;
     }
   }
```
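One side effect of the committed Promise.race pattern is that the timeout's setTimeout keeps ticking even after the model call wins the race, which can hold the event loop open for up to timeoutMs. Below is a minimal sketch, not part of this commit, of the same pattern with the timer cleared once the call settles; the ChatModel interface is an assumption standing in for the repo's ModelInstance type.

```typescript
// Sketch only, not part of commit c97940b. ChatModel is an assumed stand-in
// for the repo's ModelInstance type.
interface ChatModel {
  chat(messages: unknown[]): Promise<{ choices: { message: { content: string | null } }[] }>;
}

async function chatWithTimeout(model: ChatModel, messages: unknown[], timeoutMs: number): Promise<string> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  // Reject after timeoutMs; Promise<never> lets the race keep the completion's type.
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error('Chat request timed out')), timeoutMs);
  });
  try {
    // Whichever settles first wins: the model response or the timeout rejection.
    const completion = await Promise.race([model.chat(messages), timeoutPromise]);
    return completion.choices[0].message.content || '';
  } finally {
    // Clear the timer so a fast response does not leave a pending setTimeout behind.
    if (timer !== undefined) clearTimeout(timer);
  }
}
```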

llm-server/src/main.ts

+2 −1

```diff
@@ -137,7 +137,8 @@ export class App {
         }
       } else {
         // Handle regular response
-        const response = await this.llmProvider.chat(input);
+        // TODO: make this configurable; currently 200 seconds by default.
+        const response = await this.llmProvider.chat(input, 200000);
         res.json({
           model: input.model,
           choices: [
```
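The TODO above leaves the 200 000 ms value hardcoded at the call site. A minimal sketch of one way it could be made configurable follows; the CHAT_TIMEOUT_MS environment variable is hypothetical and not something the repo defines.

```typescript
// Sketch only: CHAT_TIMEOUT_MS is a hypothetical environment variable,
// not defined by this commit or the repo.
const DEFAULT_CHAT_TIMEOUT_MS = 200_000; // 200 seconds, matching the hardcoded value

function resolveChatTimeoutMs(): number {
  const parsed = Number(process.env.CHAT_TIMEOUT_MS);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_CHAT_TIMEOUT_MS;
}

// The call site would then read:
// const response = await this.llmProvider.chat(input, resolveChatTimeoutMs());
```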

llm-server/src/model/remote-model-instance.ts

−1

```diff
@@ -30,7 +30,6 @@ export class RemoteOpenAIModelEngine implements ModelInstance {
       interval: 1000, // per 1000ms (1 second)
      carryoverConcurrencyCount: true, // Carry over pending tasks
       // FIXME: hack way to set up timeout
-      timeout: 120000, // 120 second timeout to accommodate longer streams
     });
 
     // Log queue events for monitoring
```
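Dropping the queue-level timeout means long-running streams are no longer cut off by the queue itself; the per-request timeout added to LLMProvider.chat() takes over that role. A rough sketch of the resulting queue setup, assuming the options above belong to the p-queue package (the concurrency and intervalCap values here are illustrative, not from the repo):

```typescript
// Sketch only: assumes the repo's request queue is p-queue; the concurrency
// and intervalCap values are illustrative, not taken from the repo.
import PQueue from 'p-queue';

const queue = new PQueue({
  concurrency: 5,                  // illustrative: parallel requests allowed
  intervalCap: 10,                 // illustrative: max tasks started per interval
  interval: 1000,                  // per 1000ms (1 second)
  carryoverConcurrencyCount: true, // carry over pending tasks
  // No queue-level `timeout` any more: tasks only fail if the request itself
  // rejects (e.g. via the per-request timeout in LLMProvider.chat()).
});

// Usage stays the same; long streams are no longer killed after 120 seconds.
void queue.add(() => Promise.resolve('ok'));
```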
