Unverified Commit 694d75cf authored by Tashfeen's avatar Tashfeen Committed by GitHub

fix(server): error handler crash + missing platforms + per-provider timeout (#7)

* fix(server): guard errorHandler against headers-already-sent

When an LLM completion errors mid-stream, the response is already
flushing tokens to the client. The handler then unconditionally called
res.status().json(), throwing ERR_HTTP_HEADERS_SENT and triggering a
pm2 restart. Short-circuit to Express's default handler once headers
have been sent so the socket closes cleanly.

* feat(providers): configurable per-provider HTTP timeout

OpenAICompatProvider now accepts an optional timeoutMs constructor
option (default 15000ms). Cloud APIs respond well within the existing
default, but locally-hosted OpenAI-compatible inference (llama.cpp,
vLLM on CPU) can take 30-120s for long prompts and was being aborted
mid-generation, causing the proxy to mark the key invalid.

* fix(keys): add zhipu, moonshot, minimax to platform allowlist

These three platforms exist in the Platform type union and have
provider registrations, but were missing from the PLATFORMS array
in the keys route. Without them, the addKey Zod schema rejects
requests to add API keys for these providers.
parent fbb2a175
import type { Request, Response, NextFunction } from 'express'; import type { Request, Response, NextFunction } from 'express';
export function errorHandler(err: Error, _req: Request, res: Response, _next: NextFunction) { export function errorHandler(err: Error, _req: Request, res: Response, next: NextFunction) {
console.error('[Error]', err.message); console.error('[Error]', err.message);
if (res.headersSent) return next(err);
const status = (err as any).status ?? 500; const status = (err as any).status ?? 500;
res.status(status).json({ res.status(status).json({
error: { error: {
......
...@@ -17,6 +17,9 @@ export class OpenAICompatProvider extends BaseProvider { ...@@ -17,6 +17,9 @@ export class OpenAICompatProvider extends BaseProvider {
private readonly baseUrl: string; private readonly baseUrl: string;
private readonly extraHeaders: Record<string, string>; private readonly extraHeaders: Record<string, string>;
private readonly validateUrl?: string; private readonly validateUrl?: string;
/** Per-provider HTTP timeout override. Cloud APIs finish in ~15s; locally-hosted
* inference (llama.cpp / vLLM on CPU) can take 30-120s for long prompts. Default 15000. */
private readonly timeoutMs: number;
constructor(opts: { constructor(opts: {
platform: Platform; platform: Platform;
...@@ -24,6 +27,7 @@ export class OpenAICompatProvider extends BaseProvider { ...@@ -24,6 +27,7 @@ export class OpenAICompatProvider extends BaseProvider {
baseUrl: string; baseUrl: string;
extraHeaders?: Record<string, string>; extraHeaders?: Record<string, string>;
validateUrl?: string; validateUrl?: string;
timeoutMs?: number;
}) { }) {
super(); super();
this.platform = opts.platform; this.platform = opts.platform;
...@@ -31,6 +35,7 @@ export class OpenAICompatProvider extends BaseProvider { ...@@ -31,6 +35,7 @@ export class OpenAICompatProvider extends BaseProvider {
this.baseUrl = opts.baseUrl; this.baseUrl = opts.baseUrl;
this.extraHeaders = opts.extraHeaders ?? {}; this.extraHeaders = opts.extraHeaders ?? {};
this.validateUrl = opts.validateUrl; this.validateUrl = opts.validateUrl;
this.timeoutMs = opts.timeoutMs ?? 15000;
} }
async chatCompletion( async chatCompletion(
...@@ -56,7 +61,7 @@ export class OpenAICompatProvider extends BaseProvider { ...@@ -56,7 +61,7 @@ export class OpenAICompatProvider extends BaseProvider {
tool_choice: options?.tool_choice, tool_choice: options?.tool_choice,
parallel_tool_calls: options?.parallel_tool_calls, parallel_tool_calls: options?.parallel_tool_calls,
}), }),
}); }, this.timeoutMs);
if (!res.ok) { if (!res.ok) {
const err = await res.json().catch(() => ({})); const err = await res.json().catch(() => ({}));
...@@ -92,7 +97,7 @@ export class OpenAICompatProvider extends BaseProvider { ...@@ -92,7 +97,7 @@ export class OpenAICompatProvider extends BaseProvider {
parallel_tool_calls: options?.parallel_tool_calls, parallel_tool_calls: options?.parallel_tool_calls,
stream: true, stream: true,
}), }),
}); }, this.timeoutMs);
if (!res.ok) { if (!res.ok) {
const err = await res.json().catch(() => ({})); const err = await res.json().catch(() => ({}));
......
...@@ -9,6 +9,7 @@ export const keysRouter = Router(); ...@@ -9,6 +9,7 @@ export const keysRouter = Router();
const PLATFORMS = [ const PLATFORMS = [
'google', 'groq', 'cerebras', 'sambanova', 'nvidia', 'mistral', 'google', 'groq', 'cerebras', 'sambanova', 'nvidia', 'mistral',
'openrouter', 'github', 'huggingface', 'cohere', 'cloudflare', 'openrouter', 'github', 'huggingface', 'cohere', 'cloudflare',
'zhipu', 'moonshot', 'minimax',
] as const; ] as const;
const addKeySchema = z.object({ const addKeySchema = z.object({
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment