Files
Genarrative/server-node/src/services/llmClient.ts
2026-04-18 13:05:29 +08:00

511 lines
13 KiB
TypeScript

import { Readable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import type {
Request as ExpressRequest,
Response as ExpressResponse,
} from 'express';
import type { Logger } from 'pino';
import type { AppConfig } from '../config.js';
import { HttpError, upstreamError } from '../errors.js';
import {
extractApiErrorMessage,
prepareApiResponse,
prepareEventStreamResponse,
} from '../http.js';
/** One chat turn placed in the `messages` array of a completion request. */
export type ChatMessage = {
/** Who authored the turn. */
role: 'system' | 'user' | 'assistant';
/** Text of the turn. */
content: string;
};
/**
 * Body accepted by `UpstreamLlmClient.requestCompletion`; it is JSON-serialized
 * into the POST sent to the upstream `/chat/completions` endpoint.
 */
type CompletionRequest = {
/** Model override; a missing or blank value falls back to the configured model. */
model?: string;
/** Forwarded as-is; the streaming helpers in this file set it to `true`. */
stream?: boolean;
/** Chat turns to send. */
messages: ChatMessage[];
};
/** Per-call execution options for `UpstreamLlmClient.requestCompletion`. */
type RequestExecutionOptions = {
/** Caller-owned signal; aborting it aborts the upstream fetch. */
signal?: AbortSignal;
/** Timeout for this call in milliseconds; missing or non-positive values use the client default. */
timeoutMs?: number;
/** Free-form label attached to the log entries of this call; blank labels are dropped. */
debugLabel?: string;
};
/** Fallback upstream timeout (30 s), used when `LLM_REQUEST_TIMEOUT_MS` is missing or not a positive number. */
const DEFAULT_LLM_REQUEST_TIMEOUT_MS = 30000;
/**
 * Strips every trailing `/` from `baseUrl` so a path can be appended with a
 * single separator. Slashes elsewhere in the string are left alone.
 */
function normalizeBaseUrl(baseUrl: string) {
  const SLASH = 0x2f;
  let end = baseUrl.length;
  // Walk backwards over the run of slashes at the very end of the string.
  while (end > 0 && baseUrl.charCodeAt(end - 1) === SLASH) {
    end -= 1;
  }
  return baseUrl.slice(0, end);
}
/**
 * Builds the chat-completions endpoint URL for the given API base URL.
 * Trailing slashes on the base are dropped first so the result has exactly
 * one `/` before `chat/completions`.
 */
function buildCompletionUrl(baseUrl: string) {
  const root = normalizeBaseUrl(baseUrl);
  return root + '/chat/completions';
}
/**
 * Reports whether `error` is an abort rejection: a `DOMException` or `Error`
 * whose `name` is `AbortError`. Non-error values are never treated as aborts.
 */
function isAbortLikeError(error: unknown) {
  const abortName = 'AbortError';
  // DOMException is not a global in every runtime, hence the typeof guard.
  if (typeof DOMException !== 'undefined' && error instanceof DOMException) {
    if (error.name === abortName) {
      return true;
    }
  }
  if (error instanceof Error) {
    return error.name === abortName;
  }
  return false;
}
/**
 * Reads the upstream timeout from `LLM_REQUEST_TIMEOUT_MS`.
 *
 * @param config Application config; only `rawEnv.LLM_REQUEST_TIMEOUT_MS` is read.
 * @returns A whole number of milliseconds in the range 1..2147483647. Missing,
 *   non-numeric, non-finite, zero, or negative values yield
 *   `DEFAULT_LLM_REQUEST_TIMEOUT_MS`.
 */
function readTimeoutMs(config: AppConfig) {
  // Node.js timers coerce delays outside 1..2147483647 to 1 ms, so an
  // oversized setting would otherwise time out almost immediately.
  const maxTimerDelayMs = 2147483647;
  const parsed = Number(config.rawEnv.LLM_REQUEST_TIMEOUT_MS);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_LLM_REQUEST_TIMEOUT_MS;
  }
  // Positive values below 0.5 would round to 0; keep them at the 1 ms minimum.
  return Math.min(Math.max(Math.round(parsed), 1), maxTimerDelayMs);
}
/**
 * Error thrown by `UpstreamLlmClient.requestCompletion` when the client's own
 * timer aborted the upstream fetch.
 *
 * Passes `502` and the code `UPSTREAM_TIMEOUT` to `HttpError`; the first
 * argument is presumably the HTTP status — confirm against `../errors.js`.
 */
export class UpstreamLlmTimeoutError extends HttpError {
constructor(message = 'LLM 上游请求超时') {
super(502, message, {
code: 'UPSTREAM_TIMEOUT',
});
// Name the subclass explicitly so logs and name-based checks can tell it apart.
this.name = 'UpstreamLlmTimeoutError';
}
}
/**
 * Error thrown by `UpstreamLlmClient.requestCompletion` when the upstream
 * fetch rejects with a `TypeError`, which that method treats as a
 * connectivity failure.
 *
 * Passes `502` and the code `UPSTREAM_CONNECTIVITY` to `HttpError`; the first
 * argument is presumably the HTTP status — confirm against `../errors.js`.
 */
export class UpstreamLlmConnectivityError extends HttpError {
constructor(message = '无法连接 LLM 上游服务') {
super(502, message, {
code: 'UPSTREAM_CONNECTIVITY',
});
// Name the subclass explicitly so logs and name-based checks can tell it apart.
this.name = 'UpstreamLlmConnectivityError';
}
}
/**
 * Type guard for upstream timeout failures.
 *
 * Accepts real `UpstreamLlmTimeoutError` instances as well as any `HttpError`
 * carrying the `UPSTREAM_TIMEOUT` code.
 */
export function isUpstreamLlmTimeoutError(
  error: unknown,
): error is UpstreamLlmTimeoutError {
  if (error instanceof UpstreamLlmTimeoutError) {
    return true;
  }
  if (!(error instanceof HttpError)) {
    return false;
  }
  return error.code === 'UPSTREAM_TIMEOUT';
}
/**
 * Type guard for upstream connectivity failures.
 *
 * Accepts real `UpstreamLlmConnectivityError` instances as well as any
 * `HttpError` carrying the `UPSTREAM_CONNECTIVITY` code.
 */
export function isUpstreamLlmConnectivityError(
  error: unknown,
): error is UpstreamLlmConnectivityError {
  if (error instanceof UpstreamLlmConnectivityError) {
    return true;
  }
  if (!(error instanceof HttpError)) {
    return false;
  }
  return error.code === 'UPSTREAM_CONNECTIVITY';
}
/**
 * Client for an upstream `/chat/completions` endpoint.
 *
 * It offers three ways to use the upstream:
 * - `requestCompletion` returns the raw fetch `Response`;
 * - `requestMessageContent` / `streamMessageContent` return the assistant text;
 * - `forwardCompletion` / `forwardSseText` relay the upstream body to an
 *   Express response.
 */
export class UpstreamLlmClient {
  /** Largest delay Node.js timers honour; anything above it is coerced to 1 ms. */
  private static readonly MAX_TIMER_DELAY_MS = 2147483647;

  /** Event-stream lines may be terminated by CRLF, a lone CR, or a lone LF. */
  private static readonly SSE_LINE_BREAK = /\r\n|\r|\n/u;

  readonly logger: Logger;
  private readonly requestTimeoutMs: number;

  /**
   * @param config Application config; reads `llm.baseUrl`, `llm.apiKey`,
   *   `llm.model`, `rawEnv.LLM_REQUEST_TIMEOUT_MS` and `rawEnv.LLM_DEBUG_LOG`.
   * @param logger Logger used for request lifecycle and debug entries.
   */
  constructor(
    private readonly config: AppConfig,
    logger: Logger,
  ) {
    this.logger = logger;
    this.requestTimeoutMs = readTimeoutMs(config);
  }

  /** Returns the trimmed `model`, or the configured model when it is missing or blank. */
  private resolveModel(model?: string) {
    return model?.trim() || this.config.llm.model;
  }

  /**
   * Builds the upstream request headers.
   *
   * @throws the error produced by `upstreamError` when no API key is configured.
   */
  private buildHeaders() {
    if (!this.config.llm.apiKey) {
      throw upstreamError('服务端缺少 LLM_API_KEY');
    }

    return {
      Authorization: `Bearer ${this.config.llm.apiKey}`,
      'Content-Type': 'application/json',
    };
  }

  /** True when verbose prompt/response logging is switched on via `LLM_DEBUG_LOG=true`. */
  private isDebugLogEnabled() {
    return this.config.rawEnv.LLM_DEBUG_LOG === 'true';
  }

  /**
   * Picks the timeout for one call and makes it safe to hand to `setTimeout`.
   *
   * A missing or non-positive request falls back to the client default. The
   * result is a whole number clamped to 1..MAX_TIMER_DELAY_MS, because a delay
   * of 0 or one beyond the timer range would fire almost immediately.
   */
  private resolveTimeoutMs(requested?: number): number {
    const candidate =
      typeof requested === 'number' && requested > 0
        ? requested
        : this.requestTimeoutMs;

    return Math.min(
      Math.max(Math.round(candidate), 1),
      UpstreamLlmClient.MAX_TIMER_DELAY_MS,
    );
  }

  /**
   * Creates the signal handed to `fetch`: it aborts when the timeout elapses
   * or when `externalSignal` aborts, whichever comes first.
   *
   * @returns the combined signal, `didTimeout()` to tell a timeout apart from
   *   an external abort, and `cleanup()` which clears the timer and detaches
   *   the external listener. `cleanup()` must be called exactly when the
   *   caller no longer wants either trigger to fire.
   */
  private createRequestSignal(
    externalSignal?: AbortSignal,
    timeoutMs = this.requestTimeoutMs,
  ) {
    const controller = new AbortController();
    let timedOut = false;
    const handleAbort = () => controller.abort(externalSignal?.reason);
    const timeout = setTimeout(() => {
      timedOut = true;
      controller.abort();
    }, this.resolveTimeoutMs(timeoutMs));

    if (externalSignal) {
      if (externalSignal.aborted) {
        // An already-aborted signal never fires 'abort' again; mirror it now.
        handleAbort();
      } else {
        externalSignal.addEventListener('abort', handleAbort, {
          once: true,
        });
      }
    }

    return {
      signal: controller.signal,
      didTimeout() {
        return timedOut;
      },
      cleanup() {
        clearTimeout(timeout);
        externalSignal?.removeEventListener('abort', handleAbort);
      },
    };
  }

  /**
   * Returns a signal that aborts when the incoming Express request emits
   * `'close'`, plus a `cleanup()` that removes the listener.
   *
   * NOTE(review): depending on the Node.js version, `'close'` on the request
   * may be emitted once the request has been fully read rather than only on a
   * client disconnect — confirm this still detects disconnects in the
   * deployed runtime.
   */
  private attachRequestAbort(request: ExpressRequest) {
    const controller = new AbortController();
    const handleClose = () => controller.abort();
    request.on('close', handleClose);

    return {
      signal: controller.signal,
      cleanup() {
        request.removeListener('close', handleClose);
      },
    };
  }

  /**
   * POSTs a chat-completion request upstream and returns the raw response.
   *
   * The timeout and `options.signal` only guard the period until `fetch`
   * resolves (response headers received); both are detached before this
   * method returns, so they do not bound reading a streamed body afterwards.
   *
   * @param body Request body; `model` is replaced by the resolved model.
   * @param options Optional abort signal, timeout override, and log label.
   * @returns The upstream `Response` with an OK status; its body is unread.
   * @throws UpstreamLlmTimeoutError when the client's own timer aborted the fetch.
   * @throws UpstreamLlmConnectivityError when `fetch` rejects with a `TypeError`.
   * @throws the error from `upstreamError` when the API key is missing or the
   *   upstream answers with a non-OK status. Any other rejection (for example
   *   the reason of an external abort) is logged and rethrown unchanged.
   */
  async requestCompletion(
    body: CompletionRequest,
    options: RequestExecutionOptions = {},
  ): Promise<globalThis.Response> {
    const timeoutMs = this.resolveTimeoutMs(options.timeoutMs);
    const requestSignal = this.createRequestSignal(options.signal, timeoutMs);
    const model = this.resolveModel(body.model);
    const debugLabel =
      typeof options.debugLabel === 'string' && options.debugLabel.trim()
        ? options.debugLabel.trim()
        : undefined;
    const logContext = {
      llm_model: model,
      llm_stream: body.stream === true,
      llm_debug_label: debugLabel,
    };

    if (this.isDebugLogEnabled()) {
      this.logger.info(
        {
          llm_model: model,
          llm_debug_label: debugLabel,
          llm_messages: body.messages,
        },
        '[LLM_DEBUG] Request prompt',
      );
    }

    this.logger.debug(
      { ...logContext, llm_timeout_ms: timeoutMs },
      'llm upstream request started',
    );

    let response: globalThis.Response;
    try {
      response = await fetch(buildCompletionUrl(this.config.llm.baseUrl), {
        method: 'POST',
        headers: this.buildHeaders(),
        body: JSON.stringify({
          ...body,
          model,
        }),
        signal: requestSignal.signal,
      });
    } catch (error) {
      if (requestSignal.didTimeout() && isAbortLikeError(error)) {
        throw new UpstreamLlmTimeoutError();
      }
      // fetch reports network-level failures as TypeError.
      if (error instanceof TypeError) {
        throw new UpstreamLlmConnectivityError();
      }
      this.logger.warn(
        { err: error, ...logContext },
        'llm upstream request failed',
      );
      throw error;
    } finally {
      requestSignal.cleanup();
    }

    if (!response.ok) {
      const rawText = await response.text();
      throw upstreamError(extractApiErrorMessage(rawText, 'LLM 上游请求失败'));
    }

    this.logger.debug(
      { ...logContext, llm_status: response.status },
      'llm upstream request succeeded',
    );
    return response;
  }

  /**
   * Sends a system + user prompt and returns the assistant's reply text.
   *
   * @returns The trimmed `choices[0].message.content` of the upstream reply.
   * @throws the error from `upstreamError` when the reply is not valid JSON or
   *   contains no non-blank text, plus everything `requestCompletion` throws.
   */
  async requestMessageContent(params: {
    systemPrompt: string;
    userPrompt: string;
    model?: string;
    signal?: AbortSignal;
    timeoutMs?: number;
    debugLabel?: string;
  }): Promise<string> {
    const response = await this.requestCompletion(
      {
        model: params.model,
        messages: [
          { role: 'system', content: params.systemPrompt },
          { role: 'user', content: params.userPrompt },
        ],
      },
      {
        signal: params.signal,
        timeoutMs: params.timeoutMs,
        debugLabel: params.debugLabel,
      },
    );

    const rawText = await response.text();
    let parsed: {
      choices?: Array<{ message?: { content?: unknown } | null } | null>;
    } | null;
    try {
      parsed = JSON.parse(rawText) as typeof parsed;
    } catch {
      // A 2xx reply with a non-JSON body is an upstream fault, not a local bug.
      throw upstreamError('LLM 返回内容解析失败');
    }

    // The payload is untrusted: only accept the field when it is really a string.
    const rawContent = parsed?.choices?.[0]?.message?.content;
    const content = typeof rawContent === 'string' ? rawContent.trim() : '';
    if (!content) {
      throw upstreamError('LLM 返回内容为空');
    }

    if (this.isDebugLogEnabled()) {
      this.logger.info(
        {
          llm_debug_label: params.debugLabel,
          llm_response_content: content,
          llm_response_length: content.length,
        },
        '[LLM_DEBUG] Response content',
      );
    }

    return content;
  }

  /**
   * Extracts the text delta carried by one event-stream line.
   *
   * @returns `choices[0].delta.content` when the line is a `data:` field with
   *   a JSON payload holding a string delta; otherwise `''` (other fields,
   *   blank lines, the `[DONE]` sentinel, and malformed JSON).
   */
  private static readSseDelta(rawLine: string): string {
    const line = rawLine.trim();
    if (!line.startsWith('data:')) {
      return '';
    }

    const data = line.slice(5).trim();
    if (!data || data === '[DONE]') {
      return '';
    }

    try {
      const parsed = JSON.parse(data) as {
        choices?: Array<{ delta?: { content?: unknown } | null } | null>;
      } | null;
      const delta = parsed?.choices?.[0]?.delta?.content;
      return typeof delta === 'string' ? delta : '';
    } catch {
      // Ignore malformed SSE frames from the upstream model.
      return '';
    }
  }

  /**
   * Sends a system + user prompt with `stream: true` and assembles the reply
   * from the upstream event stream.
   *
   * `params.onUpdate` is called with the full text accumulated so far each
   * time a non-empty delta arrives. Exceptions thrown by the callback do not
   * interrupt streaming; the first one per call is logged.
   *
   * @returns The trimmed concatenation of all deltas.
   * @throws the error from `upstreamError` when the response has no body or no
   *   text was received, plus everything `requestCompletion` throws. Errors
   *   raised while reading the body propagate unchanged.
   */
  async streamMessageContent(params: {
    systemPrompt: string;
    userPrompt: string;
    model?: string;
    signal?: AbortSignal;
    timeoutMs?: number;
    debugLabel?: string;
    onUpdate?: (text: string) => void;
  }): Promise<string> {
    const response = await this.requestCompletion(
      {
        model: params.model,
        stream: true,
        messages: [
          { role: 'system', content: params.systemPrompt },
          { role: 'user', content: params.userPrompt },
        ],
      },
      {
        signal: params.signal,
        timeoutMs: params.timeoutMs,
        debugLabel: params.debugLabel,
      },
    );

    if (!response.body) {
      throw upstreamError('LLM 流式响应体不可用');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let buffer = '';
    let accumulatedText = '';
    let updateFailureLogged = false;
    let finished = false;

    const consumeLine = (rawLine: string) => {
      const delta = UpstreamLlmClient.readSseDelta(rawLine);
      if (!delta) {
        return;
      }
      accumulatedText += delta;
      try {
        params.onUpdate?.(accumulatedText);
      } catch (error) {
        // A faulty listener must not cut the stream short, but it should not
        // fail invisibly either. Log once to avoid one entry per delta.
        if (!updateFailureLogged) {
          updateFailureLogged = true;
          this.logger.warn(
            { err: error, llm_debug_label: params.debugLabel },
            'llm stream onUpdate callback failed',
          );
        }
      }
    };

    try {
      for (;;) {
        const { done, value } = await reader.read();
        if (done) {
          break;
        }

        buffer += decoder.decode(value, { stream: true });
        // Every `data:` line is handled on its own, so framing by line (rather
        // than by blank-line-separated event) works for LF and CRLF streams.
        const lines = buffer.split(UpstreamLlmClient.SSE_LINE_BREAK);
        // The last piece has no terminator yet; keep it for the next chunk.
        buffer = lines.pop() ?? '';
        for (const line of lines) {
          consumeLine(line);
        }
      }

      // Flush bytes the decoder was holding and handle a final line that the
      // upstream did not terminate.
      buffer += decoder.decode();
      for (const line of buffer.split(UpstreamLlmClient.SSE_LINE_BREAK)) {
        consumeLine(line);
      }
      finished = true;
    } finally {
      if (!finished) {
        // Stop the transfer when reading was interrupted part-way.
        await reader.cancel().catch(() => undefined);
      }
      reader.releaseLock();
    }

    const content = accumulatedText.trim();
    if (!content) {
      throw upstreamError('LLM 返回内容为空');
    }

    return content;
  }

  /**
   * Copies an upstream body into the Express response and resolves once the
   * copy has finished or failed.
   *
   * Response headers have already been sent when this runs, so a failure
   * cannot be turned into an error response: `pipeline` tears down both
   * streams and the failure is logged instead of thrown.
   */
  private async pipeUpstreamBody(
    body: NonNullable<globalThis.Response['body']>,
    response: ExpressResponse,
    clientAbort: AbortSignal,
  ): Promise<void> {
    try {
      await pipeline(Readable.fromWeb(body as never), response, {
        signal: clientAbort,
      });
    } catch (error) {
      if (clientAbort.aborted) {
        this.logger.debug(
          { err: error },
          'llm upstream stream aborted by client',
        );
      } else {
        this.logger.warn({ err: error }, 'llm upstream stream interrupted');
      }
    }
  }

  /**
   * Relays a caller-supplied completion body to the upstream and pipes the
   * upstream response back to the client.
   *
   * The upstream fetch is aborted when the incoming request closes. No
   * timeout is applied to this call.
   *
   * @param request Incoming Express request; used for abort detection and
   *   passed to `prepareApiResponse`.
   * @param body Completion body; `model` falls back to the configured model
   *   when it is not a non-blank string.
   * @param response Express response that receives the upstream status,
   *   content type and body.
   * @throws the error from `upstreamError` when the API key is missing or the
   *   upstream answers with a non-OK status; fetch rejections are rethrown
   *   unless the request was aborted and the response has already ended.
   */
  async forwardCompletion(
    request: ExpressRequest,
    body: Record<string, unknown>,
    response: ExpressResponse,
  ): Promise<void> {
    const requestAbort = this.attachRequestAbort(request);
    let upstreamResponse: globalThis.Response;

    try {
      upstreamResponse = await fetch(buildCompletionUrl(this.config.llm.baseUrl), {
        method: 'POST',
        headers: this.buildHeaders(),
        body: JSON.stringify({
          ...body,
          // Same trimming and fallback rule as every other entry point.
          model: this.resolveModel(
            typeof body.model === 'string' ? body.model : undefined,
          ),
        }),
        signal: requestAbort.signal,
      });
    } catch (error) {
      requestAbort.cleanup();
      if (requestAbort.signal.aborted && response.writableEnded) {
        return;
      }
      throw error;
    }

    try {
      if (!upstreamResponse.ok) {
        const rawText = await upstreamResponse.text();
        throw upstreamError(extractApiErrorMessage(rawText, 'LLM 上游请求失败'));
      }

      prepareApiResponse(request, response, {
        statusCode: upstreamResponse.status,
        headers: {
          'Content-Type':
            upstreamResponse.headers.get('content-type') ||
            'application/json; charset=utf-8',
        },
      });

      if (!upstreamResponse.body) {
        response.end();
        return;
      }

      await this.pipeUpstreamBody(
        upstreamResponse.body,
        response,
        requestAbort.signal,
      );
    } finally {
      // Keep the close listener attached until the relay is really over.
      requestAbort.cleanup();
    }
  }

  /**
   * Sends a system + user prompt with `stream: true` and pipes the upstream
   * event stream to the client unchanged.
   *
   * @param params.request Incoming Express request; used for abort detection
   *   and passed to `prepareEventStreamResponse`.
   * @param params.response Express response that receives the stream.
   * @throws everything `requestCompletion` throws, unless the request was
   *   aborted and the response has already ended.
   */
  async forwardSseText(params: {
    request: ExpressRequest;
    systemPrompt: string;
    userPrompt: string;
    response: ExpressResponse;
    model?: string;
  }): Promise<void> {
    const requestAbort = this.attachRequestAbort(params.request);
    let upstreamResponse: globalThis.Response;

    try {
      upstreamResponse = await this.requestCompletion(
        {
          model: params.model,
          stream: true,
          messages: [
            { role: 'system', content: params.systemPrompt },
            { role: 'user', content: params.userPrompt },
          ],
        },
        {
          signal: requestAbort.signal,
        },
      );
    } catch (error) {
      requestAbort.cleanup();
      if (requestAbort.signal.aborted && params.response.writableEnded) {
        return;
      }
      throw error;
    }

    try {
      prepareEventStreamResponse(params.request, params.response, {
        statusCode: upstreamResponse.status,
        headers: {
          'Content-Type':
            upstreamResponse.headers.get('content-type') ||
            'text/event-stream; charset=utf-8',
        },
      });

      if (!upstreamResponse.body) {
        params.response.end();
        return;
      }

      // requestCompletion detaches its signal once headers arrive, so the
      // abort signal is passed to the pipe to stop the relay on disconnect.
      await this.pipeUpstreamBody(
        upstreamResponse.body,
        params.response,
        requestAbort.signal,
      );
    } finally {
      requestAbort.cleanup();
    }
  }
}