@@ -18,6 +18,7 @@ pub const DEFAULT_REQUEST_TIMEOUT_MS: u64 = 30_000;
 pub const DEFAULT_MAX_RETRIES: u32 = 1;
 pub const DEFAULT_RETRY_BACKOFF_MS: u64 = 500;
 pub const CHAT_COMPLETIONS_PATH: &str = "/chat/completions";
+pub const RESPONSES_PATH: &str = "/responses";
 const DEFAULT_LLM_RAW_LOG_DIR: &str = "logs/llm-raw";

 static LLM_RAW_LOG_SEQUENCE: AtomicU64 = AtomicU64::new(1);
@@ -66,6 +67,14 @@ pub struct LlmTextRequest {
     pub messages: Vec<LlmMessage>,
     pub max_tokens: Option<u32>,
     pub enable_web_search: bool,
+    pub protocol: LlmTextProtocol,
 }

+// The text protocol must be selected explicitly by each business request, so a global default model never mixes different scenarios into the same upstream shape.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LlmTextProtocol {
+    ChatCompletions,
+    Responses,
+}
+
 // During streaming consumption the upper layer receives "accumulated text + current delta", so no layer has to redo the concatenation itself.
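// Usage sketch (illustrative, not part of this commit; assumes the builder API
// introduced here and an already-configured `LlmClient` named `client`): a
// business request opts into the Responses protocol explicitly, and a streaming
// consumer reads the pre-accumulated text instead of stitching deltas itself.
//
//     let request = LlmTextRequest::single_turn("system prompt", "user prompt")
//         .with_model("deepseek-v3-2-251201")
//         .with_responses_api()
//         .with_web_search(true)
//         .with_max_tokens(128);
//     let response = client
//         .stream_text(request, |delta| println!("{}", delta.accumulated_text))
//         .await?;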
@@ -117,9 +126,16 @@ pub struct LlmClient {
 }

 #[derive(Serialize)]
-struct ChatCompletionsRequestBody<'a> {
-    model: &'a str,
-    messages: &'a [LlmMessage],
+#[serde(untagged)]
+enum LlmRequestBody {
+    ChatCompletions(ChatCompletionsRequestBody),
+    Responses(ResponsesRequestBody),
+}
+
+#[derive(Serialize)]
+struct ChatCompletionsRequestBody {
+    model: String,
+    messages: Vec<LlmMessage>,
     stream: bool,
     #[serde(skip_serializing_if = "Option::is_none")]
     max_tokens: Option<u32>,
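// A minimal, self-contained sketch of the `#[serde(untagged)]` behavior relied
// on above (stand-in types, not the crate's real definitions): serde writes the
// chosen variant's payload directly, with no enum tag, so each protocol body
// serializes exactly as its upstream endpoint expects.
//
// use serde::Serialize;
//
// #[derive(Serialize)]
// #[serde(untagged)]
// enum Body {
//     Chat { model: String, stream: bool },
//     Responses { model: String, input: Vec<String> },
// }
//
// fn main() {
//     let chat = Body::Chat { model: "m".into(), stream: false };
//     println!("{}", serde_json::to_string(&chat).unwrap()); // {"model":"m","stream":false}
//     let resp = Body::Responses { model: "m".into(), input: vec!["hi".into()] };
//     println!("{}", serde_json::to_string(&resp).unwrap()); // {"model":"m","input":["hi"]}
// }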
@@ -130,10 +146,42 @@ struct ChatCompletionsRequestBody<'a> {
 #[derive(Serialize)]
 struct ChatCompletionsWebSearchOptions {}

+#[derive(Serialize)]
+struct ResponsesRequestBody {
+    model: String,
+    stream: bool,
+    input: Vec<ResponsesInputMessage>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    max_output_tokens: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    tools: Option<Vec<ResponsesWebSearchTool>>,
+}
+
+#[derive(Serialize)]
+struct ResponsesInputMessage {
+    role: &'static str,
+    content: Vec<ResponsesInputContentPart>,
+}
+
+#[derive(Serialize)]
+struct ResponsesInputContentPart {
+    #[serde(rename = "type")]
+    part_type: &'static str,
+    text: String,
+}
+
+#[derive(Serialize)]
+struct ResponsesWebSearchTool {
+    #[serde(rename = "type")]
+    tool_type: &'static str,
+    max_keyword: u8,
+}
+
 #[derive(Serialize)]
 #[serde(rename_all = "camelCase")]
 struct LlmRawFailureInputLog<'a> {
     provider: &'static str,
+    protocol: &'static str,
     model: &'a str,
     stream: bool,
     attempt: u32,
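// For reference, a Responses request built from these structs with web search
// enabled serializes to roughly the following JSON (values illustrative; the
// exact shape is what the test assertions later in this diff check):
//
//     {
//       "model": "deepseek-v3-2-251201",
//       "stream": false,
//       "input": [
//         { "role": "system",
//           "content": [ { "type": "input_text", "text": "..." } ] }
//       ],
//       "max_output_tokens": 128,
//       "tools": [ { "type": "web_search", "max_keyword": 3 } ]
//     }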
@@ -181,10 +229,48 @@ struct ChatCompletionsContentPart {
     text: Option<String>,
 }

-#[derive(Default)]
+#[derive(Deserialize)]
+struct ResponsesResponseEnvelope {
+    id: Option<String>,
+    model: Option<String>,
+    #[serde(default)]
+    output_text: Option<String>,
+    #[serde(default)]
+    output: Vec<ResponsesOutputItem>,
+    #[serde(default)]
+    status: Option<String>,
+    usage: Option<ResponsesUsage>,
+}
+
+#[derive(Deserialize)]
+struct ResponsesOutputItem {
+    #[serde(default)]
+    content: Vec<ResponsesOutputContentPart>,
+}
+
+#[derive(Deserialize)]
+struct ResponsesOutputContentPart {
+    #[serde(rename = "type")]
+    #[allow(dead_code)]
+    part_type: Option<String>,
+    #[serde(default)]
+    text: Option<String>,
+}
+
+#[derive(Deserialize)]
+struct ResponsesUsage {
+    #[serde(default)]
+    input_tokens: u64,
+    #[serde(default)]
+    output_tokens: u64,
+    #[serde(default)]
+    total_tokens: u64,
+}
+
 struct OpenAiCompatibleSseParser {
     buffer: String,
     raw_text: String,
+    protocol: LlmTextProtocol,
 }

 #[derive(Debug)]
@@ -282,6 +368,14 @@ impl LlmConfig {
             CHAT_COMPLETIONS_PATH.trim_start_matches('/')
         )
     }
+
+    pub fn responses_url(&self) -> String {
+        format!(
+            "{}/{}",
+            self.base_url.trim_end_matches('/'),
+            RESPONSES_PATH.trim_start_matches('/')
+        )
+    }
 }

 impl LlmMessage {
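// Self-contained sketch of the slash normalization used by
// `chat_completions_url` / `responses_url` above: trimming the base's trailing
// '/' and the path's leading '/' guarantees exactly one separator either way.
//
// fn join_url(base: &str, path: &str) -> String {
//     format!("{}/{}", base.trim_end_matches('/'), path.trim_start_matches('/'))
// }
//
// fn main() {
//     assert_eq!(join_url("https://example.com/base/", "/responses"),
//                "https://example.com/base/responses");
//     assert_eq!(join_url("https://example.com/base", "/responses"),
//                "https://example.com/base/responses");
// }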
@@ -312,6 +406,7 @@ impl LlmTextRequest {
             messages,
             max_tokens: None,
             enable_web_search: false,
+            protocol: LlmTextProtocol::ChatCompletions,
         }
     }

@@ -337,6 +432,11 @@ impl LlmTextRequest {
         self
     }

+    pub fn with_responses_api(mut self) -> Self {
+        self.protocol = LlmTextProtocol::Responses;
+        self
+    }
+
     fn validate(&self) -> Result<(), LlmError> {
         if self.messages.is_empty() {
             return Err(LlmError::InvalidRequest(
@@ -372,6 +472,15 @@ impl LlmTextRequest {
     }
 }

+impl LlmTextProtocol {
+    fn as_str(self) -> &'static str {
+        match self {
+            Self::ChatCompletions => "chat_completions",
+            Self::Responses => "responses",
+        }
+    }
+}
+
 impl fmt::Display for LlmError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
@@ -430,18 +539,23 @@ impl LlmClient {
                 llm_error
             })?;

-        parse_chat_completions_response(self.config.provider(), &resolved_model, raw_text.as_str())
-            .map_err(|error| {
-                log_llm_raw_failure(
-                    &self.config,
-                    &request,
-                    false,
-                    1,
-                    "parse_response_failed",
-                    raw_text.as_str(),
-                );
-                error
-            })
+        parse_text_response(
+            request.protocol,
+            self.config.provider(),
+            &resolved_model,
+            raw_text.as_str(),
+        )
+        .map_err(|error| {
+            log_llm_raw_failure(
+                &self.config,
+                &request,
+                false,
+                1,
+                "parse_response_failed",
+                raw_text.as_str(),
+            );
+            error
+        })
     }

     pub async fn request_single_message_text(
@@ -470,7 +584,7 @@ impl LlmClient {
             .and_then(|value| value.to_str().ok())
             .map(str::to_string);

-        let mut parser = OpenAiCompatibleSseParser::default();
+        let mut parser = OpenAiCompatibleSseParser::new(request.protocol);
         let mut accumulated_text = String::new();
         let mut finish_reason = None;
         let mut undecoded_chunk_bytes = Vec::new();
@@ -658,29 +772,27 @@ impl LlmClient {
         request: &LlmTextRequest,
         stream: bool,
     ) -> Result<reqwest::Response, LlmError> {
-        let request_body = ChatCompletionsRequestBody {
-            model: request.resolved_model(self.config.model()),
-            messages: request.messages.as_slice(),
-            stream,
-            max_tokens: request.max_tokens,
-            web_search_options: request
-                .enable_web_search
-                .then_some(ChatCompletionsWebSearchOptions {}),
-        };
+        let request_body = build_request_body(request, self.config.model(), stream);
+        let model = request.resolved_model(self.config.model());
+        let url = match request.protocol {
+            LlmTextProtocol::ChatCompletions => self.config.chat_completions_url(),
+            LlmTextProtocol::Responses => self.config.responses_url(),
+        };
         let max_attempts = self.config.max_retries().saturating_add(1);

         for attempt in 1..=max_attempts {
             debug!(
-                "platform-llm request started: provider={}, stream={}, attempt={}, model={}",
+                "platform-llm request started: provider={}, protocol={}, stream={}, attempt={}, model={}",
                 self.config.provider().as_str(),
+                request.protocol.as_str(),
                 stream,
                 attempt,
-                request_body.model
+                model
             );

             let send_result = self
                 .http_client
-                .post(self.config.chat_completions_url())
+                .post(url.as_str())
                 .bearer_auth(self.config.api_key())
                 .json(&request_body)
                 .timeout(Duration::from_millis(self.config.request_timeout_ms()))
@@ -690,8 +802,9 @@ impl LlmClient {
             match send_result {
                 Ok(response) if response.status().is_success() => {
                     debug!(
-                        "platform-llm request succeeded: provider={}, stream={}, attempt={}, status={}",
+                        "platform-llm request succeeded: provider={}, protocol={}, stream={}, attempt={}, status={}",
                         self.config.provider().as_str(),
+                        request.protocol.as_str(),
                         stream,
                         attempt,
                         response.status().as_u16()
@@ -705,8 +818,9 @@ impl LlmClient {

                 if should_retry_status(status) && attempt < max_attempts {
                     warn!(
-                        "platform-llm request retrying after upstream status: provider={}, attempt={}, status={}, message={}",
+                        "platform-llm request retrying after upstream status: provider={}, protocol={}, attempt={}, status={}, message={}",
                         self.config.provider().as_str(),
+                        request.protocol.as_str(),
                         attempt,
                         status.as_u16(),
                         message
@@ -731,8 +845,9 @@ impl LlmClient {
                 Err(error) if error.is_timeout() => {
                     if attempt < max_attempts {
                         warn!(
-                            "platform-llm request retrying after timeout: provider={}, attempt={}",
+                            "platform-llm request retrying after timeout: provider={}, protocol={}, attempt={}",
                             self.config.provider().as_str(),
+                            request.protocol.as_str(),
                             attempt
                         );
                         self.sleep_before_retry(attempt).await;
@@ -754,8 +869,9 @@ impl LlmClient {
                     let message = error.to_string();
                     if attempt < max_attempts {
                         warn!(
-                            "platform-llm request retrying after connectivity failure: provider={}, attempt={}, error={}",
+                            "platform-llm request retrying after connectivity failure: provider={}, protocol={}, attempt={}, error={}",
                             self.config.provider().as_str(),
+                            request.protocol.as_str(),
                             attempt,
                             message
                         );
@@ -810,6 +926,14 @@ impl LlmClient {
 }

 impl OpenAiCompatibleSseParser {
+    fn new(protocol: LlmTextProtocol) -> Self {
+        Self {
+            buffer: String::new(),
+            raw_text: String::new(),
+            protocol,
+        }
+    }
+
     fn push_chunk(&mut self, chunk: &str) -> Result<Vec<ParsedStreamEvent>, LlmError> {
         self.raw_text.push_str(chunk);
         self.buffer.push_str(chunk);
@@ -837,7 +961,7 @@ impl OpenAiCompatibleSseParser {
             let block = self.buffer[..boundary].to_string();
             self.buffer = self.buffer[(boundary + 2)..].to_string();

-            if let Some(event) = parse_sse_event_block(block.as_str())? {
+            if let Some(event) = parse_sse_event_block(self.protocol, block.as_str())? {
                 events.push(event);
             }
         }
@@ -855,6 +979,55 @@ fn normalize_non_empty(value: String, error_message: &str) -> Result<String, Llm
     Ok(trimmed)
 }

+fn build_request_body(
+    request: &LlmTextRequest,
+    fallback_model: &str,
+    stream: bool,
+) -> LlmRequestBody {
+    match request.protocol {
+        LlmTextProtocol::ChatCompletions => {
+            LlmRequestBody::ChatCompletions(ChatCompletionsRequestBody {
+                model: request.resolved_model(fallback_model).to_string(),
+                messages: request.messages.clone(),
+                stream,
+                max_tokens: request.max_tokens,
+                web_search_options: request
+                    .enable_web_search
+                    .then_some(ChatCompletionsWebSearchOptions {}),
+            })
+        }
+        LlmTextProtocol::Responses => LlmRequestBody::Responses(ResponsesRequestBody {
+            model: request.resolved_model(fallback_model).to_string(),
+            stream,
+            input: map_responses_input_messages(request.messages.as_slice()),
+            max_output_tokens: request.max_tokens,
+            tools: request.enable_web_search.then(|| {
+                vec![ResponsesWebSearchTool {
+                    tool_type: "web_search",
+                    max_keyword: 3,
+                }]
+            }),
+        }),
+    }
+}
+
+fn map_responses_input_messages(messages: &[LlmMessage]) -> Vec<ResponsesInputMessage> {
+    messages
+        .iter()
+        .map(|message| ResponsesInputMessage {
+            role: match message.role {
+                LlmMessageRole::System => "system",
+                LlmMessageRole::User => "user",
+                LlmMessageRole::Assistant => "assistant",
+            },
+            content: vec![ResponsesInputContentPart {
+                part_type: "input_text",
+                text: message.content.clone(),
+            }],
+        })
+        .collect()
+}
+
 fn log_llm_raw_failure(
     config: &LlmConfig,
     request: &LlmTextRequest,
@@ -890,6 +1063,7 @@ fn write_llm_raw_failure(
     let model = request.resolved_model(config.model());
     let input_log = LlmRawFailureInputLog {
         provider: config.provider().as_str(),
+        protocol: request.protocol.as_str(),
         model,
         stream,
         attempt,
@@ -936,6 +1110,20 @@ fn sanitize_log_file_segment(value: &str) -> String {
     }
 }

+fn parse_text_response(
+    protocol: LlmTextProtocol,
+    provider: LlmProvider,
+    fallback_model: &str,
+    raw_text: &str,
+) -> Result<LlmTextResponse, LlmError> {
+    match protocol {
+        LlmTextProtocol::ChatCompletions => {
+            parse_chat_completions_response(provider, fallback_model, raw_text)
+        }
+        LlmTextProtocol::Responses => parse_responses_response(provider, fallback_model, raw_text),
+    }
+}
+
 fn parse_chat_completions_response(
     provider: LlmProvider,
     fallback_model: &str,
@@ -967,6 +1155,56 @@ fn parse_chat_completions_response(
     })
 }

+fn parse_responses_response(
+    provider: LlmProvider,
+    fallback_model: &str,
+    raw_text: &str,
+) -> Result<LlmTextResponse, LlmError> {
+    let parsed: ResponsesResponseEnvelope = serde_json::from_str(raw_text).map_err(|error| {
+        LlmError::Deserialize(format!("解析 LLM Responses JSON 响应失败:{error}"))
+    })?;
+    let content = extract_responses_text(&parsed)
+        .ok_or(LlmError::EmptyResponse)?
+        .trim()
+        .to_string();
+
+    if content.is_empty() {
+        return Err(LlmError::EmptyResponse);
+    }
+
+    Ok(LlmTextResponse {
+        provider,
+        model: parsed.model.unwrap_or_else(|| fallback_model.to_string()),
+        content,
+        finish_reason: parsed.status,
+        response_id: parsed.id,
+        usage: parsed.usage.map(|usage| LlmTokenUsage {
+            prompt_tokens: usage.input_tokens,
+            completion_tokens: usage.output_tokens,
+            total_tokens: usage.total_tokens,
+        }),
+    })
+}
+
+fn extract_responses_text(parsed: &ResponsesResponseEnvelope) -> Option<String> {
+    parsed
+        .output_text
+        .as_deref()
+        .map(str::to_string)
+        .filter(|text| !text.is_empty())
+        .or_else(|| {
+            let text = parsed
+                .output
+                .iter()
+                .flat_map(|item| item.content.iter())
+                .filter_map(|part| part.text.as_deref())
+                .collect::<Vec<_>>()
+                .join("");
+
+            if text.is_empty() { None } else { Some(text) }
+        })
+}
+
 fn extract_message_text(choice: &ChatCompletionsChoice) -> Option<String> {
     choice
         .message
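// Self-contained sketch of the fallback order in `extract_responses_text`,
// reimplemented over `serde_json::Value` for illustration (the crate uses the
// typed envelope structs): prefer the top-level convenience field
// `output_text`; otherwise stitch together the text parts nested under
// `output[].content[]`.
//
// fn extract_text(v: &serde_json::Value) -> Option<String> {
//     v.get("output_text")
//         .and_then(|t| t.as_str())
//         .filter(|t| !t.is_empty())
//         .map(str::to_string)
//         .or_else(|| {
//             let text: String = v
//                 .get("output")?
//                 .as_array()?
//                 .iter()
//                 .filter_map(|item| item.get("content").and_then(|c| c.as_array()))
//                 .flatten()
//                 .filter_map(|part| part.get("text").and_then(|t| t.as_str()))
//                 .collect();
//             if text.is_empty() { None } else { Some(text) }
//         })
// }
//
// // extract_text(&json!({"output":[{"content":[{"text":"你"},{"text":"好"}]}]}))
// // returns Some("你好".to_string()).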
@@ -1016,7 +1254,10 @@ fn decode_utf8_stream_chunk(bytes: &[u8]) -> Result<(String, Vec<u8>), LlmError>
     }
 }

-fn parse_sse_event_block(block: &str) -> Result<Option<ParsedStreamEvent>, LlmError> {
+fn parse_sse_event_block(
+    protocol: LlmTextProtocol,
+    block: &str,
+) -> Result<Option<ParsedStreamEvent>, LlmError> {
     let data_lines = block
         .lines()
         .filter_map(|line| line.trim().strip_prefix("data:"))
@@ -1032,6 +1273,10 @@ fn parse_sse_event_block(block: &str) -> Result<Option<ParsedStreamEvent>, LlmEr
         return Ok(None);
     }

+    if protocol == LlmTextProtocol::Responses {
+        return parse_responses_sse_event(data.as_str());
+    }
+
     let parsed: ChatCompletionsResponseEnvelope = serde_json::from_str(data.as_str())
         .map_err(|error| LlmError::Deserialize(format!("解析 LLM SSE 事件失败:{error}")))?;
     let first_choice = parsed
@@ -1045,6 +1290,44 @@ fn parse_sse_event_block(block: &str) -> Result<Option<ParsedStreamEvent>, LlmEr
     }))
 }

+fn parse_responses_sse_event(data: &str) -> Result<Option<ParsedStreamEvent>, LlmError> {
+    let parsed: serde_json::Value = serde_json::from_str(data).map_err(|error| {
+        LlmError::Deserialize(format!("解析 LLM Responses SSE 事件失败:{error}"))
+    })?;
+    let event_type = parsed
+        .get("type")
+        .and_then(serde_json::Value::as_str)
+        .unwrap_or_default();
+
+    match event_type {
+        "response.output_text.delta" => Ok(Some(ParsedStreamEvent {
+            delta_text: parsed
+                .get("delta")
+                .and_then(serde_json::Value::as_str)
+                .map(str::to_string),
+            finish_reason: None,
+        })),
+        "response.completed" => Ok(Some(ParsedStreamEvent {
+            delta_text: None,
+            finish_reason: Some("completed".to_string()),
+        })),
+        "response.failed" | "error" => {
+            let message = parsed
+                .get("error")
+                .and_then(|error| error.get("message"))
+                .and_then(serde_json::Value::as_str)
+                .or_else(|| parsed.get("message").and_then(serde_json::Value::as_str))
+                .unwrap_or("LLM Responses SSE 返回失败事件")
+                .to_string();
+            Err(LlmError::Upstream {
+                status_code: 502,
+                message,
+            })
+        }
+        _ => Ok(None),
+    }
+}
+
 fn should_retry_status(status: StatusCode) -> bool {
     status == StatusCode::REQUEST_TIMEOUT
         || status == StatusCode::TOO_MANY_REQUESTS
@@ -1151,11 +1434,12 @@ mod tests {
             config.chat_completions_url(),
             "https://example.com/base/chat/completions"
         );
+        assert_eq!(config.responses_url(), "https://example.com/base/responses");
     }

     #[test]
     fn sse_parser_handles_split_chunks_and_done_marker() {
-        let mut parser = OpenAiCompatibleSseParser::default();
+        let mut parser = OpenAiCompatibleSseParser::new(LlmTextProtocol::ChatCompletions);
         let events_a = parser
             .push_chunk("data: {\"choices\":[{\"delta\":{\"content\":\"你\"}}]}\r\n\r\n")
             .expect("first chunk should parse");
@@ -1170,6 +1454,24 @@ mod tests {
         assert_eq!(events_b[0].finish_reason.as_deref(), Some("stop"));
     }

+    #[test]
+    fn responses_sse_parser_only_emits_output_text_delta() {
+        let mut parser = OpenAiCompatibleSseParser::new(LlmTextProtocol::Responses);
+        let events = parser
+            .push_chunk(concat!(
+                "data: {\"type\":\"response.created\"}\n\n",
+                "data: {\"type\":\"response.output_text.delta\",\"delta\":\"你\"}\n\n",
+                "data: {\"type\":\"response.output_text.delta\",\"delta\":\"好\"}\n\n",
+                "data: {\"type\":\"response.completed\"}\n\n",
+            ))
+            .expect("responses stream should parse");
+
+        assert_eq!(events.len(), 3);
+        assert_eq!(events[0].delta_text.as_deref(), Some("你"));
+        assert_eq!(events[1].delta_text.as_deref(), Some("好"));
+        assert_eq!(events[2].finish_reason.as_deref(), Some("completed"));
+    }
+
     #[test]
     fn decode_utf8_stream_chunk_preserves_incomplete_multibyte_suffix() {
         let full_bytes = "你好".as_bytes();
@@ -1284,6 +1586,72 @@ mod tests {
         assert_eq!(request_json["web_search_options"], serde_json::json!({}));
     }

+    #[tokio::test]
+    async fn request_text_sends_responses_body_with_web_search_tool() {
+        let listener = TcpListener::bind("127.0.0.1:0").expect("listener should bind");
+        let address = listener.local_addr().expect("listener should have addr");
+        let server_handle = thread::spawn(move || {
+            let (mut stream, _) = listener.accept().expect("request should connect");
+            let request_text = read_request(&mut stream);
+            write_response(
+                &mut stream,
+                MockResponse {
+                    status_line: "200 OK",
+                    content_type: "application/json; charset=utf-8",
+                    body: r#"{"id":"resp_responses","model":"deepseek-v3-2-251201","output_text":"Responses 成功","status":"completed","usage":{"input_tokens":9,"output_tokens":4,"total_tokens":13}}"#.to_string(),
+                    extra_headers: Vec::new(),
+                },
+            );
+            request_text
+        });
+
+        let client = build_test_client(format!("http://{address}"), 0);
+        let response = client
+            .request_text(
+                LlmTextRequest::single_turn("系统", "用户")
+                    .with_model("deepseek-v3-2-251201")
+                    .with_responses_api()
+                    .with_web_search(true)
+                    .with_max_tokens(128),
+            )
+            .await
+            .expect("responses request_text should succeed");
+
+        let request_text = server_handle.join().expect("server thread should join");
+        let request_line = request_text.lines().next().unwrap_or_default();
+        let request_body = request_text
+            .split("\r\n\r\n")
+            .nth(1)
+            .expect("request body should exist");
+        let request_json: serde_json::Value =
+            serde_json::from_str(request_body).expect("request body should be json");
+
+        assert!(request_line.contains("POST /responses HTTP/1.1"));
+        assert_eq!(response.content, "Responses 成功");
+        assert_eq!(response.model, "deepseek-v3-2-251201");
+        assert_eq!(
+            response.usage,
+            Some(LlmTokenUsage {
+                prompt_tokens: 9,
+                completion_tokens: 4,
+                total_tokens: 13,
+            })
+        );
+        assert_eq!(
+            request_json["model"],
+            serde_json::json!("deepseek-v3-2-251201")
+        );
+        assert_eq!(request_json["stream"], serde_json::json!(false));
+        assert_eq!(
+            request_json["tools"],
+            serde_json::json!([{ "type": "web_search", "max_keyword": 3 }])
+        );
+        assert_eq!(
+            request_json["input"][0]["content"][0],
+            serde_json::json!({ "type": "input_text", "text": "系统" })
+        );
+    }
+
     #[tokio::test]
     async fn stream_text_accumulates_sse_response() {
         let server_url = spawn_mock_server(vec![MockResponse {
@@ -1314,6 +1682,41 @@ mod tests {
         assert_eq!(response.response_id.as_deref(), Some("req_stream_01"));
     }

+    #[tokio::test]
+    async fn stream_text_accumulates_responses_sse_response() {
+        let server_url = spawn_mock_server(vec![MockResponse {
+            status_line: "200 OK",
+            content_type: "text/event-stream; charset=utf-8",
+            body: concat!(
+                "data: {\"type\":\"response.output_text.delta\",\"delta\":\"你\"}\n\n",
+                "data: {\"type\":\"response.output_text.delta\",\"delta\":\"好\"}\n\n",
+                "data: {\"type\":\"response.completed\"}\n\n"
+            )
+            .to_string(),
+            extra_headers: vec![("x-request-id", "req_responses_stream_01")],
+        }]);
+
+        let client = build_test_client(server_url, 0);
+        let mut updates = Vec::new();
+        let response = client
+            .stream_text(
+                LlmTextRequest::single_turn("系统", "用户").with_responses_api(),
+                |delta| {
+                    updates.push(delta.accumulated_text.clone());
+                },
+            )
+            .await
+            .expect("responses stream_text should succeed");
+
+        assert_eq!(updates, vec!["你".to_string(), "你好".to_string()]);
+        assert_eq!(response.content, "你好");
+        assert_eq!(response.finish_reason.as_deref(), Some("completed"));
+        assert_eq!(
+            response.response_id.as_deref(),
+            Some("req_responses_stream_01")
+        );
+    }
+
     #[tokio::test]
     async fn request_text_writes_raw_failure_logs_after_parse_error() {
         let log_dir = std::env::temp_dir().join(format!(