2026-05-08 11:44:42 +08:00
parent b08127031c
commit abf1f1ebea
249 changed files with 39411 additions and 887 deletions


@@ -16,7 +16,7 @@
The current implementation covers only the "text chat completion" main path; media generation and business orchestration are deliberately not mixed in yet (see the sketch after this list).
1. Supports OpenAI-compatible JSON requests and SSE incremental responses.
2. Supports tagging by provider, but does not pull business prompts, SSE forwarding, or module-state write-back into this crate.
2. Supports tagging by provider, but does not write business prompts, SSE forwarding, or module state back into this crate.
3. `DashScope` is currently wired in only by having the caller explicitly provide a compatible text-gateway base url; the image API is not reused.
4. Character animation, images, video, and asset polling stay in the follow-up `platform-llm` / `platform-oss` / business-module tasks and will be implemented separately there.
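A minimal usage sketch of this text main path, pieced together from the constructors and builders that appear later in this diff (`LlmTextRequest::new`, `with_model`, `LlmMessage::system`); the crate path and the `LlmMessage::user` constructor are assumptions for illustration, not part of the commit:

```rust
// Hypothetical crate path; `LlmMessage::user` is assumed to exist alongside
// the `system` / `assistant` constructors shown in this diff.
use my_llm_crate::{LlmMessage, LlmTextRequest};

fn build_chat_request() -> LlmTextRequest {
    LlmTextRequest::new(vec![
        LlmMessage::system("You are a helpful assistant"),
        LlmMessage::user("Summarize the release notes in one sentence"),
    ])
    // Optional per-request model override; omit it to fall back to the provider default.
    .with_model("gpt-5")
}
```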


@@ -57,7 +57,18 @@ pub enum LlmMessageRole {
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct LlmMessage {
    pub role: LlmMessageRole,
    // Keep the plain-text field for Chat Completions compatibility and existing callers; Responses multimodal requests read content_parts instead.
    pub content: String,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub content_parts: Vec<LlmMessageContentPart>,
}
// Responses multimodal content part. Field names stay snake_case to match the upstream OpenAI-compatible protocol.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum LlmMessageContentPart {
    InputText { text: String },
    InputImage { image_url: String },
}
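Given the `#[serde(tag = "type", rename_all = "snake_case")]` attribute above, each content part serializes to the tagged JSON object the Responses API expects (the same shape asserted in the test at the end of this diff). A small check, assuming `serde_json` is available as a dev dependency:

```rust
#[test]
fn input_image_part_serializes_with_snake_case_tag() {
    let part = LlmMessageContentPart::InputImage {
        image_url: "https://example.com/ref.png".to_string(),
    };
    // The variant name becomes the `type` tag; the variant fields sit next to it.
    assert_eq!(
        serde_json::to_value(&part).expect("part should serialize"),
        serde_json::json!({
            "type": "input_image",
            "image_url": "https://example.com/ref.png"
        })
    );
}
```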
// The text completion request is frozen to the minimal loop of "message list + optional model override + optional max_tokens".
@@ -179,10 +190,10 @@ struct ResponsesInputMessage {
}
#[derive(Serialize)]
struct ResponsesInputContentPart {
    #[serde(rename = "type")]
    part_type: &'static str,
    text: String,
#[serde(tag = "type", rename_all = "snake_case")]
enum ResponsesInputContentPart {
    InputText { text: String },
    InputImage { image_url: String },
}
#[derive(Serialize)]
@@ -398,6 +409,7 @@ impl LlmMessage {
        Self {
            role,
            content: content.into(),
            content_parts: Vec::new(),
        }
    }
@@ -412,6 +424,39 @@ impl LlmMessage {
    pub fn assistant(content: impl Into<String>) -> Self {
        Self::new(LlmMessageRole::Assistant, content)
    }
    pub fn multimodal(role: LlmMessageRole, content_parts: Vec<LlmMessageContentPart>) -> Self {
        let content = content_parts
            .iter()
            .filter_map(|part| match part {
                LlmMessageContentPart::InputText { text } => Some(text.as_str()),
                LlmMessageContentPart::InputImage { .. } => None,
            })
            .collect::<Vec<_>>()
            .join("\n");
        Self {
            role,
            content,
            content_parts,
        }
    }
    pub fn user_multimodal(content_parts: Vec<LlmMessageContentPart>) -> Self {
        Self::multimodal(LlmMessageRole::User, content_parts)
    }
    pub fn with_image_url(mut self, image_url: impl Into<String>) -> Self {
        if self.content_parts.is_empty() && !self.content.trim().is_empty() {
            self.content_parts.push(LlmMessageContentPart::InputText {
                text: self.content.clone(),
            });
        }
        self.content_parts.push(LlmMessageContentPart::InputImage {
            image_url: image_url.into(),
        });
        self
    }
}
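A usage sketch of the builder path above: `with_image_url` first promotes any existing plain text into an `InputText` part, then appends the `InputImage` part, so `content` stays usable for Chat Completions while `content_parts` drives the Responses mapping. The `LlmMessage::user` constructor is assumed here alongside the `system` / `assistant` ones shown in this diff:

```rust
let message = LlmMessage::user("Turn this image into a jigsaw puzzle")
    .with_image_url("https://example.com/ref.png");
// One InputText part (the promoted text) plus one InputImage part.
assert_eq!(message.content_parts.len(), 2);
// The original plain-text content is left untouched.
assert_eq!(message.content, "Turn this image into a jigsaw puzzle");
```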
impl LlmTextRequest {
@@ -466,9 +511,27 @@ impl LlmTextRequest {
        }
        for message in &self.messages {
            if message.content.trim().is_empty() {
            let has_text = !message.content.trim().is_empty()
                || message.content_parts.iter().any(|part| match part {
                    LlmMessageContentPart::InputText { text } => !text.trim().is_empty(),
                    LlmMessageContentPart::InputImage { .. } => false,
                });
            let has_image = message.content_parts.iter().any(|part| match part {
                LlmMessageContentPart::InputImage { image_url } => !image_url.trim().is_empty(),
                LlmMessageContentPart::InputText { .. } => false,
            });
            if !has_text && !has_image {
                return Err(LlmError::InvalidRequest(
                    "LLM message.content must not be empty".to_string(),
                    "LLM message content must not be empty".to_string(),
                ));
            }
            if message.content_parts.iter().any(|part| match part {
                LlmMessageContentPart::InputText { text } => text.trim().is_empty(),
                LlmMessageContentPart::InputImage { image_url } => image_url.trim().is_empty(),
            }) {
                return Err(LlmError::InvalidRequest(
                    "LLM message content part must not be empty".to_string(),
                ));
            }
        }
@@ -1073,10 +1136,30 @@ fn map_responses_input_messages(messages: &[LlmMessage]) -> Vec<ResponsesInputMe
                LlmMessageRole::User => "user",
                LlmMessageRole::Assistant => "assistant",
            },
            content: vec![ResponsesInputContentPart {
                part_type: "input_text",
                text: message.content.clone(),
            }],
            content: map_responses_content_parts(message),
        })
        .collect()
}
fn map_responses_content_parts(message: &LlmMessage) -> Vec<ResponsesInputContentPart> {
    if message.content_parts.is_empty() {
        return vec![ResponsesInputContentPart::InputText {
            text: message.content.clone(),
        }];
    }
    message
        .content_parts
        .iter()
        .map(|part| match part {
            LlmMessageContentPart::InputText { text } => {
                ResponsesInputContentPart::InputText { text: text.clone() }
            }
            LlmMessageContentPart::InputImage { image_url } => {
                ResponsesInputContentPart::InputImage {
                    image_url: image_url.clone(),
                }
            }
        })
        .collect()
}
@@ -1764,6 +1847,64 @@ mod tests {
        );
    }
    #[tokio::test]
    async fn responses_multimodal_request_sends_input_text_and_input_image() {
        let listener = TcpListener::bind("127.0.0.1:0").expect("listener should bind");
        let address = listener.local_addr().expect("listener should have addr");
        let server_handle = thread::spawn(move || {
            let (mut stream, _) = listener.accept().expect("request should connect");
            let request_text = read_request(&mut stream);
            write_response(
                &mut stream,
                MockResponse {
                    status_line: "200 OK",
                    content_type: "application/json; charset=utf-8",
                    body: r#"{"id":"resp_multimodal","model":"gpt-5","output_text":"","status":"completed"}"#.to_string(),
                    extra_headers: Vec::new(),
                },
            );
            request_text
        });
        let client = build_test_client(format!("http://{address}"), 0);
        let response = client
            .request_text(
                LlmTextRequest::new(vec![
                    LlmMessage::system("You are a creative interactive content generation Agent"),
                    LlmMessage::user_multimodal(vec![
                        LlmMessageContentPart::InputText {
                            text: "Turn this image into a jigsaw puzzle".to_string(),
                        },
                        LlmMessageContentPart::InputImage {
                            image_url: "https://example.com/ref.png".to_string(),
                        },
                    ]),
                ])
                .with_model("gpt-5")
                .with_responses_api(),
            )
            .await
            .expect("responses multimodal request_text should succeed");
        let request_text = server_handle.join().expect("server thread should join");
        let request_body = request_text
            .split("\r\n\r\n")
            .nth(1)
            .expect("request body should exist");
        let request_json: serde_json::Value =
            serde_json::from_str(request_body).expect("request body should be json");
        assert_eq!(response.model, "gpt-5");
        assert_eq!(request_json["model"], serde_json::json!("gpt-5"));
        assert_eq!(
            request_json["input"][1]["content"],
            serde_json::json!([
                { "type": "input_text", "text": "Turn this image into a jigsaw puzzle" },
                { "type": "input_image", "image_url": "https://example.com/ref.png" }
            ])
        );
    }
    #[tokio::test]
    async fn stream_text_accumulates_sse_response() {
        let server_url = spawn_mock_server(vec![MockResponse {