1
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
当前实现只覆盖“文本 chat completion”主链,不提前混入媒体生成和业务编排:
|
||||
|
||||
1. 支持 OpenAI 兼容格式的 JSON 请求与 SSE 增量响应
|
||||
2. 支持按 provider 打标签,但不把业务 prompt、SSE 转发和模块状态写回放进本 crate
|
||||
2. 支持按 provider 打标签,但不把业务 prompt、SSE 转发和模块状态写回本 crate
|
||||
3. `DashScope` 当前只通过“调用方显式提供兼容文本网关 base url”的方式接入,不复用图像 API
|
||||
4. 角色动画、图片、视频、资产轮询仍留在后续 `platform-llm` / `platform-oss` / 业务模块任务里另行实现
|
||||
|
||||
|
||||
@@ -57,7 +57,18 @@ pub enum LlmMessageRole {
|
||||
/// A single LLM message: the author's role plus its content, available both as
/// plain text and, optionally, as structured multimodal parts.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct LlmMessage {
|
||||
/// Role of the message author.
pub role: LlmMessageRole,
|
||||
// Keeps the plain-text field for compatibility with Chat Completions and existing callers; Responses multimodal requests read content_parts.
|
||||
pub content: String,
|
||||
// Falls back to an empty Vec when the field is missing on deserialization, and is left out of the serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub content_parts: Vec<LlmMessageContentPart>,
|
||||
}
|
||||
|
||||
// Multimodal content part for the Responses API. Field names stay snake_case to follow the upstream OpenAI-compatible protocol.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
// Internally tagged: each variant is (de)serialized as an object whose "type" field holds the snake_case variant name.
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum LlmMessageContentPart {
|
||||
// Text part; tagged as "type": "input_text".
InputText { text: String },
|
||||
// Image part referenced by URL; tagged as "type": "input_image".
InputImage { image_url: String },
|
||||
}
|
||||
|
||||
// 文本补全请求冻结为“消息列表 + 可选模型覆盖 + 可选 max_tokens”最小闭环。
|
||||
@@ -179,10 +190,10 @@ struct ResponsesInputMessage {
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct ResponsesInputContentPart {
|
||||
#[serde(rename = "type")]
|
||||
part_type: &'static str,
|
||||
text: String,
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
enum ResponsesInputContentPart {
|
||||
InputText { text: String },
|
||||
InputImage { image_url: String },
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -398,6 +409,7 @@ impl LlmMessage {
|
||||
Self {
|
||||
role,
|
||||
content: content.into(),
|
||||
content_parts: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -412,6 +424,39 @@ impl LlmMessage {
|
||||
pub fn assistant(content: impl Into<String>) -> Self {
|
||||
Self::new(LlmMessageRole::Assistant, content)
|
||||
}
|
||||
|
||||
pub fn multimodal(role: LlmMessageRole, content_parts: Vec<LlmMessageContentPart>) -> Self {
|
||||
let content = content_parts
|
||||
.iter()
|
||||
.filter_map(|part| match part {
|
||||
LlmMessageContentPart::InputText { text } => Some(text.as_str()),
|
||||
LlmMessageContentPart::InputImage { .. } => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
Self {
|
||||
role,
|
||||
content,
|
||||
content_parts,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn user_multimodal(content_parts: Vec<LlmMessageContentPart>) -> Self {
|
||||
Self::multimodal(LlmMessageRole::User, content_parts)
|
||||
}
|
||||
|
||||
pub fn with_image_url(mut self, image_url: impl Into<String>) -> Self {
|
||||
if self.content_parts.is_empty() && !self.content.trim().is_empty() {
|
||||
self.content_parts.push(LlmMessageContentPart::InputText {
|
||||
text: self.content.clone(),
|
||||
});
|
||||
}
|
||||
self.content_parts.push(LlmMessageContentPart::InputImage {
|
||||
image_url: image_url.into(),
|
||||
});
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl LlmTextRequest {
|
||||
@@ -466,9 +511,27 @@ impl LlmTextRequest {
|
||||
}
|
||||
|
||||
for message in &self.messages {
|
||||
if message.content.trim().is_empty() {
|
||||
let has_text = !message.content.trim().is_empty()
|
||||
|| message.content_parts.iter().any(|part| match part {
|
||||
LlmMessageContentPart::InputText { text } => !text.trim().is_empty(),
|
||||
LlmMessageContentPart::InputImage { .. } => false,
|
||||
});
|
||||
let has_image = message.content_parts.iter().any(|part| match part {
|
||||
LlmMessageContentPart::InputImage { image_url } => !image_url.trim().is_empty(),
|
||||
LlmMessageContentPart::InputText { .. } => false,
|
||||
});
|
||||
if !has_text && !has_image {
|
||||
return Err(LlmError::InvalidRequest(
|
||||
"LLM message.content 不能为空".to_string(),
|
||||
"LLM message content 不能为空".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if message.content_parts.iter().any(|part| match part {
|
||||
LlmMessageContentPart::InputText { text } => text.trim().is_empty(),
|
||||
LlmMessageContentPart::InputImage { image_url } => image_url.trim().is_empty(),
|
||||
}) {
|
||||
return Err(LlmError::InvalidRequest(
|
||||
"LLM message content part 不能为空".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -1073,10 +1136,30 @@ fn map_responses_input_messages(messages: &[LlmMessage]) -> Vec<ResponsesInputMe
|
||||
LlmMessageRole::User => "user",
|
||||
LlmMessageRole::Assistant => "assistant",
|
||||
},
|
||||
content: vec![ResponsesInputContentPart {
|
||||
part_type: "input_text",
|
||||
text: message.content.clone(),
|
||||
}],
|
||||
content: map_responses_content_parts(message),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn map_responses_content_parts(message: &LlmMessage) -> Vec<ResponsesInputContentPart> {
|
||||
if message.content_parts.is_empty() {
|
||||
return vec![ResponsesInputContentPart::InputText {
|
||||
text: message.content.clone(),
|
||||
}];
|
||||
}
|
||||
|
||||
message
|
||||
.content_parts
|
||||
.iter()
|
||||
.map(|part| match part {
|
||||
LlmMessageContentPart::InputText { text } => {
|
||||
ResponsesInputContentPart::InputText { text: text.clone() }
|
||||
}
|
||||
LlmMessageContentPart::InputImage { image_url } => {
|
||||
ResponsesInputContentPart::InputImage {
|
||||
image_url: image_url.clone(),
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
@@ -1764,6 +1847,64 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn responses_multimodal_request_sends_input_text_and_input_image() {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").expect("listener should bind");
|
||||
let address = listener.local_addr().expect("listener should have addr");
|
||||
let server_handle = thread::spawn(move || {
|
||||
let (mut stream, _) = listener.accept().expect("request should connect");
|
||||
let request_text = read_request(&mut stream);
|
||||
write_response(
|
||||
&mut stream,
|
||||
MockResponse {
|
||||
status_line: "200 OK",
|
||||
content_type: "application/json; charset=utf-8",
|
||||
body: r#"{"id":"resp_multimodal","model":"gpt-5","output_text":"多模态成功","status":"completed"}"#.to_string(),
|
||||
extra_headers: Vec::new(),
|
||||
},
|
||||
);
|
||||
request_text
|
||||
});
|
||||
|
||||
let client = build_test_client(format!("http://{address}"), 0);
|
||||
let response = client
|
||||
.request_text(
|
||||
LlmTextRequest::new(vec![
|
||||
LlmMessage::system("你是创意互动内容生成 Agent"),
|
||||
LlmMessage::user_multimodal(vec![
|
||||
LlmMessageContentPart::InputText {
|
||||
text: "把这张图做成拼图".to_string(),
|
||||
},
|
||||
LlmMessageContentPart::InputImage {
|
||||
image_url: "https://example.com/ref.png".to_string(),
|
||||
},
|
||||
]),
|
||||
])
|
||||
.with_model("gpt-5")
|
||||
.with_responses_api(),
|
||||
)
|
||||
.await
|
||||
.expect("responses multimodal request_text should succeed");
|
||||
|
||||
let request_text = server_handle.join().expect("server thread should join");
|
||||
let request_body = request_text
|
||||
.split("\r\n\r\n")
|
||||
.nth(1)
|
||||
.expect("request body should exist");
|
||||
let request_json: serde_json::Value =
|
||||
serde_json::from_str(request_body).expect("request body should be json");
|
||||
|
||||
assert_eq!(response.model, "gpt-5");
|
||||
assert_eq!(request_json["model"], serde_json::json!("gpt-5"));
|
||||
assert_eq!(
|
||||
request_json["input"][1]["content"],
|
||||
serde_json::json!([
|
||||
{ "type": "input_text", "text": "把这张图做成拼图" },
|
||||
{ "type": "input_image", "image_url": "https://example.com/ref.png" }
|
||||
])
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn stream_text_accumulates_sse_response() {
|
||||
let server_url = spawn_mock_server(vec![MockResponse {
|
||||
|
||||
Reference in New Issue
Block a user