feat(api-server): audit external api failures

This commit is contained in:
kdletters
2026-05-21 16:33:13 +08:00
parent 487efff9c4
commit cc23b6020d
19 changed files with 2266 additions and 56 deletions

View File

@@ -1,21 +1,44 @@
use std::time::Duration;
use std::{error::Error, time::Duration};
use axum::http::StatusCode;
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64_STANDARD};
use reqwest::header;
use serde_json::{Map, Value, json};
use crate::{http_error::AppError, state::AppState};
use crate::{
external_api_audit::{
ExternalApiFailureDraft, app_error_status_class, is_retryable_external_api_failure,
record_external_api_failure,
},
http_error::AppError,
state::AppState,
};
/// Model identifier string `"gpt-image-2"`.
pub(crate) const GPT_IMAGE_2_MODEL: &str = "gpt-image-2";
/// Model identifier string `"gpt-image-2-all"`; this is the image model attached
/// to the failure audit drafts built in this file.
pub(crate) const VECTOR_ENGINE_GPT_IMAGE_2_MODEL: &str = "gpt-image-2-all";
/// Provider label used for the `provider` field of log events, error details,
/// and audit drafts produced in this file.
const VECTOR_ENGINE_PROVIDER: &str = "vector-engine";
#[derive(Clone, Debug)]
#[derive(Clone)]
/// Settings passed to the image generation and image edit requests in this file.
pub(crate) struct OpenAiImageSettings {
/// Base URL of the upstream image service; the endpoint URL helpers take these
/// settings as input.
pub base_url: String,
/// API key sent as the `Authorization: Bearer …` credential on each request.
pub api_key: String,
/// Request timeout in milliseconds; `require_openai_image_settings` populates it
/// from configuration with a minimum of 1.
pub request_timeout_ms: u64,
/// When `Some`, failures are forwarded to `record_external_api_failure` with this
/// state; when `None`, no audit record is written.
pub external_api_audit_state: Option<AppState>,
}
/// Hand-written `Debug` output for [`OpenAiImageSettings`].
///
/// The API key is always printed as `<redacted>`, and the audit state is reduced
/// to a boolean `external_api_audit_enabled` flag instead of being printed itself.
impl std::fmt::Debug for OpenAiImageSettings {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only report whether audit state is attached, never its contents.
        let audit_enabled = self.external_api_audit_state.is_some();

        let mut builder = formatter.debug_struct("OpenAiImageSettings");
        builder.field("base_url", &self.base_url);
        // The real key must never reach logs or panic messages.
        builder.field("api_key", &"<redacted>");
        builder.field("request_timeout_ms", &self.request_timeout_ms);
        builder.field("external_api_audit_enabled", &audit_enabled);
        builder.finish()
    }
}
#[derive(Clone, Debug)]
@@ -74,6 +97,7 @@ pub(crate) fn require_openai_image_settings(
base_url: base_url.to_string(),
api_key: api_key.to_string(),
request_timeout_ms: state.config.vector_engine_image_request_timeout_ms.max(1),
external_api_audit_state: Some(state.clone()),
})
}
@@ -103,15 +127,18 @@ pub(crate) async fn create_openai_image_generation(
reference_images: &[String],
failure_context: &str,
) -> Result<OpenAiGeneratedImages, AppError> {
let request_url = vector_engine_images_generation_url(settings);
let normalized_size = normalize_image_size(size);
let request_body = build_openai_image_request_body(
prompt,
negative_prompt,
size,
normalized_size.as_str(),
candidate_count,
reference_images,
);
let response = http_client
.post(vector_engine_images_generation_url(settings))
let started_at = std::time::Instant::now();
let response = match http_client
.post(request_url.as_str())
.header(
header::AUTHORIZATION,
format!("Bearer {}", settings.api_key),
@@ -121,16 +148,106 @@ pub(crate) async fn create_openai_image_generation(
.json(&request_body)
.send()
.await
.map_err(|error| {
map_openai_image_request_error(format!(
"{failure_context}:创建图片生成任务失败:{error}"
))
})?;
{
Ok(response) => response,
Err(error) => {
let latency_ms = started_at.elapsed().as_millis() as u64;
let timeout = error.is_timeout();
let connect = error.is_connect();
let source = error.source().map(ToString::to_string);
let message = format!("{failure_context}:创建图片生成任务失败:{error}");
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"request_send",
None,
None,
timeout,
connect,
message.as_str(),
source,
None,
Some(latency_ms),
Some(prompt.chars().count()),
Some(reference_images.len()),
),
)
.await;
return Err(map_openai_image_reqwest_error(
format!("{failure_context}:创建图片生成任务失败").as_str(),
request_url.as_str(),
error,
));
}
};
let response_status = response.status();
let response_text = response.text().await.map_err(|error| {
map_openai_image_request_error(format!("{failure_context}:读取图片生成响应失败:{error}"))
})?;
tracing::info!(
provider = VECTOR_ENGINE_PROVIDER,
endpoint = %request_url,
status = response_status.as_u16(),
prompt_chars = prompt.chars().count(),
size = %normalized_size,
reference_image_count = reference_images.len(),
elapsed_ms = started_at.elapsed().as_millis() as u64,
failure_context,
"VectorEngine 图片生成 HTTP 返回"
);
let response_text = match response.text().await {
Ok(response_text) => response_text,
Err(error) => {
let latency_ms = started_at.elapsed().as_millis() as u64;
let timeout = error.is_timeout();
let connect = error.is_connect();
let source = error.source().map(ToString::to_string);
let message = format!("{failure_context}:读取图片生成响应失败:{error}");
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"response_body",
Some(response_status.as_u16()),
None,
timeout,
connect,
message.as_str(),
source,
None,
Some(latency_ms),
Some(prompt.chars().count()),
Some(reference_images.len()),
),
)
.await;
return Err(map_openai_image_reqwest_error(
format!("{failure_context}:读取图片生成响应失败").as_str(),
request_url.as_str(),
error,
));
}
};
if !response_status.is_success() {
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"upstream_status",
Some(response_status.as_u16()),
None,
false,
false,
parse_api_error_message(response_text.as_str(), failure_context).as_str(),
None,
Some(truncate_raw(response_text.as_str())),
Some(started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(reference_images.len()),
),
)
.await;
return Err(map_openai_image_upstream_error(
response_status.as_u16(),
response_text.as_str(),
@@ -138,26 +255,114 @@ pub(crate) async fn create_openai_image_generation(
));
}
let response_json = parse_json_payload(response_text.as_str(), failure_context)?;
let response_json = match parse_json_payload(response_text.as_str(), failure_context) {
Ok(response_json) => response_json,
Err(error) => {
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"response_parse",
Some(response_status.as_u16()),
None,
false,
false,
error.body_text().as_str(),
None,
Some(truncate_raw(response_text.as_str())),
Some(started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(reference_images.len()),
),
)
.await;
return Err(error);
}
};
let generation_id = extract_generation_id(&response_json.payload)
.unwrap_or_else(|| format!("vector-engine-{}", current_utc_micros()));
let actual_prompt = find_first_string_by_key(&response_json.payload, "revised_prompt")
.or_else(|| find_first_string_by_key(&response_json.payload, "actual_prompt"));
let image_urls = extract_image_urls(&response_json.payload);
if !image_urls.is_empty() {
let mut generated =
download_images_from_urls(http_client, generation_id, image_urls, candidate_count)
.await?;
let download_started_at = std::time::Instant::now();
let mut generated = match download_images_from_urls(
http_client,
generation_id,
image_urls,
candidate_count,
)
.await
{
Ok(generated) => generated,
Err(error) => {
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"image_download",
Some(response_status.as_u16()),
Some(app_error_status_class(error.status_code())),
false,
false,
error.body_text().as_str(),
None,
None,
Some(download_started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(reference_images.len()),
),
)
.await;
return Err(error);
}
};
generated.actual_prompt = actual_prompt;
tracing::info!(
provider = VECTOR_ENGINE_PROVIDER,
endpoint = %request_url,
image_count = generated.images.len(),
elapsed_ms = download_started_at.elapsed().as_millis() as u64,
failure_context,
"VectorEngine 图片下载完成"
);
return Ok(generated);
}
let b64_images = extract_b64_images(&response_json.payload);
if !b64_images.is_empty() {
let mut generated = images_from_base64(generation_id, b64_images, candidate_count);
generated.actual_prompt = actual_prompt;
tracing::info!(
provider = VECTOR_ENGINE_PROVIDER,
endpoint = %request_url,
image_count = generated.images.len(),
failure_context,
"VectorEngine 图片 base64 解码完成"
);
return Ok(generated);
}
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"missing_image",
Some(response_status.as_u16()),
None,
false,
false,
format!("{failure_context}VectorEngine 未返回图片地址").as_str(),
None,
Some(truncate_raw(response_text.as_str())),
Some(started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(reference_images.len()),
),
)
.await;
Err(
AppError::from_status(StatusCode::BAD_GATEWAY).with_details(json!({
"provider": VECTOR_ENGINE_PROVIDER,
@@ -176,6 +381,8 @@ pub(crate) async fn create_openai_image_edit(
failure_context: &str,
) -> Result<OpenAiGeneratedImages, AppError> {
let task_id = format!("vector-engine-edit-{}", current_utc_micros());
let request_url = vector_engine_images_edit_url(settings);
let normalized_size = normalize_image_size(size);
let image_part = reqwest::multipart::Part::bytes(reference_image.bytes.clone())
.file_name(reference_image.file_name.clone())
.mime_str(reference_image.mime_type.as_str())
@@ -190,9 +397,10 @@ pub(crate) async fn create_openai_image_edit(
build_prompt_with_negative(prompt, negative_prompt),
)
.text("n", "1")
.text("size", normalize_image_size(size));
let response = http_client
.post(vector_engine_images_edit_url(settings).as_str())
.text("size", normalized_size.clone());
let started_at = std::time::Instant::now();
let response = match http_client
.post(request_url.as_str())
.header(
header::AUTHORIZATION,
format!("Bearer {}", settings.api_key),
@@ -201,16 +409,106 @@ pub(crate) async fn create_openai_image_edit(
.multipart(form)
.send()
.await
.map_err(|error| {
map_openai_image_request_error(format!(
"{failure_context}:创建图片编辑任务失败:{error}"
))
})?;
{
Ok(response) => response,
Err(error) => {
let latency_ms = started_at.elapsed().as_millis() as u64;
let timeout = error.is_timeout();
let connect = error.is_connect();
let source = error.source().map(ToString::to_string);
let message = format!("{failure_context}:创建图片编辑任务失败:{error}");
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"request_send",
None,
None,
timeout,
connect,
message.as_str(),
source,
None,
Some(latency_ms),
Some(prompt.chars().count()),
Some(1),
),
)
.await;
return Err(map_openai_image_reqwest_error(
format!("{failure_context}:创建图片编辑任务失败").as_str(),
request_url.as_str(),
error,
));
}
};
let response_status = response.status();
let response_text = response.text().await.map_err(|error| {
map_openai_image_request_error(format!("{failure_context}:读取图片编辑响应失败:{error}"))
})?;
tracing::info!(
provider = VECTOR_ENGINE_PROVIDER,
endpoint = %request_url,
status = response_status.as_u16(),
prompt_chars = prompt.chars().count(),
size = %normalized_size,
reference_image_count = 1usize,
elapsed_ms = started_at.elapsed().as_millis() as u64,
failure_context,
"VectorEngine 图片编辑 HTTP 返回"
);
let response_text = match response.text().await {
Ok(response_text) => response_text,
Err(error) => {
let latency_ms = started_at.elapsed().as_millis() as u64;
let timeout = error.is_timeout();
let connect = error.is_connect();
let source = error.source().map(ToString::to_string);
let message = format!("{failure_context}:读取图片编辑响应失败:{error}");
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"response_body",
Some(response_status.as_u16()),
None,
timeout,
connect,
message.as_str(),
source,
None,
Some(latency_ms),
Some(prompt.chars().count()),
Some(1),
),
)
.await;
return Err(map_openai_image_reqwest_error(
format!("{failure_context}:读取图片编辑响应失败").as_str(),
request_url.as_str(),
error,
));
}
};
if !response_status.is_success() {
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"upstream_status",
Some(response_status.as_u16()),
None,
false,
false,
parse_api_error_message(response_text.as_str(), failure_context).as_str(),
None,
Some(truncate_raw(response_text.as_str())),
Some(started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(1),
),
)
.await;
return Err(map_openai_image_upstream_error(
response_status.as_u16(),
response_text.as_str(),
@@ -218,12 +516,62 @@ pub(crate) async fn create_openai_image_edit(
));
}
let response_json = parse_json_payload(response_text.as_str(), failure_context)?;
let response_json = match parse_json_payload(response_text.as_str(), failure_context) {
Ok(response_json) => response_json,
Err(error) => {
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"response_parse",
Some(response_status.as_u16()),
None,
false,
false,
error.body_text().as_str(),
None,
Some(truncate_raw(response_text.as_str())),
Some(started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(1),
),
)
.await;
return Err(error);
}
};
let actual_prompt = find_first_string_by_key(&response_json.payload, "revised_prompt")
.or_else(|| find_first_string_by_key(&response_json.payload, "actual_prompt"));
let image_urls = extract_image_urls(&response_json.payload);
if !image_urls.is_empty() {
let mut generated = download_images_from_urls(http_client, task_id, image_urls, 1).await?;
let download_started_at = std::time::Instant::now();
let mut generated =
match download_images_from_urls(http_client, task_id, image_urls, 1).await {
Ok(generated) => generated,
Err(error) => {
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"image_download",
Some(response_status.as_u16()),
Some(app_error_status_class(error.status_code())),
false,
false,
error.body_text().as_str(),
None,
None,
Some(download_started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(1),
),
)
.await;
return Err(error);
}
};
generated.actual_prompt = actual_prompt;
return Ok(generated);
}
@@ -234,6 +582,25 @@ pub(crate) async fn create_openai_image_edit(
return Ok(generated);
}
record_openai_image_failure_if_configured(
settings,
build_openai_image_failure_audit_draft(
request_url.as_str(),
failure_context,
"missing_image",
Some(response_status.as_u16()),
None,
false,
false,
format!("{failure_context}VectorEngine 未返回编辑图片").as_str(),
None,
Some(truncate_raw(response_text.as_str())),
Some(started_at.elapsed().as_millis() as u64),
Some(prompt.chars().count()),
Some(1),
),
)
.await;
Err(
AppError::from_status(StatusCode::BAD_GATEWAY).with_details(json!({
"provider": VECTOR_ENGINE_PROVIDER,
@@ -402,6 +769,44 @@ fn map_openai_image_request_error(message: String) -> AppError {
}))
}
/// Converts a failed `reqwest` operation into an [`AppError`] and logs a warning.
///
/// * `context` - description of the step that failed, passed without a trailing
///   separator.
/// * `request_url` - endpoint the request targeted; echoed into the log event and
///   the error details.
/// * `error` - the underlying `reqwest::Error`.
///
/// Timeouts map to `504 Gateway Timeout`; every other failure maps to
/// `502 Bad Gateway`. The returned error's details carry the provider, message,
/// endpoint, the timeout/connect/request/body flags, and the error source text
/// (empty when the error has no source).
fn map_openai_image_reqwest_error(
    context: &str,
    request_url: &str,
    error: reqwest::Error,
) -> AppError {
    let is_timeout = error.is_timeout();
    let is_connect = error.is_connect();
    let is_request = error.is_request();
    let is_body = error.is_body();
    let source = error.source().map(ToString::to_string).unwrap_or_default();
    // Callers pass `context` without a trailing separator, so insert the same
    // full-width colon the audit messages use; otherwise the context and the
    // error text run together in the user-visible message.
    let message = format!("{context}:{error}");
    let status = if is_timeout {
        StatusCode::GATEWAY_TIMEOUT
    } else {
        StatusCode::BAD_GATEWAY
    };
    tracing::warn!(
        provider = VECTOR_ENGINE_PROVIDER,
        endpoint = %request_url,
        timeout = is_timeout,
        connect = is_connect,
        request = is_request,
        body = is_body,
        source = %source,
        message = %message,
        "VectorEngine 图片请求发送失败"
    );
    AppError::from_status(status).with_details(json!({
        "provider": VECTOR_ENGINE_PROVIDER,
        "message": message,
        "endpoint": request_url,
        "timeout": is_timeout,
        "connect": is_connect,
        "request": is_request,
        "body": is_body,
        "source": source,
    }))
}
fn map_openai_image_upstream_error(
upstream_status: u16,
raw_text: &str,
@@ -423,6 +828,53 @@ fn map_openai_image_upstream_error(
}))
}
/// Forwards `draft` to `record_external_api_failure` when audit state is attached.
///
/// Returns without recording anything when `settings.external_api_audit_state`
/// is `None`.
async fn record_openai_image_failure_if_configured(
    settings: &OpenAiImageSettings,
    draft: ExternalApiFailureDraft,
) {
    let audit_state = match &settings.external_api_audit_state {
        Some(audit_state) => audit_state,
        // No audit state configured: nothing to record.
        None => return,
    };
    record_external_api_failure(audit_state, draft).await;
}
/// Assembles the audit draft describing one failed VectorEngine image call.
///
/// The provider is always `VECTOR_ENGINE_PROVIDER` and the image model is always
/// `VECTOR_ENGINE_GPT_IMAGE_2_MODEL`. The retryable flag is not supplied by the
/// caller; it comes from `is_retryable_external_api_failure(status_code, timeout,
/// connect)`. `connect` is used only for that decision. All other arguments are
/// passed to the draft's builder methods as given.
fn build_openai_image_failure_audit_draft(
    request_url: &str,
    failure_context: &str,
    failure_stage: &'static str,
    status_code: Option<u16>,
    status_class: Option<&'static str>,
    timeout: bool,
    connect: bool,
    error_message: &str,
    error_source: Option<String>,
    raw_excerpt: Option<String>,
    latency_ms: Option<u64>,
    prompt_chars: Option<usize>,
    reference_image_count: Option<usize>,
) -> ExternalApiFailureDraft {
    // Identity of the failure: provider, endpoint, context, stage, and message.
    let base_draft = ExternalApiFailureDraft::new(
        VECTOR_ENGINE_PROVIDER,
        request_url.to_owned(),
        failure_context.to_owned(),
        failure_stage,
        error_message.to_owned(),
    );

    // Status and timeout classification.
    let classified_draft = base_draft
        .with_status_code(status_code)
        .with_optional_status_class(status_class)
        .with_timeout(timeout);
    let retryable = is_retryable_external_api_failure(status_code, timeout, connect);

    // Diagnostics and request-shape metadata.
    classified_draft
        .with_retryable(retryable)
        .with_error_source(error_source)
        .with_raw_excerpt(raw_excerpt)
        .with_latency_ms(latency_ms)
        .with_prompt_chars(prompt_chars)
        .with_reference_image_count(reference_image_count)
        .with_image_model(Some(VECTOR_ENGINE_GPT_IMAGE_2_MODEL))
}
fn parse_api_error_message(raw_text: &str, fallback_message: &str) -> String {
if raw_text.trim().is_empty() {
return fallback_message.to_string();
@@ -629,11 +1081,13 @@ mod tests {
base_url: "https://vector.example".to_string(),
api_key: "test-key".to_string(),
request_timeout_ms: 1_000_000,
external_api_audit_state: None,
};
let v1_settings = OpenAiImageSettings {
base_url: "https://vector.example/v1".to_string(),
api_key: "test-key".to_string(),
request_timeout_ms: 1_000_000,
external_api_audit_state: None,
};
assert_eq!(
@@ -658,4 +1112,41 @@ mod tests {
assert_eq!(images.images[0].mime_type, "image/png");
assert_eq!(images.images[0].extension, "png");
}
/// Builds an `upstream_status` failure draft for an HTTP 429 response and checks
/// the tracking draft derived from it: event key, scope id, and metadata fields.
///
/// The status class argument is left as `None`, yet the metadata is expected to
/// report `"4xx"` and `retryable: true`.
#[test]
fn vector_engine_upstream_failure_builds_tracking_ready_audit_event() {
    let raw_excerpt = "{\"error\":\"rate limited\"}".to_string();
    let draft = build_openai_image_failure_audit_draft(
        "https://vector.example/v1/images/generations",
        "拼图 UI 背景图生成失败",
        "upstream_status",
        Some(429),
        None,
        false,
        false,
        "上游限流",
        None,
        Some(raw_excerpt),
        Some(321),
        Some(42),
        Some(1),
    );

    let event = crate::external_api_audit::build_external_api_failure_tracking_draft(&draft);
    let metadata = &event.metadata;

    // Envelope fields.
    assert_eq!(
        event.event_key,
        crate::external_api_audit::EXTERNAL_API_FAILURE_EVENT_KEY
    );
    assert_eq!(event.scope_id, VECTOR_ENGINE_PROVIDER);

    // Metadata payload.
    assert_eq!(metadata["provider"], VECTOR_ENGINE_PROVIDER);
    assert_eq!(metadata["statusCode"], 429);
    assert_eq!(metadata["statusClass"], "4xx");
    assert_eq!(metadata["failureStage"], "upstream_status");
    assert_eq!(metadata["retryable"], true);
    assert_eq!(metadata["promptChars"], 42);
    assert_eq!(metadata["referenceImageCount"], 1);
    assert_eq!(metadata["imageModel"], VECTOR_ENGINE_GPT_IMAGE_2_MODEL);
}
}