fix: log VectorEngine image edit request params

This commit is contained in:
kdletters
2026-06-04 01:22:28 +08:00
parent ef236fc3a7
commit 2678954627
4 changed files with 144 additions and 4 deletions

View File

@@ -292,7 +292,7 @@ OpenTelemetry 现阶段默认开启 OTLP traces / metrics / logs但本地日
- debug exporter / Rider 转发都会同时接收 traces、metrics 和 logs。
- api-server 会随 metrics 发送进程级指标:`process.memory.usage`、`process.memory.virtual`、`process.cpu.time`、`genarrative.process.cpu.usage_percent`、`process.thread.count`、`genarrative.process.memory.private`;Windows 额外发送 `process.windows.handle.count`,Linux 额外发送 `process.unix.file_descriptor.count`。这些指标只描述当前进程,不携带请求、用户或作品 label。
- HTTP 运行态补充发送 `genarrative.http.server.response_bodies.in_flight`、`genarrative.http.server.request_permits.available`,后者带低基数 `pool=default|gallery|detail|admin` label,用于区分业务 handler / 背压 permit 是否仍被占用;拼图广场热点缓存补充发送 `genarrative.puzzle_gallery.cache.*` 指标,记录 fresh hit、stale hit、未命中、后台刷新开始 / 失败、重建耗时和预序列化 data JSON 字节数。
- 外部 API 失败统一发送 OTLP 并落库。当前 VectorEngine `gpt-image-2` 图片生成 / 编辑失败由 `platform-image` provider 输出低基数字段结构化日志,字段包括 provider、endpoint、failure_stage、status、source、source_chain、source_chain_depth、timeout、retryable、latency_ms、prompt_chars、reference_image_count、image_model 和 raw_excerpt`api-server` 再记录指标 `genarrative.external_api.failures{provider,failure_stage,status_class,retryable}`,并写入 `tracking_event``event_key = external_api_call_failure``module_key = external-api``scope_kind = module``scope_id = provider`。调用方能拿到身份上下文时,失败事件还会在行级 `user_id` / `owner_user_id` / `profile_id``metadata_json.userId` / `metadata_json.profileId` / `metadata_json.requestId` / `metadata_json.errorSource` 中记录触发者、草稿 / 作品作用域、请求标识和传输错误链。排障时先按 provider / failureStage 聚合,再下钻 userId / profileId最后结合 request 日志、errorSource 和上游响应 excerpt 判断是限流、超时、解析失败还是未返回图片。
- 外部 API 失败统一发送 OTLP 并落库。当前 VectorEngine `gpt-image-2` 图片生成 / 编辑失败由 `platform-image` provider 输出结构化日志字段,字段包括 provider、endpoint、failure_stage、status、source、source_chain、source_chain_depth、timeout、retryable、latency_ms、prompt_chars、reference_image_count、image_model、request_params 和 raw_excerpt;图片编辑请求参数日志还会带 reference_image_bytes_total,并在 request_params.referenceImages 中记录每个 multipart `image` part 的 fileName、mimeType 和 bytes,不记录 API key 或原始图片 bytes;`api-server` 再记录指标 `genarrative.external_api.failures{provider,failure_stage,status_class,retryable}`,并写入 `tracking_event`:`event_key = external_api_call_failure`、`module_key = external-api`、`scope_kind = module`、`scope_id = provider`。调用方能拿到身份上下文时,失败事件还会在行级 `user_id` / `owner_user_id` / `profile_id` 和 `metadata_json.userId` / `metadata_json.profileId` / `metadata_json.requestId` / `metadata_json.errorSource` 中记录触发者、草稿 / 作品作用域、请求标识和传输错误链。排障时先按 provider / failureStage 聚合,再下钻 userId / profileId,最后结合 request 日志、errorSource 和上游响应 excerpt 判断是限流、超时、解析失败还是未返回图片。
- SpacetimeDB 观测分为两类:procedure / reducer 调用继续用 `genarrative.spacetime.procedure.*`,订阅本地 cache 读使用 `genarrative.spacetime.read.*`;`read=list_puzzle_gallery` 表示拼图广场当前从 `puzzle_gallery_card_view` 本地 cache 读取,不再每个 HTTP 请求调用 `list_puzzle_gallery` procedure。
- 本地 Windows 直连压测的内存高水位要结合 K6 VU / 连接数解释。250 RPS 下过高 `PREALLOCATED_VUS` 可能让 300 个本地 Established 连接把 `api-server` private memory 瞬时推到 GB 级,且 `/healthz` 小响应也能复现;若压测结束后回落、`response_bodies.in_flight` 和背压 permit 未显示业务积压,应优先按连接 / 发送链路高水位处理,而不是判断为 SpacetimeDB 或 JSON 缓存泄漏。
- Rider 的 Logs 面板只展示 log event 自身字段,不会自动展开父 span 的全部 attributes;请求完成日志会直接带 `request_id`、`http.request.method`、`http.route`、`url.scheme`、`url.path`、`http.response.status_code`、`status_class`、`latency_ms`、`slow_request`,完整链路继续到 Traces 面板按 trace/span 查看。
@@ -378,7 +378,7 @@ ORDER BY failures DESC, last_seen DESC
LIMIT 100;
```
VectorEngine `request_send``timeout = true` 的记录表示 `reqwest::Error::is_timeout()` 判定为超时,常见于连接、发送请求体、等待上游首包或上游长时间无响应;`errorSource` 会保存 reqwest 底层错误链,若只看到 `client error (SendRequest)`,表示 Hyper 只暴露到发送请求阶段,仍不等于最终根因。若 `statusCode` 为空,应优先查同一 `requestId``api-server` request 日志、provider 日志 `source_chain`、Nginx / 出口网络、VectorEngine 可用性和请求体大小;若已有 `502``429 moderation_blocked` 等状态码,则按上游网关或内容审核失败单独处理,不要和传输超时混为一类。
VectorEngine `request_send` 且 `timeout = true` 的记录表示 `reqwest::Error::is_timeout()` 判定为超时,常见于连接、发送请求体、等待上游首包或上游长时间无响应;`errorSource` 会保存 reqwest 底层错误链,若只看到 `client error (SendRequest)`,表示 Hyper 只暴露到发送请求阶段,仍不等于最终根因。若 `statusCode` 为空,应优先查同一 `requestId` 的 `api-server` request 日志、provider 日志 `source_chain`、request_params、reference_image_bytes_total、Nginx / 出口网络、VectorEngine 可用性和请求体大小;若已有 `502`、`429 moderation_blocked` 等状态码,则按上游网关或内容审核失败单独处理,不要和传输超时混为一类。
tracking outbox 默认配置:

View File

@@ -5,7 +5,8 @@ use super::{
error::PlatformImageError,
image_source::resolve_reference_images,
request::{
build_prompt_with_negative, build_vector_engine_image_request_body, normalize_image_size,
build_prompt_with_negative, build_vector_engine_image_edit_request_log_params,
build_vector_engine_image_request_body, normalize_image_size,
vector_engine_images_edit_url, vector_engine_images_generation_url,
},
response::handle_vector_engine_response,
@@ -71,6 +72,7 @@ pub async fn create_vector_engine_image_generation(
started_at.elapsed().as_millis() as u64,
Some(prompt.chars().count()),
Some(reference_images.len()),
Some(&request_body),
));
}
};
@@ -97,6 +99,7 @@ pub async fn create_vector_engine_image_generation(
started_at.elapsed().as_millis() as u64,
Some(prompt.chars().count()),
Some(reference_images.len()),
Some(&request_body),
));
}
};
@@ -156,6 +159,13 @@ pub async fn create_vector_engine_image_edit_with_references(
let request_url = vector_engine_images_edit_url(settings);
let normalized_size = normalize_image_size(size);
let request_params = build_vector_engine_image_edit_request_log_params(
prompt,
negative_prompt,
normalized_size.as_str(),
candidate_count,
reference_images,
);
let mut form = reqwest::multipart::Form::new()
.text("model", GPT_IMAGE_2_MODEL.to_string())
@@ -178,7 +188,32 @@ pub async fn create_vector_engine_image_edit_with_references(
}
let reference_image_count = reference_images.iter().take(5).count();
let reference_image_bytes_total: usize = reference_images
.iter()
.take(5)
.map(|image| image.bytes.len())
.sum();
let started_at = std::time::Instant::now();
tracing::info!(
provider = VECTOR_ENGINE_PROVIDER,
endpoint = %request_url,
image_model = GPT_IMAGE_2_MODEL,
size = %normalized_size,
candidate_count = candidate_count.clamp(1, 4),
requested_candidate_count = candidate_count,
prompt_chars = prompt.trim().chars().count(),
negative_prompt_chars = negative_prompt
.map(str::trim)
.filter(|value| !value.is_empty())
.map(str::chars)
.map(Iterator::count)
.unwrap_or_default(),
reference_image_count,
reference_image_bytes_total,
request_params = %request_params,
failure_context,
"VectorEngine 图片编辑请求参数"
);
let response = match http_client
.post(request_url.as_str())
.header(
@@ -200,6 +235,7 @@ pub async fn create_vector_engine_image_edit_with_references(
started_at.elapsed().as_millis() as u64,
Some(prompt.chars().count()),
Some(reference_image_count),
Some(&request_params),
));
}
};
@@ -211,6 +247,8 @@ pub async fn create_vector_engine_image_edit_with_references(
prompt_chars = prompt.chars().count(),
size = %normalized_size,
reference_image_count,
reference_image_bytes_total,
request_params = %request_params,
elapsed_ms = started_at.elapsed().as_millis() as u64,
failure_context,
"VectorEngine 图片编辑 HTTP 返回"
@@ -226,6 +264,7 @@ pub async fn create_vector_engine_image_edit_with_references(
started_at.elapsed().as_millis() as u64,
Some(prompt.chars().count()),
Some(reference_image_count),
Some(&request_params),
));
}
};

View File

@@ -1,6 +1,9 @@
use serde_json::{Map, Value, json};
use super::{constants::GPT_IMAGE_2_MODEL, types::VectorEngineImageSettings};
use super::{
constants::GPT_IMAGE_2_MODEL,
types::{ReferenceImage, VectorEngineImageSettings},
};
pub fn build_vector_engine_image_request_body(
prompt: &str,
@@ -56,6 +59,52 @@ pub fn vector_engine_images_edit_url(settings: &VectorEngineImageSettings) -> St
}
}
/// Builds the JSON summary of an image-edit request that is attached to log events.
///
/// The summary contains the model name, the trimmed prompt and negative prompt (with
/// their character counts), the clamped and the originally requested candidate count,
/// the size string, and per-reference-image metadata (index, multipart field name,
/// file name, MIME type and byte length) together with the total byte length.
///
/// Only the *length* of each image buffer is recorded, never its content, and the
/// function receives no settings or credentials, so none can end up in the output.
///
/// NOTE(review): the prompt and negative prompt text are included verbatim. If prompts
/// may carry sensitive user content, confirm that logging them in full is acceptable.
///
/// # Parameters
/// - `prompt`: raw prompt; surrounding whitespace is trimmed before it is recorded.
/// - `negative_prompt`: optional negative prompt; `None` and blank values are both
///   recorded as an empty string with a character count of `0`.
/// - `size`: size string, recorded unchanged.
/// - `candidate_count`: requested candidate count; recorded both as-is
///   (`requestedCandidateCount`) and clamped to `1..=4` (`n`).
/// - `reference_images`: reference images; only the first five are described.
pub(crate) fn build_vector_engine_image_edit_request_log_params(
    prompt: &str,
    negative_prompt: Option<&str>,
    size: &str,
    candidate_count: u32,
    reference_images: &[ReferenceImage],
) -> Value {
    // Same cap the caller applies when it counts reference images for its own log
    // fields. Presumably this matches the number of `image` parts actually sent;
    // the form-building code is not visible here, so verify against it.
    const MAX_LOGGED_REFERENCE_IMAGES: usize = 5;

    let prompt = prompt.trim();
    // Collapse "absent" and "blank" into an empty string so that `negativePrompt` and
    // `negativePromptChars` always agree ("" / 0) instead of pairing "" with `null`.
    let negative_prompt = negative_prompt
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .unwrap_or_default();

    // Take the logged window once so the per-image entries and the byte total cannot
    // drift apart. `get(..N)` yields `None` when fewer than N images were supplied.
    let logged_images = reference_images
        .get(..MAX_LOGGED_REFERENCE_IMAGES)
        .unwrap_or(reference_images);

    let references: Vec<Value> = logged_images
        .iter()
        .enumerate()
        .map(|(index, image)| {
            json!({
                "index": index,
                "field": "image",
                "fileName": image.file_name.as_str(),
                "mimeType": image.mime_type.as_str(),
                "bytes": image.bytes.len(),
            })
        })
        .collect();
    let reference_image_bytes_total: usize =
        logged_images.iter().map(|image| image.bytes.len()).sum();

    json!({
        "model": GPT_IMAGE_2_MODEL,
        "prompt": prompt,
        "negativePrompt": negative_prompt,
        "promptChars": prompt.chars().count(),
        "negativePromptChars": negative_prompt.chars().count(),
        "n": candidate_count.clamp(1, 4),
        "requestedCandidateCount": candidate_count,
        "size": size,
        "referenceImageCount": references.len(),
        "referenceImageBytesTotal": reference_image_bytes_total,
        "referenceImages": references,
    })
}
pub(crate) fn build_prompt_with_negative(prompt: &str, negative_prompt: Option<&str>) -> String {
let prompt = prompt.trim();
let Some(negative_prompt) = negative_prompt
@@ -67,3 +116,49 @@ pub(crate) fn build_prompt_with_negative(prompt: &str, negative_prompt: Option<&
format!("{prompt}\n避免:{negative_prompt}")
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vector_engine::types::ReferenceImage;

    /// Creates an in-memory reference image fixture from its three fields.
    fn reference_image(bytes: Vec<u8>, mime_type: &str, file_name: &str) -> ReferenceImage {
        ReferenceImage {
            bytes,
            mime_type: mime_type.to_string(),
            file_name: file_name.to_string(),
        }
    }

    /// The log params must describe each reference image by name, MIME type and size,
    /// trim the prompts, clamp the candidate count, and never contain credentials or
    /// the raw image bytes.
    #[test]
    fn edit_request_log_params_include_reference_image_sizes_without_secrets_or_bytes() {
        let reference_images = [
            reference_image(vec![1, 2, 3, 4, 5], "image/png", "reference-a.png"),
            reference_image(vec![8; 7], "image/jpeg", "reference-b.jpg"),
        ];

        let params = build_vector_engine_image_edit_request_log_params(
            " 拼图参考图重绘 ",
            Some(" 文字,水印 "),
            "1024x1024",
            9,
            &reference_images,
        );

        // Scalar request fields: prompts are trimmed, 9 candidates clamp down to 4.
        assert_eq!(params["model"], GPT_IMAGE_2_MODEL);
        assert_eq!(params["prompt"], "拼图参考图重绘");
        assert_eq!(params["negativePrompt"], "文字,水印");
        assert_eq!(params["n"], 4);
        assert_eq!(params["requestedCandidateCount"], 9);
        assert_eq!(params["size"], "1024x1024");

        // Aggregate reference image figures: 5 + 7 bytes across two images.
        assert_eq!(params["referenceImageCount"], 2);
        assert_eq!(params["referenceImageBytesTotal"], 12);

        // Metadata recorded for the first reference image.
        let first_reference = &params["referenceImages"][0];
        assert_eq!(first_reference["field"], "image");
        assert_eq!(first_reference["fileName"], "reference-a.png");
        assert_eq!(first_reference["mimeType"], "image/png");
        assert_eq!(first_reference["bytes"], 5);

        // Neither credential markers nor the raw byte payload may be serialized.
        let serialized = params.to_string();
        for forbidden in ["api_key", "Bearer", "[1,2,3,4,5]"] {
            assert!(!serialized.contains(forbidden));
        }
    }
}

View File

@@ -1,5 +1,7 @@
use std::{error::Error, time::Duration};
use serde_json::Value;
use super::{
audit::build_failure_audit, constants::VECTOR_ENGINE_PROVIDER, error::PlatformImageError,
types::VectorEngineImageSettings,
@@ -27,6 +29,7 @@ pub(super) fn map_reqwest_error(
latency_ms: u64,
prompt_chars: Option<usize>,
reference_image_count: Option<usize>,
request_params: Option<&Value>,
) -> PlatformImageError {
let is_timeout = error.is_timeout();
let is_connect = error.is_connect();
@@ -70,6 +73,9 @@ pub(super) fn map_reqwest_error(
elapsed_ms = latency_ms,
prompt_chars,
reference_image_count,
request_params = %request_params
.map(|value| value.to_string())
.unwrap_or_default(),
"VectorEngine 图片请求发送失败"
);