From 267895462791ecac58194d7ba9783d1076d973fa Mon Sep 17 00:00:00 2001 From: kdletters <61648117+kdletters@users.noreply.github.com> Date: Thu, 4 Jun 2026 01:22:28 +0800 Subject: [PATCH] fix: log VectorEngine image edit request params --- ...发运维】本地开发验证与生产运维-2026-05-15.md | 4 +- .../src/vector_engine/client.rs | 41 +++++++- .../src/vector_engine/request.rs | 97 ++++++++++++++++++- .../src/vector_engine/transport.rs | 6 ++ 4 files changed, 144 insertions(+), 4 deletions(-) diff --git a/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md b/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md index d62ee632..55514d35 100644 --- a/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md +++ b/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md @@ -292,7 +292,7 @@ OpenTelemetry 现阶段默认开启 OTLP traces / metrics / logs,但本地日 - debug exporter / Rider 转发都会同时接收 traces、metrics 和 logs。 - api-server 会随 metrics 发送进程级指标:`process.memory.usage`、`process.memory.virtual`、`process.cpu.time`、`genarrative.process.cpu.usage_percent`、`process.thread.count`、`genarrative.process.memory.private`;Windows 额外发送 `process.windows.handle.count`,Linux 额外发送 `process.unix.file_descriptor.count`。这些指标只描述当前进程,不携带请求、用户或作品 label。 - HTTP 运行态补充发送 `genarrative.http.server.response_bodies.in_flight` 与 `genarrative.http.server.request_permits.available`,后者带低基数 `pool=default|gallery|detail|admin` label,用于区分业务 handler / 背压 permit 是否仍被占用;拼图广场热点缓存补充发送 `genarrative.puzzle_gallery.cache.*` 指标,记录 fresh hit、stale hit、未命中、后台刷新开始 / 失败、重建耗时和预序列化 data JSON 字节数。 -- 外部 API 失败统一发送 OTLP 并落库。当前 VectorEngine `gpt-image-2` 图片生成 / 编辑失败由 `platform-image` provider 输出低基数字段结构化日志,字段包括 provider、endpoint、failure_stage、status、source、source_chain、source_chain_depth、timeout、retryable、latency_ms、prompt_chars、reference_image_count、image_model 和 raw_excerpt;`api-server` 再记录指标 `genarrative.external_api.failures{provider,failure_stage,status_class,retryable}`,并写入 `tracking_event`,`event_key = external_api_call_failure`、`module_key = external-api`、`scope_kind = module`、`scope_id = provider`。调用方能拿到身份上下文时,失败事件还会在行级 
`user_id` / `owner_user_id` / `profile_id` 和 `metadata_json.userId` / `metadata_json.profileId` / `metadata_json.requestId` / `metadata_json.errorSource` 中记录触发者、草稿 / 作品作用域、请求标识和传输错误链。排障时先按 provider / failureStage 聚合,再下钻 userId / profileId,最后结合 request 日志、errorSource 和上游响应 excerpt 判断是限流、超时、解析失败还是未返回图片。 +- 外部 API 失败统一发送 OTLP 并落库。当前 VectorEngine `gpt-image-2` 图片生成 / 编辑失败由 `platform-image` provider 输出结构化日志字段,字段包括 provider、endpoint、failure_stage、status、source、source_chain、source_chain_depth、timeout、retryable、latency_ms、prompt_chars、reference_image_count、image_model、request_params 和 raw_excerpt;图片编辑请求参数日志还会带 reference_image_bytes_total,并在 request_params.referenceImages 中记录每个 multipart `image` part 的 fileName、mimeType 和 bytes,不记录 API key 或原始图片 bytes;`api-server` 再记录指标 `genarrative.external_api.failures{provider,failure_stage,status_class,retryable}`,并写入 `tracking_event`,`event_key = external_api_call_failure`、`module_key = external-api`、`scope_kind = module`、`scope_id = provider`。调用方能拿到身份上下文时,失败事件还会在行级 `user_id` / `owner_user_id` / `profile_id` 和 `metadata_json.userId` / `metadata_json.profileId` / `metadata_json.requestId` / `metadata_json.errorSource` 中记录触发者、草稿 / 作品作用域、请求标识和传输错误链。排障时先按 provider / failureStage 聚合,再下钻 userId / profileId,最后结合 request 日志、errorSource 和上游响应 excerpt 判断是限流、超时、解析失败还是未返回图片。 - SpacetimeDB 观测分为两类:procedure / reducer 调用继续用 `genarrative.spacetime.procedure.*`,订阅本地 cache 读使用 `genarrative.spacetime.read.*`。`read=list_puzzle_gallery` 表示拼图广场当前从 `puzzle_gallery_card_view` 本地 cache 读取,不再每个 HTTP 请求调用 `list_puzzle_gallery` procedure。 - 本地 Windows 直连压测的内存高水位要结合 K6 VU / 连接数解释。250 RPS 下过高 `PREALLOCATED_VUS` 可能让 300 个本地 Established 连接把 `api-server` private memory 瞬时推到 GB 级,且 `/healthz` 小响应也能复现;若压测结束后回落、`response_bodies.in_flight` 和背压 permit 未显示业务积压,应优先按连接 / 发送链路高水位处理,而不是判断为 SpacetimeDB 或 JSON 缓存泄漏。 - Rider 的 Logs 面板只展示 log event 自身字段,不会自动展开父 span 的全部 attributes;请求完成日志会直接带 
`request_id`、`http.request.method`、`http.route`、`url.scheme`、`url.path`、`http.response.status_code`、`status_class`、`latency_ms` 和 `slow_request`,完整链路继续到 Traces 面板按 trace/span 查看。 @@ -378,7 +378,7 @@ ORDER BY failures DESC, last_seen DESC LIMIT 100; ``` -VectorEngine `request_send` 且 `timeout = true` 的记录表示 `reqwest::Error::is_timeout()` 判定为超时,常见于连接、发送请求体、等待上游首包或上游长时间无响应;`errorSource` 会保存 reqwest 底层错误链,若只看到 `client error (SendRequest)`,表示 Hyper 只暴露到发送请求阶段,仍不等于最终根因。若 `statusCode` 为空,应优先查同一 `requestId` 的 `api-server` request 日志、provider 日志 `source_chain`、Nginx / 出口网络、VectorEngine 可用性和请求体大小;若已有 `502`、`429 moderation_blocked` 等状态码,则按上游网关或内容审核失败单独处理,不要和传输超时混为一类。 +VectorEngine `request_send` 且 `timeout = true` 的记录表示 `reqwest::Error::is_timeout()` 判定为超时,常见于连接、发送请求体、等待上游首包或上游长时间无响应;`errorSource` 会保存 reqwest 底层错误链,若只看到 `client error (SendRequest)`,表示 Hyper 只暴露到发送请求阶段,仍不等于最终根因。若 `statusCode` 为空,应优先查同一 `requestId` 的 `api-server` request 日志、provider 日志 `source_chain`、request_params、reference_image_bytes_total、Nginx / 出口网络、VectorEngine 可用性和请求体大小;若已有 `502`、`429 moderation_blocked` 等状态码,则按上游网关或内容审核失败单独处理,不要和传输超时混为一类。 tracking outbox 默认配置: diff --git a/server-rs/crates/platform-image/src/vector_engine/client.rs b/server-rs/crates/platform-image/src/vector_engine/client.rs index b7a31084..ee5524a0 100644 --- a/server-rs/crates/platform-image/src/vector_engine/client.rs +++ b/server-rs/crates/platform-image/src/vector_engine/client.rs @@ -5,7 +5,8 @@ use super::{ error::PlatformImageError, image_source::resolve_reference_images, request::{ - build_prompt_with_negative, build_vector_engine_image_request_body, normalize_image_size, + build_prompt_with_negative, build_vector_engine_image_edit_request_log_params, + build_vector_engine_image_request_body, normalize_image_size, vector_engine_images_edit_url, vector_engine_images_generation_url, }, response::handle_vector_engine_response, @@ -71,6 +72,7 @@ pub async fn create_vector_engine_image_generation( started_at.elapsed().as_millis() 
as u64, Some(prompt.chars().count()), Some(reference_images.len()), + Some(&request_body), )); } }; @@ -97,6 +99,7 @@ pub async fn create_vector_engine_image_generation( started_at.elapsed().as_millis() as u64, Some(prompt.chars().count()), Some(reference_images.len()), + Some(&request_body), )); } }; @@ -156,6 +159,13 @@ pub async fn create_vector_engine_image_edit_with_references( let request_url = vector_engine_images_edit_url(settings); let normalized_size = normalize_image_size(size); + let request_params = build_vector_engine_image_edit_request_log_params( + prompt, + negative_prompt, + normalized_size.as_str(), + candidate_count, + reference_images, + ); let mut form = reqwest::multipart::Form::new() .text("model", GPT_IMAGE_2_MODEL.to_string()) @@ -178,7 +188,32 @@ pub async fn create_vector_engine_image_edit_with_references( } let reference_image_count = reference_images.iter().take(5).count(); + let reference_image_bytes_total: usize = reference_images + .iter() + .take(5) + .map(|image| image.bytes.len()) + .sum(); let started_at = std::time::Instant::now(); + tracing::info!( + provider = VECTOR_ENGINE_PROVIDER, + endpoint = %request_url, + image_model = GPT_IMAGE_2_MODEL, + size = %normalized_size, + candidate_count = candidate_count.clamp(1, 4), + requested_candidate_count = candidate_count, + prompt_chars = prompt.trim().chars().count(), + negative_prompt_chars = negative_prompt + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::chars) + .map(Iterator::count) + .unwrap_or_default(), + reference_image_count, + reference_image_bytes_total, + request_params = %request_params, + failure_context, + "VectorEngine 图片编辑请求参数" + ); let response = match http_client .post(request_url.as_str()) .header( @@ -200,6 +235,7 @@ pub async fn create_vector_engine_image_edit_with_references( started_at.elapsed().as_millis() as u64, Some(prompt.chars().count()), Some(reference_image_count), + Some(&request_params), )); } }; @@ -211,6 +247,8 @@ pub async fn 
create_vector_engine_image_edit_with_references( prompt_chars = prompt.chars().count(), size = %normalized_size, reference_image_count, + reference_image_bytes_total, + request_params = %request_params, elapsed_ms = started_at.elapsed().as_millis() as u64, failure_context, "VectorEngine 图片编辑 HTTP 返回" @@ -226,6 +264,7 @@ pub async fn create_vector_engine_image_edit_with_references( started_at.elapsed().as_millis() as u64, Some(prompt.chars().count()), Some(reference_image_count), + Some(&request_params), )); } }; diff --git a/server-rs/crates/platform-image/src/vector_engine/request.rs b/server-rs/crates/platform-image/src/vector_engine/request.rs index 10a5c06b..656d07d7 100644 --- a/server-rs/crates/platform-image/src/vector_engine/request.rs +++ b/server-rs/crates/platform-image/src/vector_engine/request.rs @@ -1,6 +1,9 @@ use serde_json::{Map, Value, json}; -use super::{constants::GPT_IMAGE_2_MODEL, types::VectorEngineImageSettings}; +use super::{ + constants::GPT_IMAGE_2_MODEL, + types::{ReferenceImage, VectorEngineImageSettings}, +}; pub fn build_vector_engine_image_request_body( prompt: &str, @@ -56,6 +59,52 @@ pub fn vector_engine_images_edit_url(settings: &VectorEngineImageSettings) -> St } } +pub(crate) fn build_vector_engine_image_edit_request_log_params( + prompt: &str, + negative_prompt: Option<&str>, + size: &str, + candidate_count: u32, + reference_images: &[ReferenceImage], +) -> Value { + let prompt = prompt.trim(); + let negative_prompt = negative_prompt + .map(str::trim) + .filter(|value| !value.is_empty()); + let references: Vec<Value> = reference_images + .iter() + .take(5) + .enumerate() + .map(|(index, image)| { + json!({ + "index": index, + "field": "image", + "fileName": image.file_name.as_str(), + "mimeType": image.mime_type.as_str(), + "bytes": image.bytes.len(), + }) + }) + .collect(); + let reference_image_bytes_total: usize = reference_images + .iter() + .take(5) + .map(|image| image.bytes.len()) + .sum(); + + json!({ + "model":
GPT_IMAGE_2_MODEL, + "prompt": prompt, + "negativePrompt": negative_prompt.unwrap_or_default(), + "promptChars": prompt.chars().count(), + "negativePromptChars": negative_prompt.map(str::chars).map(Iterator::count), + "n": candidate_count.clamp(1, 4), + "requestedCandidateCount": candidate_count, + "size": size, + "referenceImageCount": references.len(), + "referenceImageBytesTotal": reference_image_bytes_total, + "referenceImages": references, + }) +} + pub(crate) fn build_prompt_with_negative(prompt: &str, negative_prompt: Option<&str>) -> String { let prompt = prompt.trim(); let Some(negative_prompt) = negative_prompt @@ -67,3 +116,49 @@ pub(crate) fn build_prompt_with_negative(prompt: &str, negative_prompt: Option<& format!("{prompt}\n避免:{negative_prompt}") } + +#[cfg(test)] +mod tests { + use super::*; + use crate::vector_engine::types::ReferenceImage; + + #[test] + fn edit_request_log_params_include_reference_image_sizes_without_secrets_or_bytes() { + let params = build_vector_engine_image_edit_request_log_params( + " 拼图参考图重绘 ", + Some(" 文字,水印 "), + "1024x1024", + 9, + &[ + ReferenceImage { + bytes: vec![1, 2, 3, 4, 5], + mime_type: "image/png".to_string(), + file_name: "reference-a.png".to_string(), + }, + ReferenceImage { + bytes: vec![8; 7], + mime_type: "image/jpeg".to_string(), + file_name: "reference-b.jpg".to_string(), + }, + ], + ); + + assert_eq!(params["model"], GPT_IMAGE_2_MODEL); + assert_eq!(params["prompt"], "拼图参考图重绘"); + assert_eq!(params["negativePrompt"], "文字,水印"); + assert_eq!(params["n"], 4); + assert_eq!(params["requestedCandidateCount"], 9); + assert_eq!(params["size"], "1024x1024"); + assert_eq!(params["referenceImageCount"], 2); + assert_eq!(params["referenceImageBytesTotal"], 12); + assert_eq!(params["referenceImages"][0]["field"], "image"); + assert_eq!(params["referenceImages"][0]["fileName"], "reference-a.png"); + assert_eq!(params["referenceImages"][0]["mimeType"], "image/png"); + assert_eq!(params["referenceImages"][0]["bytes"], 
5); + + let serialized = params.to_string(); + assert!(!serialized.contains("api_key")); + assert!(!serialized.contains("Bearer")); + assert!(!serialized.contains("[1,2,3,4,5]")); + } +} diff --git a/server-rs/crates/platform-image/src/vector_engine/transport.rs b/server-rs/crates/platform-image/src/vector_engine/transport.rs index a40819da..c74d6e04 100644 --- a/server-rs/crates/platform-image/src/vector_engine/transport.rs +++ b/server-rs/crates/platform-image/src/vector_engine/transport.rs @@ -1,5 +1,7 @@ use std::{error::Error, time::Duration}; +use serde_json::Value; + use super::{ audit::build_failure_audit, constants::VECTOR_ENGINE_PROVIDER, error::PlatformImageError, types::VectorEngineImageSettings, @@ -27,6 +29,7 @@ pub(super) fn map_reqwest_error( latency_ms: u64, prompt_chars: Option<usize>, reference_image_count: Option<usize>, + request_params: Option<&Value>, ) -> PlatformImageError { let is_timeout = error.is_timeout(); let is_connect = error.is_connect(); @@ -70,6 +73,9 @@ pub(super) fn map_reqwest_error( elapsed_ms = latency_ms, prompt_chars, reference_image_count, + request_params = %request_params + .map(|value| value.to_string()) + .unwrap_or_default(), "VectorEngine 图片请求发送失败" );