Update Match3D/image-generation docs & code

Adds and updates documentation, assets, and implementation for the Match3D and puzzle image-generation workflows. Key changes: the decision logs and pitfalls docs now prefer VectorEngine Gemini for Match3D material sheets and require the edits (multipart) workflow for 1:1 container reference images; guidance was added on when to use APIMart versus VectorEngine. The .env.example file now clarifies the APIMart/Responses configuration. Many new public assets and PPT visuals were added. Code changes span the frontend and backend: updated shared contracts; the server-rs match3d, puzzle, and image-generation handlers; the VectorEngine/OpenAI image-generation clients; and multiple React components and tests that handle UI/background/container image signing, the edits workflow, and puzzle UI background resolution. Added src/services/puzzle-runtime/puzzleUiBackgroundSource.ts and related test updates. The docs also include notes about the multipart HTTP/1.1 requirement and the test/verification commands.
This commit is contained in:
2026-05-14 20:34:45 +08:00
parent d33c937ebc
commit 548db78ca7
103 changed files with 6687 additions and 3270 deletions

View File

@@ -65,16 +65,6 @@ struct AudioAssetBindingTarget {
storage_scope: String,
}
/// Caller-facing description of where a generated creation audio asset is bound.
///
/// The `generate_*_asset_for_creation` helpers in this file convert this value
/// into an `AudioAssetBindingTarget`, copying every field across and reusing
/// `entity_kind` as the target's `storage_scope`.
#[derive(Clone, Debug)]
pub(crate) struct GeneratedCreationAudioTarget {
// Kind of entity that receives the audio; also copied into `storage_scope`.
pub entity_kind: String,
// Identifier of the entity that receives the audio.
pub entity_id: String,
// Slot on the entity the audio is bound to (forwarded unchanged).
pub slot: String,
// Asset kind label recorded for the generated audio (forwarded unchanged).
pub asset_kind: String,
// Optional profile identifier (forwarded unchanged).
pub profile_id: Option<String>,
// Legacy storage prefix used for the asset (forwarded unchanged).
pub storage_prefix: LegacyAssetPrefix,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum AudioAssetSlot {
BackgroundMusic,
@@ -173,21 +163,13 @@ pub async fn create_visual_novel_background_music_task(
}
pub async fn create_background_music_task(
State(state): State<AppState>,
State(_state): State<AppState>,
axum::extract::Extension(request_context): axum::extract::Extension<RequestContext>,
payload: Result<Json<creation_audio::CreateBackgroundMusicRequest>, JsonRejection>,
) -> Result<Json<Value>, Response> {
let Json(payload) = parse_json_payload(&request_context, payload)?;
create_background_music_task_response(
&state,
payload.prompt,
payload.title,
payload.tags,
payload.model,
)
.await
.map(|payload| json_success_body(Some(&request_context), payload))
.map_err(|error| error.into_response_with_context(Some(&request_context)))
let _ = parse_json_payload(&request_context, payload)?;
Err(creation_audio_generation_disabled_error()
.into_response_with_context(Some(&request_context)))
}
pub async fn create_visual_novel_sound_effect_task(
@@ -241,210 +223,13 @@ pub async fn create_visual_novel_sound_effect_task(
}
pub async fn create_sound_effect_task(
State(state): State<AppState>,
State(_state): State<AppState>,
axum::extract::Extension(request_context): axum::extract::Extension<RequestContext>,
payload: Result<Json<creation_audio::CreateSoundEffectRequest>, JsonRejection>,
) -> Result<Json<Value>, Response> {
let Json(payload) = parse_json_payload(&request_context, payload)?;
create_sound_effect_task_response(&state, payload.prompt, payload.duration, payload.seed)
.await
.map(|payload| json_success_body(Some(&request_context), payload))
.map_err(|error| error.into_response_with_context(Some(&request_context)))
}
pub(crate) async fn generate_sound_effect_asset_for_creation(
state: &AppState,
owner_user_id: &str,
prompt: String,
duration: Option<u8>,
seed: Option<u64>,
target: GeneratedCreationAudioTarget,
) -> Result<creation_audio::CreationAudioAsset, AppError> {
let normalized_prompt = normalize_limited_text(&prompt, "prompt", VIDU_PROMPT_MAX_CHARS)?;
let task =
create_sound_effect_task_response(state, normalized_prompt.clone(), duration, seed).await?;
let target = AudioAssetBindingTarget {
storage_scope: target.entity_kind.clone(),
entity_kind: target.entity_kind,
entity_id: target.entity_id,
slot: target.slot,
asset_kind: target.asset_kind,
profile_id: target.profile_id,
storage_prefix: target.storage_prefix,
};
let generated = wait_for_generated_audio_asset(
state,
owner_user_id,
task.task_id.clone(),
AudioAssetSlot::SoundEffect,
target,
)
.await?;
let audio_src = generated
.audio_src
.ok_or_else(|| vector_engine_bad_gateway("音效生成完成但缺少播放地址"))?;
Ok(creation_audio::CreationAudioAsset {
task_id: generated.task_id,
provider: generated.provider,
asset_object_id: generated.asset_object_id,
asset_kind: generated.asset_kind,
audio_src,
prompt: Some(normalized_prompt),
title: None,
updated_at: Some(current_utc_iso_text()),
})
}
pub(crate) async fn generate_background_music_asset_for_creation(
state: &AppState,
owner_user_id: &str,
prompt: String,
title: String,
tags: Option<String>,
model: Option<String>,
target: GeneratedCreationAudioTarget,
) -> Result<creation_audio::CreationAudioAsset, AppError> {
let normalized_prompt =
normalize_limited_text_allow_empty(&prompt, "prompt", SUNO_PROMPT_MAX_CHARS)?;
let normalized_title = normalize_limited_text(&title, "title", SUNO_TITLE_MAX_CHARS)?;
let task = create_background_music_task_response(
state,
normalized_prompt.clone(),
normalized_title.clone(),
tags,
model,
)
.await?;
let target = AudioAssetBindingTarget {
storage_scope: target.entity_kind.clone(),
entity_kind: target.entity_kind,
entity_id: target.entity_id,
slot: target.slot,
asset_kind: target.asset_kind,
profile_id: target.profile_id,
storage_prefix: target.storage_prefix,
};
let generated = wait_for_generated_audio_asset(
state,
owner_user_id,
task.task_id.clone(),
AudioAssetSlot::BackgroundMusic,
target,
)
.await?;
let audio_src = generated
.audio_src
.ok_or_else(|| vector_engine_bad_gateway("背景音乐生成完成但缺少播放地址"))?;
Ok(creation_audio::CreationAudioAsset {
task_id: generated.task_id,
provider: generated.provider,
asset_object_id: generated.asset_object_id,
asset_kind: generated.asset_kind,
audio_src,
prompt: Some(normalized_prompt),
title: Some(normalized_title),
updated_at: Some(current_utc_iso_text()),
})
}
/// Submits an instrumental background-music task to `/suno/submit/music`.
///
/// Inputs are validated after the audio settings and HTTP client are resolved:
/// `prompt` may be empty, `title` and the optional `tags` are length-limited,
/// and a missing or blank `model` falls back to `SUNO_DEFAULT_MODEL`.
///
/// # Errors
///
/// Returns any settings, client, validation or upstream error, and a
/// bad-gateway error when the upstream response contains no task ID.
async fn create_background_music_task_response(
    state: &AppState,
    prompt: String,
    title: String,
    tags: Option<String>,
    model: Option<String>,
) -> Result<creation_audio::AudioGenerationTaskResponse, AppError> {
    let audio_settings = require_vector_engine_audio_settings(state)?;
    let client = build_vector_engine_audio_http_client(&audio_settings)?;

    let prompt = normalize_limited_text_allow_empty(&prompt, "prompt", SUNO_PROMPT_MAX_CHARS)?;
    let title = normalize_limited_text(&title, "title", SUNO_TITLE_MAX_CHARS)?;
    let tags = match tags.as_deref() {
        Some(raw) => Some(normalize_limited_text(raw, "tags", SUNO_TAGS_MAX_CHARS)?),
        None => None,
    };
    let model = match normalize_optional_text(model.as_deref()) {
        Some(name) => name,
        None => SUNO_DEFAULT_MODEL.to_string(),
    };

    // Keys are inserted in the same order as before; `tags` is only sent when provided.
    let mut request_body = Map::new();
    request_body.insert("prompt".to_string(), Value::String(prompt));
    request_body.insert("mv".to_string(), Value::String(model));
    request_body.insert("title".to_string(), Value::String(title));
    request_body.insert("task".to_string(), Value::String("generate".to_string()));
    request_body.insert("make_instrumental".to_string(), Value::Bool(true));
    if let Some(tags) = tags {
        request_body.insert("tags".to_string(), Value::String(tags));
    }

    let upstream = post_vector_engine_json(
        &client,
        &audio_settings,
        "/suno/submit/music",
        Value::Object(request_body),
        "提交 Suno 背景音乐任务失败",
    )
    .await?;

    // Look for the task ID at `data` first, then under `task_id` / `taskId` keys.
    let task_id = match extract_string_by_path(&upstream, &["data"]) {
        Some(id) => id,
        None => find_first_string_by_key(&upstream, "task_id")
            .or_else(|| find_first_string_by_key(&upstream, "taskId"))
            .ok_or_else(|| {
                vector_engine_bad_gateway("提交 Suno 背景音乐任务失败:上游未返回任务 ID")
            })?,
    };

    Ok(creation_audio::AudioGenerationTaskResponse {
        kind: creation_audio::CreationAudioGenerationKind::BackgroundMusic,
        task_id,
        provider: VECTOR_ENGINE_SUNO_PROVIDER.to_string(),
        status: "submitted".to_string(),
    })
}
async fn create_sound_effect_task_response(
state: &AppState,
prompt: String,
duration: Option<u8>,
seed: Option<u64>,
) -> Result<creation_audio::AudioGenerationTaskResponse, AppError> {
let settings = require_vector_engine_audio_settings(state)?;
let http_client = build_vector_engine_audio_http_client(&settings)?;
let prompt = normalize_limited_text(&prompt, "prompt", VIDU_PROMPT_MAX_CHARS)?;
let duration = duration
.unwrap_or(DEFAULT_SOUND_EFFECT_DURATION_SECONDS)
.clamp(2, 10);
let mut body = Map::from_iter([
(
"model".to_string(),
Value::String(VIDU_AUDIO_MODEL.to_string()),
),
("prompt".to_string(), Value::String(prompt)),
("duration".to_string(), json!(duration)),
]);
if let Some(seed) = seed {
body.insert("seed".to_string(), json!(seed));
}
let response = post_vector_engine_json(
&http_client,
&settings,
"/ent/v2/text2audio",
Value::Object(body),
"提交 Vidu 音效任务失败",
)
.await?;
let task_id = find_first_string_by_key(&response, "task_id")
.or_else(|| find_first_string_by_key(&response, "taskId"))
.ok_or_else(|| vector_engine_bad_gateway("提交 Vidu 音效任务失败:上游未返回任务 ID"))?;
let status = find_first_string_by_key(&response, "state").unwrap_or_else(|| "created".into());
Ok(creation_audio::AudioGenerationTaskResponse {
kind: creation_audio::CreationAudioGenerationKind::SoundEffect,
task_id,
provider: VECTOR_ENGINE_VIDU_PROVIDER.to_string(),
status,
})
let _ = parse_json_payload(&request_context, payload)?;
Err(creation_audio_generation_disabled_error()
.into_response_with_context(Some(&request_context)))
}
pub async fn publish_visual_novel_background_music_asset(
@@ -516,45 +301,27 @@ pub async fn publish_visual_novel_sound_effect_asset(
}
pub async fn publish_background_music_asset(
State(state): State<AppState>,
Path(task_id): Path<String>,
State(_state): State<AppState>,
Path(_task_id): Path<String>,
axum::extract::Extension(request_context): axum::extract::Extension<RequestContext>,
axum::extract::Extension(authenticated): axum::extract::Extension<AuthenticatedAccessToken>,
axum::extract::Extension(_authenticated): axum::extract::Extension<AuthenticatedAccessToken>,
payload: Result<Json<creation_audio::PublishGeneratedAudioAssetRequest>, JsonRejection>,
) -> Result<Json<Value>, Response> {
let payload = parse_json_payload(&request_context, payload)?.0;
let target = build_creation_audio_target(payload)?;
publish_generated_audio_asset(
&state,
authenticated.claims().user_id(),
task_id,
AudioAssetSlot::BackgroundMusic,
target,
)
.await
.map(|payload| json_success_body(Some(&request_context), payload))
.map_err(|error| error.into_response_with_context(Some(&request_context)))
Err(creation_audio_generation_disabled_error_for_target(payload)
.into_response_with_context(Some(&request_context)))
}
pub async fn publish_sound_effect_asset(
State(state): State<AppState>,
Path(task_id): Path<String>,
State(_state): State<AppState>,
Path(_task_id): Path<String>,
axum::extract::Extension(request_context): axum::extract::Extension<RequestContext>,
axum::extract::Extension(authenticated): axum::extract::Extension<AuthenticatedAccessToken>,
axum::extract::Extension(_authenticated): axum::extract::Extension<AuthenticatedAccessToken>,
payload: Result<Json<creation_audio::PublishGeneratedAudioAssetRequest>, JsonRejection>,
) -> Result<Json<Value>, Response> {
let payload = parse_json_payload(&request_context, payload)?.0;
let target = build_creation_audio_target(payload)?;
publish_generated_audio_asset(
&state,
authenticated.claims().user_id(),
task_id,
AudioAssetSlot::SoundEffect,
target,
)
.await
.map(|payload| json_success_body(Some(&request_context), payload))
.map_err(|error| error.into_response_with_context(Some(&request_context)))
Err(creation_audio_generation_disabled_error_for_target(payload)
.into_response_with_context(Some(&request_context)))
}
async fn publish_generated_audio_asset(
@@ -650,45 +417,6 @@ async fn publish_generated_audio_asset(
})
}
/// Polls `publish_generated_audio_asset` until the asset has a playback address.
///
/// Makes up to `MAX_POLL_ATTEMPTS` attempts, pausing `POLL_INTERVAL` between
/// consecutive attempts. A response counts as ready when its `audio_src` is
/// present and non-blank.
///
/// # Errors
///
/// Propagates any error from `publish_generated_audio_asset` immediately.
/// When every attempt completes without a playable asset, returns a
/// bad-gateway timeout error that reports the last observed status, or the
/// task ID when no status was observed.
async fn wait_for_generated_audio_asset(
    state: &AppState,
    owner_user_id: &str,
    task_id: String,
    slot: AudioAssetSlot,
    target: AudioAssetBindingTarget,
) -> Result<creation_audio::GeneratedAudioAssetResponse, AppError> {
    /// Maximum number of polls before giving up.
    const MAX_POLL_ATTEMPTS: u32 = 40;
    /// Pause between two consecutive polls.
    const POLL_INTERVAL: Duration = Duration::from_millis(3_000);

    let mut latest_status = String::new();
    for attempt in 1..=MAX_POLL_ATTEMPTS {
        let response = publish_generated_audio_asset(
            state,
            owner_user_id,
            task_id.clone(),
            slot,
            target.clone(),
        )
        .await?;

        let has_audio = response
            .audio_src
            .as_deref()
            .map(str::trim)
            .is_some_and(|value| !value.is_empty());
        if has_audio {
            return Ok(response);
        }
        latest_status = response.status;

        // Only wait when another poll follows; sleeping after the final
        // attempt would just delay the timeout error.
        if attempt < MAX_POLL_ATTEMPTS {
            tokio::time::sleep(POLL_INTERVAL).await;
        }
    }

    Err(vector_engine_bad_gateway(format!(
        "音频生成超时:{}",
        if latest_status.trim().is_empty() {
            task_id
        } else {
            latest_status
        }
    )))
}
fn build_audio_billing_asset_id(
task_id: &str,
slot: AudioAssetSlot,
@@ -888,33 +616,21 @@ fn build_visual_novel_audio_target(
})
}
fn build_creation_audio_target(
/// Builds the `410 Gone` error returned while creation audio generation is disabled.
///
/// The error details carry the provider name and a user-facing message.
fn creation_audio_generation_disabled_error() -> AppError {
    let details = json!({
        "provider": VECTOR_ENGINE_PROVIDER,
        "message": "拼图与抓大鹅音频生成入口已临时关闭",
    });
    AppError::from_status(StatusCode::GONE).with_details(details)
}
fn creation_audio_generation_disabled_error_for_target(
payload: creation_audio::PublishGeneratedAudioAssetRequest,
) -> Result<AudioAssetBindingTarget, AppError> {
let entity_kind = normalize_limited_text(&payload.entity_kind, "entityKind", 80)?;
let entity_id = normalize_limited_text(&payload.entity_id, "entityId", 160)?;
let slot = normalize_limited_text(&payload.slot, "slot", 80)?;
let asset_kind = normalize_limited_text(&payload.asset_kind, "assetKind", 80)?;
let storage_prefix = match payload.storage_prefix {
Some(creation_audio::CreationAudioStoragePrefix::PuzzleAssets) => {
LegacyAssetPrefix::PuzzleAssets
}
Some(creation_audio::CreationAudioStoragePrefix::Match3DAssets) => {
LegacyAssetPrefix::Match3DAssets
}
Some(creation_audio::CreationAudioStoragePrefix::CustomWorldScenes) | None => {
LegacyAssetPrefix::CustomWorldScenes
}
};
Ok(AudioAssetBindingTarget {
storage_scope: entity_kind.clone(),
entity_kind,
entity_id,
slot,
asset_kind,
profile_id: normalize_optional_text(payload.profile_id.as_deref()),
storage_prefix,
})
) -> AppError {
creation_audio_generation_disabled_error().with_details(json!({
"provider": VECTOR_ENGINE_PROVIDER,
"message": "拼图与抓大鹅音频生成入口已临时关闭",
"entityKind": payload.entity_kind.trim(),
}))
}
fn require_vector_engine_audio_settings(
@@ -1253,24 +969,6 @@ fn normalize_limited_text(
Ok(normalized)
}
/// Trims `value` and enforces a maximum length, accepting an empty result.
///
/// The length is measured in `char`s after trimming.
///
/// # Errors
///
/// Returns a `400 Bad Request` error naming `field` when the trimmed text is
/// longer than `max_chars`.
fn normalize_limited_text_allow_empty(
    value: &str,
    field: &'static str,
    max_chars: usize,
) -> Result<String, AppError> {
    let trimmed = value.trim();
    if trimmed.chars().count() <= max_chars {
        return Ok(trimmed.to_string());
    }

    let details = json!({
        "provider": VECTOR_ENGINE_PROVIDER,
        "field": field,
        "message": format!("{field} 超过 {} 字符", max_chars),
    });
    Err(AppError::from_status(StatusCode::BAD_REQUEST).with_details(details))
}
fn normalize_optional_text(value: Option<&str>) -> Option<String> {
value
.map(str::trim)
@@ -1369,11 +1067,6 @@ fn current_utc_micros() -> i64 {
shared_kernel::offset_datetime_to_unix_micros(time::OffsetDateTime::now_utc())
}
/// Returns the current UTC time as text.
///
/// Uses `shared_kernel::format_rfc3339`; if that fails, falls back to
/// `shared_kernel::format_timestamp_micros` on a freshly read microsecond timestamp.
fn current_utc_iso_text() -> String {
    let now = time::OffsetDateTime::now_utc();
    match shared_kernel::format_rfc3339(now) {
        Ok(text) => text,
        Err(_) => shared_kernel::format_timestamp_micros(current_utc_micros()),
    }
}
fn map_asset_field_error(error: module_assets::AssetObjectFieldError) -> AppError {
AppError::from_status(StatusCode::BAD_REQUEST).with_details(json!({
"provider": "asset-object",
@@ -1473,6 +1166,42 @@ mod tests {
);
}
/// Every creation audio publish target must be rejected with `410 Gone`.
#[test]
fn disabled_creation_audio_targets_return_gone() {
    // (entity_kind, entity_id, slot, asset_kind, profile_id, storage_prefix)
    let cases = [
        (
            "puzzle_work",
            "puzzle-profile-1",
            "background_music",
            "puzzle_background_music",
            "puzzle-profile-1",
            creation_audio::CreationAudioStoragePrefix::PuzzleAssets,
        ),
        (
            "match3d_work",
            "match3d-profile-1",
            "background_music",
            "match3d_background_music",
            "match3d-profile-1",
            creation_audio::CreationAudioStoragePrefix::Match3DAssets,
        ),
        (
            "match3d_item",
            "match3d-item-1",
            "click_sound",
            "match3d_click_sound",
            "match3d-profile-1",
            creation_audio::CreationAudioStoragePrefix::Match3DAssets,
        ),
    ];

    for (entity_kind, entity_id, slot, asset_kind, profile_id, storage_prefix) in cases {
        let payload = creation_audio::PublishGeneratedAudioAssetRequest {
            entity_kind: entity_kind.to_string(),
            entity_id: entity_id.to_string(),
            slot: slot.to_string(),
            asset_kind: asset_kind.to_string(),
            profile_id: Some(profile_id.to_string()),
            storage_prefix: Some(storage_prefix),
        };
        let error = creation_audio_generation_disabled_error_for_target(payload);
        assert_eq!(error.status_code(), StatusCode::GONE);
    }
}
#[test]
fn validates_prompt_length() {
let prompt = "".repeat(VIDU_PROMPT_MAX_CHARS + 1);