完善外部生成Worker动态扩缩容

新增外部生成controller进程角色与systemd服务

补齐队列统计procedure与spacetime-client绑定

更新生产部署脚本、健康巡检和server provision的worker/controller口径

新增容器worker smoke脚本并同步运维文档与团队记忆
This commit is contained in:
2026-06-12 15:21:35 +08:00
parent 69815d918a
commit 4a6c126366
30 changed files with 2030 additions and 28 deletions

View File

@@ -28,6 +28,13 @@ pub struct AppConfig {
pub external_generation_worker_concurrency: usize,
pub external_generation_worker_poll_interval: Duration,
pub external_generation_worker_lease: Duration,
pub external_generation_controller_min_workers: usize,
pub external_generation_controller_max_workers: usize,
pub external_generation_controller_target_jobs_per_worker: usize,
pub external_generation_controller_poll_interval: Duration,
pub external_generation_controller_scale_down_idle_rounds: u32,
pub external_generation_controller_service_template: String,
pub external_generation_controller_dry_run: bool,
pub max_concurrent_requests: Option<usize>,
pub gallery_max_concurrent_requests: Option<usize>,
pub detail_max_concurrent_requests: Option<usize>,
@@ -181,6 +188,7 @@ pub struct AppConfig {
/// Role a single process instance takes on.
///
/// The role decides which subsystems are enabled, as reported by the
/// `runs_http`, `runs_external_generation_worker` and
/// `runs_external_generation_controller` predicates.
pub enum ProcessRole {
/// Named "api". `runs_http()` is true; the worker and controller predicates are false.
Api,
/// Named "external-generation-worker". Only `runs_external_generation_worker()` is true.
ExternalGenerationWorker,
/// Named "external-generation-controller". Only
/// `runs_external_generation_controller()` is true.
ExternalGenerationController,
/// Named "all". `runs_http()` and `runs_external_generation_worker()` are true, but
/// `runs_external_generation_controller()` is false: the controller role is never
/// implied by `All` and must be selected explicitly.
All,
}
@@ -208,6 +216,7 @@ impl ProcessRole {
match self {
Self::Api => "api",
Self::ExternalGenerationWorker => "external-generation-worker",
Self::ExternalGenerationController => "external-generation-controller",
Self::All => "all",
}
}
@@ -219,6 +228,10 @@ impl ProcessRole {
/// Reports whether this role should run the external generation worker.
///
/// True for the dedicated worker role and for `All`; false for every other role.
pub fn runs_external_generation_worker(self) -> bool {
    // Exhaustive on purpose: adding a variant forces a decision here.
    match self {
        Self::ExternalGenerationWorker | Self::All => true,
        Self::Api | Self::ExternalGenerationController => false,
    }
}
/// Reports whether this role should run the external generation controller.
///
/// True only for the dedicated controller role. `All` does not include the
/// controller, so it has to be requested explicitly.
pub fn runs_external_generation_controller(self) -> bool {
    // Exhaustive on purpose: adding a variant forces a decision here.
    match self {
        Self::ExternalGenerationController => true,
        Self::Api | Self::ExternalGenerationWorker | Self::All => false,
    }
}
}
impl Default for AppConfig {
@@ -234,6 +247,14 @@ impl Default for AppConfig {
external_generation_worker_concurrency: 2,
external_generation_worker_poll_interval: Duration::from_millis(2_000),
external_generation_worker_lease: Duration::from_secs(3_600),
external_generation_controller_min_workers: 1,
external_generation_controller_max_workers: 8,
external_generation_controller_target_jobs_per_worker: 2,
external_generation_controller_poll_interval: Duration::from_millis(10_000),
external_generation_controller_scale_down_idle_rounds: 6,
external_generation_controller_service_template:
"genarrative-external-generation-worker@{}.service".to_string(),
external_generation_controller_dry_run: false,
max_concurrent_requests: None,
gallery_max_concurrent_requests: None,
detail_max_concurrent_requests: None,
@@ -459,6 +480,49 @@ impl AppConfig {
]) {
config.external_generation_worker_lease = Duration::from_secs(lease_seconds.max(1));
}
if let Some(min_workers) =
read_first_usize_env(&["GENARRATIVE_EXTERNAL_GENERATION_CONTROLLER_MIN_WORKERS"])
{
config.external_generation_controller_min_workers = min_workers;
}
if let Some(max_workers) =
read_first_usize_env(&["GENARRATIVE_EXTERNAL_GENERATION_CONTROLLER_MAX_WORKERS"])
{
config.external_generation_controller_max_workers = max_workers;
}
if config.external_generation_controller_max_workers
< config.external_generation_controller_min_workers
{
config.external_generation_controller_max_workers =
config.external_generation_controller_min_workers;
}
if let Some(target_jobs_per_worker) = read_first_usize_env(&[
"GENARRATIVE_EXTERNAL_GENERATION_CONTROLLER_TARGET_JOBS_PER_WORKER",
]) {
config.external_generation_controller_target_jobs_per_worker =
target_jobs_per_worker.max(1);
}
if let Some(poll_interval_ms) = read_first_positive_u64_env(&[
"GENARRATIVE_EXTERNAL_GENERATION_CONTROLLER_POLL_INTERVAL_MS",
]) {
config.external_generation_controller_poll_interval =
Duration::from_millis(poll_interval_ms);
}
if let Some(idle_rounds) = read_first_u32_env(&[
"GENARRATIVE_EXTERNAL_GENERATION_CONTROLLER_SCALE_DOWN_IDLE_ROUNDS",
]) {
config.external_generation_controller_scale_down_idle_rounds = idle_rounds;
}
if let Some(service_template) = read_first_non_empty_env(&[
"GENARRATIVE_EXTERNAL_GENERATION_CONTROLLER_SERVICE_TEMPLATE",
]) {
config.external_generation_controller_service_template = service_template;
}
if let Some(dry_run) =
read_first_bool_env(&["GENARRATIVE_EXTERNAL_GENERATION_CONTROLLER_DRY_RUN"])
{
config.external_generation_controller_dry_run = dry_run;
}
if let Some(max_concurrent_requests) =
read_first_usize_env(&["GENARRATIVE_API_MAX_CONCURRENT_REQUESTS"])
{
@@ -1214,6 +1278,9 @@ fn parse_process_role(value: &str) -> Option<ProcessRole> {
"external-generation-worker" | "external_generation_worker" | "worker" => {
Some(ProcessRole::ExternalGenerationWorker)
}
"external-generation-controller" | "external_generation_controller" | "controller" => {
Some(ProcessRole::ExternalGenerationController)
}
"all" => Some(ProcessRole::All),
_ => None,
}
@@ -1419,15 +1486,29 @@ mod tests {
parse_process_role("worker"),
Some(ProcessRole::ExternalGenerationWorker)
);
assert_eq!(
parse_process_role("controller"),
Some(ProcessRole::ExternalGenerationController)
);
assert_eq!(
parse_process_role("'external_generation_controller'"),
Some(ProcessRole::ExternalGenerationController)
);
assert_eq!(parse_process_role("all"), Some(ProcessRole::All));
assert_eq!(parse_process_role("unknown"), None);
assert!(ProcessRole::Api.runs_http());
assert!(!ProcessRole::Api.runs_external_generation_worker());
assert!(!ProcessRole::Api.runs_external_generation_controller());
assert!(!ProcessRole::ExternalGenerationWorker.runs_http());
assert!(ProcessRole::ExternalGenerationWorker.runs_external_generation_worker());
assert!(!ProcessRole::ExternalGenerationWorker.runs_external_generation_controller());
assert!(!ProcessRole::ExternalGenerationController.runs_http());
assert!(!ProcessRole::ExternalGenerationController.runs_external_generation_worker());
assert!(ProcessRole::ExternalGenerationController.runs_external_generation_controller());
assert!(ProcessRole::All.runs_http());
assert!(ProcessRole::All.runs_external_generation_worker());
assert!(!ProcessRole::All.runs_external_generation_controller());
}
#[test]