完善外部生成Worker动态扩缩容

新增外部生成controller进程角色与systemd服务

补齐队列统计procedure与spacetime-client绑定

更新生产部署脚本、健康巡检和server provision的worker/controller口径

新增容器worker smoke脚本并同步运维文档与团队记忆
This commit is contained in:
2026-06-12 15:21:35 +08:00
parent 69815d918a
commit 4a6c126366
30 changed files with 2030 additions and 28 deletions

View File

@@ -137,6 +137,27 @@ pub struct ExternalGenerationJobProcedureResult {
pub error_message: Option<String>,
}
/// Point-in-time counters describing the external generation job queue.
///
/// In this file the snapshot is filled by `get_external_generation_queue_stats_tx`,
/// which only walks rows whose status is pending or running.
#[derive(Clone, Debug, PartialEq, Eq, SpacetimeType)]
pub struct ExternalGenerationQueueStatsSnapshot {
// Number of rows in the pending status, claimable or not.
pub pending_count: u32,
// Pending rows that `is_external_generation_job_claimable` rejects at snapshot time.
pub delayed_pending_count: u32,
// Pending rows that `is_external_generation_job_claimable` accepts at snapshot time.
pub claimable_pending_count: u32,
// Running rows that are not claimable at snapshot time.
pub running_active_count: u32,
// Running rows that are claimable again at snapshot time.
pub expired_running_count: u32,
// Field kept for compatibility with the already generated bindings. The controller scales
// only on non-terminal queue pressure, so historical terminal jobs are not scanned on every
// round; the stats builder in this file leaves this value at 0.
pub terminal_count: u32,
// Sum of `claimable_pending_count` and `expired_running_count` (saturating).
pub claimable_count: u32,
// Largest age among the claimable rows, computed as `now_micros` minus the row's
// `available_at` and floored at 0. `None` when no claimable row was recorded.
pub oldest_claimable_age_micros: Option<i64>,
// Timestamp used as "now" for the snapshot, in microseconds since the Unix epoch.
pub now_micros: i64,
}
/// Result envelope returned by `get_external_generation_queue_stats_and_return`.
///
/// That procedure sets `ok` and `stats` together on success, and `error_message`
/// alone on failure.
#[derive(Clone, Debug, PartialEq, Eq, SpacetimeType)]
pub struct ExternalGenerationQueueStatsProcedureResult {
// `true` when the snapshot was computed, `false` when the transaction returned an error.
pub ok: bool,
// The computed snapshot; `None` on failure.
pub stats: Option<ExternalGenerationQueueStatsSnapshot>,
// The error text reported by the transaction; `None` on success.
pub error_message: Option<String>,
}
#[spacetimedb::procedure]
pub fn enqueue_external_generation_job_and_return(
ctx: &mut ProcedureContext,
@@ -197,6 +218,24 @@ pub fn fail_external_generation_job_and_return(
}
}
/// Procedure entry point that reports the current external generation queue statistics.
///
/// Runs `get_external_generation_queue_stats_tx` through `ctx.try_with_tx` and wraps the
/// outcome in an `ExternalGenerationQueueStatsProcedureResult`: on success `ok` is `true`
/// and `stats` carries the snapshot; on failure `ok` is `false` and `error_message`
/// carries the error text.
#[spacetimedb::procedure]
pub fn get_external_generation_queue_stats_and_return(
    ctx: &mut ProcedureContext,
) -> ExternalGenerationQueueStatsProcedureResult {
    let outcome = ctx.try_with_tx(|tx| get_external_generation_queue_stats_tx(tx));

    // Exactly one of the two optional fields is populated, depending on the outcome.
    let (stats, error_message) = match outcome {
        Ok(snapshot) => (Some(snapshot), None),
        Err(message) => (None, Some(message)),
    };

    ExternalGenerationQueueStatsProcedureResult {
        ok: stats.is_some(),
        stats,
        error_message,
    }
}
fn enqueue_external_generation_job_tx(
ctx: &ReducerContext,
input: ExternalGenerationJobEnqueueInput,
@@ -427,6 +466,58 @@ fn fail_external_generation_job_tx(
Ok(map_external_generation_job_row(row))
}
/// Computes a queue statistics snapshot from the pending and running job rows.
///
/// Uses `ctx.timestamp` as "now". Pending rows are split into claimable and delayed,
/// running rows into expired (claimable again) and active, both according to
/// `is_external_generation_job_claimable`. Every claimable row also feeds
/// `oldest_claimable_age_micros`. `terminal_count` is left at 0 because rows in other
/// statuses are not visited here.
///
/// The body shown here always returns `Ok`; the `Result` return type matches the
/// closure shape expected by the calling procedure.
fn get_external_generation_queue_stats_tx(
    ctx: &ReducerContext,
) -> Result<ExternalGenerationQueueStatsSnapshot, String> {
    let now = ctx.timestamp;
    let now_micros = now.to_micros_since_unix_epoch();
    let pending_status = EXTERNAL_GENERATION_STATUS_PENDING.to_string();
    let running_status = EXTERNAL_GENERATION_STATUS_RUNNING.to_string();

    let mut stats = ExternalGenerationQueueStatsSnapshot {
        now_micros,
        oldest_claimable_age_micros: None,
        pending_count: 0,
        delayed_pending_count: 0,
        claimable_pending_count: 0,
        running_active_count: 0,
        expired_running_count: 0,
        terminal_count: 0,
        claimable_count: 0,
    };

    // Pending rows: every row counts as pending, then as either delayed or claimable.
    for job in ctx
        .db
        .external_generation_job()
        .by_external_generation_job_status_available()
        .filter(&pending_status)
    {
        stats.pending_count = stats.pending_count.saturating_add(1);
        if !is_external_generation_job_claimable(&job, now) {
            stats.delayed_pending_count = stats.delayed_pending_count.saturating_add(1);
            continue;
        }
        stats.claimable_pending_count = stats.claimable_pending_count.saturating_add(1);
        record_external_generation_claimable_age(&mut stats, &job, now_micros);
    }

    // Running rows: a claimable running row is counted as expired, otherwise as active.
    for job in ctx
        .db
        .external_generation_job()
        .by_external_generation_job_status_available()
        .filter(&running_status)
    {
        if !is_external_generation_job_claimable(&job, now) {
            stats.running_active_count = stats.running_active_count.saturating_add(1);
            continue;
        }
        stats.expired_running_count = stats.expired_running_count.saturating_add(1);
        record_external_generation_claimable_age(&mut stats, &job, now_micros);
    }

    // Total claimable work is the claimable pending rows plus the expired running rows.
    let claimable_total = stats
        .claimable_pending_count
        .saturating_add(stats.expired_running_count);
    stats.claimable_count = claimable_total;

    Ok(stats)
}
pub(crate) fn validate_external_generation_job_lease_for_tx(
ctx: &ReducerContext,
job_id: &str,
@@ -524,6 +615,22 @@ fn is_external_generation_job_claimable(row: &ExternalGenerationJob, now: Timest
}
}
/// Folds one claimable row into `stats.oldest_claimable_age_micros`.
///
/// The row's age is `now_micros` minus its `available_at` (in microseconds since the
/// Unix epoch), computed with saturating subtraction and floored at 0. The stored value
/// becomes the larger of the previous maximum and this age, so it is always `Some`
/// after the call.
fn record_external_generation_claimable_age(
    stats: &mut ExternalGenerationQueueStatsSnapshot,
    row: &ExternalGenerationJob,
    now_micros: i64,
) {
    let available_micros = row.available_at.to_micros_since_unix_epoch();
    // A row whose `available_at` lies after `now_micros` contributes an age of 0.
    let candidate = now_micros.saturating_sub(available_micros).max(0);

    let oldest = match stats.oldest_claimable_age_micros {
        Some(existing) if existing >= candidate => existing,
        _ => candidate,
    };
    stats.oldest_claimable_age_micros = Some(oldest);
}
fn persist_external_generation_job_row(ctx: &ReducerContext, row: ExternalGenerationJob) {
ctx.db
.external_generation_job()
@@ -725,6 +832,30 @@ mod tests {
assert_ne!(first, second);
}
/// Recording a newer job and then an older one must leave the older job's age in place.
#[test]
fn claimable_age_keeps_oldest_available_job() {
    let now_micros: i64 = 10_000;

    let mut old_job = external_generation_job_fixture(EXTERNAL_GENERATION_STATUS_PENDING);
    old_job.available_at = micros(1_000);
    let mut newer_job = external_generation_job_fixture(EXTERNAL_GENERATION_STATUS_RUNNING);
    newer_job.available_at = micros(8_000);

    let mut stats = ExternalGenerationQueueStatsSnapshot {
        now_micros,
        oldest_claimable_age_micros: None,
        pending_count: 0,
        delayed_pending_count: 0,
        claimable_pending_count: 0,
        running_active_count: 0,
        expired_running_count: 0,
        terminal_count: 0,
        claimable_count: 0,
    };

    // Feed the newer job first so the test covers replacing a smaller age (2_000)
    // with a larger one (9_000).
    for job in [&newer_job, &old_job] {
        record_external_generation_claimable_age(&mut stats, job, now_micros);
    }

    assert_eq!(stats.oldest_claimable_age_micros, Some(9_000));
}
#[test]
fn positive_duration_between_client_times_is_preserved() {
assert_eq!(