完善外部生成Worker动态扩缩容
新增外部生成controller进程角色与systemd服务
补齐队列统计procedure与spacetime-client绑定
更新生产部署脚本、健康巡检和server provision的worker/controller口径
新增容器worker smoke脚本并同步运维文档与团队记忆
This commit is contained in:
@@ -137,6 +137,27 @@ pub struct ExternalGenerationJobProcedureResult {
|
||||
pub error_message: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, SpacetimeType)]
|
||||
pub struct ExternalGenerationQueueStatsSnapshot {
|
||||
pub pending_count: u32,
|
||||
pub delayed_pending_count: u32,
|
||||
pub claimable_pending_count: u32,
|
||||
pub running_active_count: u32,
|
||||
pub expired_running_count: u32,
|
||||
// 中文注释:保留字段兼容已生成 bindings;controller 只按非终态队列压力扩缩容,不每轮扫描历史终态任务。
|
||||
pub terminal_count: u32,
|
||||
pub claimable_count: u32,
|
||||
pub oldest_claimable_age_micros: Option<i64>,
|
||||
pub now_micros: i64,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, SpacetimeType)]
|
||||
pub struct ExternalGenerationQueueStatsProcedureResult {
|
||||
pub ok: bool,
|
||||
pub stats: Option<ExternalGenerationQueueStatsSnapshot>,
|
||||
pub error_message: Option<String>,
|
||||
}
|
||||
|
||||
#[spacetimedb::procedure]
|
||||
pub fn enqueue_external_generation_job_and_return(
|
||||
ctx: &mut ProcedureContext,
|
||||
@@ -197,6 +218,24 @@ pub fn fail_external_generation_job_and_return(
|
||||
}
|
||||
}
|
||||
|
||||
#[spacetimedb::procedure]
|
||||
pub fn get_external_generation_queue_stats_and_return(
|
||||
ctx: &mut ProcedureContext,
|
||||
) -> ExternalGenerationQueueStatsProcedureResult {
|
||||
match ctx.try_with_tx(|tx| get_external_generation_queue_stats_tx(tx)) {
|
||||
Ok(stats) => ExternalGenerationQueueStatsProcedureResult {
|
||||
ok: true,
|
||||
stats: Some(stats),
|
||||
error_message: None,
|
||||
},
|
||||
Err(message) => ExternalGenerationQueueStatsProcedureResult {
|
||||
ok: false,
|
||||
stats: None,
|
||||
error_message: Some(message),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn enqueue_external_generation_job_tx(
|
||||
ctx: &ReducerContext,
|
||||
input: ExternalGenerationJobEnqueueInput,
|
||||
@@ -427,6 +466,58 @@ fn fail_external_generation_job_tx(
|
||||
Ok(map_external_generation_job_row(row))
|
||||
}
|
||||
|
||||
fn get_external_generation_queue_stats_tx(
|
||||
ctx: &ReducerContext,
|
||||
) -> Result<ExternalGenerationQueueStatsSnapshot, String> {
|
||||
let now = ctx.timestamp;
|
||||
let now_micros = now.to_micros_since_unix_epoch();
|
||||
let mut stats = ExternalGenerationQueueStatsSnapshot {
|
||||
pending_count: 0,
|
||||
delayed_pending_count: 0,
|
||||
claimable_pending_count: 0,
|
||||
running_active_count: 0,
|
||||
expired_running_count: 0,
|
||||
terminal_count: 0,
|
||||
claimable_count: 0,
|
||||
oldest_claimable_age_micros: None,
|
||||
now_micros,
|
||||
};
|
||||
|
||||
for row in ctx
|
||||
.db
|
||||
.external_generation_job()
|
||||
.by_external_generation_job_status_available()
|
||||
.filter(&EXTERNAL_GENERATION_STATUS_PENDING.to_string())
|
||||
{
|
||||
stats.pending_count = stats.pending_count.saturating_add(1);
|
||||
if is_external_generation_job_claimable(&row, now) {
|
||||
stats.claimable_pending_count = stats.claimable_pending_count.saturating_add(1);
|
||||
record_external_generation_claimable_age(&mut stats, &row, now_micros);
|
||||
} else {
|
||||
stats.delayed_pending_count = stats.delayed_pending_count.saturating_add(1);
|
||||
}
|
||||
}
|
||||
|
||||
for row in ctx
|
||||
.db
|
||||
.external_generation_job()
|
||||
.by_external_generation_job_status_available()
|
||||
.filter(&EXTERNAL_GENERATION_STATUS_RUNNING.to_string())
|
||||
{
|
||||
if is_external_generation_job_claimable(&row, now) {
|
||||
stats.expired_running_count = stats.expired_running_count.saturating_add(1);
|
||||
record_external_generation_claimable_age(&mut stats, &row, now_micros);
|
||||
} else {
|
||||
stats.running_active_count = stats.running_active_count.saturating_add(1);
|
||||
}
|
||||
}
|
||||
|
||||
stats.claimable_count = stats
|
||||
.claimable_pending_count
|
||||
.saturating_add(stats.expired_running_count);
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
pub(crate) fn validate_external_generation_job_lease_for_tx(
|
||||
ctx: &ReducerContext,
|
||||
job_id: &str,
|
||||
@@ -524,6 +615,22 @@ fn is_external_generation_job_claimable(row: &ExternalGenerationJob, now: Timest
|
||||
}
|
||||
}
|
||||
|
||||
fn record_external_generation_claimable_age(
|
||||
stats: &mut ExternalGenerationQueueStatsSnapshot,
|
||||
row: &ExternalGenerationJob,
|
||||
now_micros: i64,
|
||||
) {
|
||||
let age = now_micros
|
||||
.saturating_sub(row.available_at.to_micros_since_unix_epoch())
|
||||
.max(0);
|
||||
stats.oldest_claimable_age_micros = Some(
|
||||
stats
|
||||
.oldest_claimable_age_micros
|
||||
.map(|current| current.max(age))
|
||||
.unwrap_or(age),
|
||||
);
|
||||
}
|
||||
|
||||
fn persist_external_generation_job_row(ctx: &ReducerContext, row: ExternalGenerationJob) {
|
||||
ctx.db
|
||||
.external_generation_job()
|
||||
@@ -725,6 +832,30 @@ mod tests {
|
||||
assert_ne!(first, second);
|
||||
}
|
||||
|
||||
/// Recording a newer job first and an older job second must leave the older
/// job's age (10_000 - 1_000 = 9_000 microseconds) in the snapshot.
#[test]
fn claimable_age_keeps_oldest_available_job() {
    const NOW_MICROS: i64 = 10_000;

    let mut snapshot = ExternalGenerationQueueStatsSnapshot {
        pending_count: 0,
        delayed_pending_count: 0,
        claimable_pending_count: 0,
        running_active_count: 0,
        expired_running_count: 0,
        terminal_count: 0,
        claimable_count: 0,
        oldest_claimable_age_micros: None,
        now_micros: NOW_MICROS,
    };

    let mut older = external_generation_job_fixture(EXTERNAL_GENERATION_STATUS_PENDING);
    older.available_at = micros(1_000);
    let mut newer = external_generation_job_fixture(EXTERNAL_GENERATION_STATUS_RUNNING);
    newer.available_at = micros(8_000);

    // The newer job goes first so the older one has to replace its smaller age.
    for job in [&newer, &older] {
        record_external_generation_claimable_age(&mut snapshot, job, NOW_MICROS);
    }

    assert_eq!(snapshot.oldest_claimable_age_micros, Some(9_000));
}
|
||||
|
||||
#[test]
|
||||
fn positive_duration_between_client_times_is_preserved() {
|
||||
assert_eq!(
|
||||
|
||||
Reference in New Issue
Block a user