完善外部生成Worker动态扩缩容

新增外部生成controller进程角色与systemd服务

补齐队列统计procedure与spacetime-client绑定

更新生产部署脚本、健康巡检和server provision的worker/controller口径

新增容器worker smoke脚本并同步运维文档与团队记忆
This commit is contained in:
2026-06-12 15:21:35 +08:00
parent 69815d918a
commit 4a6c126366
30 changed files with 2030 additions and 28 deletions

View File

@@ -20,9 +20,11 @@ const DEFAULT_PUBLIC_PATHS = [
// Fixed-name systemd units covered by the health check: main() passes each
// entry to checkService() in turn.
const DEFAULT_SERVICES = [
'genarrative-api.service',
'genarrative-external-generation-controller.service',
'spacetimedb.service',
'nginx.service',
];
// Glob matching the templated worker units. Worker instances are not listed by
// name above; checkActiveWorkerInstances() enumerates them with
// `systemctl list-units`, and checkRecentJournal() passes this same pattern as
// a `-u` unit filter.
const WORKER_SERVICE_PATTERN = 'genarrative-external-generation-worker@*.service';
function usage() {
console.log(`Usage:
@@ -216,6 +218,61 @@ async function checkService(serviceName, timeoutMs) {
);
}
/**
 * Check that at least one external-generation worker unit is currently active.
 *
 * Runs `systemctl list-units` restricted to WORKER_SERVICE_PATTERN and active
 * services, then extracts the unit name (first whitespace-separated column) of
 * every output line that names a worker instance.
 *
 * @param {{ timeoutMs: number }} config - Health-check configuration; only
 *   `timeoutMs` is read here and it is forwarded to runCommand().
 * @returns {Promise<*>} The value produced by checkResult():
 *   - CRITICAL when systemctl exits with a non-zero code (details carry the
 *     command plus trimmed stderr, or the spawn error when stderr is empty);
 *   - CRITICAL when no active worker unit is listed;
 *   - OK otherwise, with the list of active worker unit names in the details.
 */
async function checkActiveWorkerInstances(config) {
  const checkName = 'service:external-generation-workers';
  const listUnitsArgs = [
    'list-units',
    WORKER_SERVICE_PATTERN,
    '--type=service',
    '--state=active',
    '--no-legend',
    '--plain',
    '--no-pager',
  ];
  const result = await runCommand('systemctl', listUnitsArgs, config.timeoutMs);

  // A failing systemctl means the instances could not be listed at all, which
  // is reported separately from "listed, but none active".
  if (result.code !== 0) {
    const failureDetails = {
      command: result.command,
      stderr: result.stderr.trim() || result.error,
    };
    return checkResult(
      checkName,
      'CRITICAL',
      '无法枚举外部生成 worker 实例',
      failureDetails,
    );
  }

  // Keep only the first column of each line, and only when it really is a
  // worker instance name; blank lines yield '' and are rejected by the regex.
  const workerUnitName = /^genarrative-external-generation-worker@.+\.service$/u;
  const services = [];
  for (const line of result.stdout.split('\n')) {
    const unit = line.trim().split(/\s+/u)[0];
    if (workerUnitName.test(unit)) {
      services.push(unit);
    }
  }

  if (services.length === 0) {
    return checkResult(
      checkName,
      'CRITICAL',
      '没有 active 的外部生成 worker 实例',
      { command: result.command },
    );
  }

  const okDetails = { command: result.command, services };
  return checkResult(
    checkName,
    'OK',
    `${services.length} 个 worker active`,
    okDetails,
  );
}
function requestUrl(url, timeoutMs) {
return new Promise((resolve) => {
const startedAt = Date.now();
@@ -310,6 +367,10 @@ async function checkRecentJournal(config) {
'-u',
'genarrative-api.service',
'-u',
'genarrative-external-generation-controller.service',
'-u',
WORKER_SERVICE_PATTERN,
'-u',
'spacetimedb.service',
'-u',
'nginx.service',
@@ -426,6 +487,7 @@ async function main() {
for (const serviceName of DEFAULT_SERVICES) {
checks.push(await checkService(serviceName, config.timeoutMs));
}
checks.push(await checkActiveWorkerInstances(config));
checks.push(await checkHttp('api:/healthz', joinUrl(config.apiBaseUrl, '/healthz'), config));
checks.push(await checkHttp('api:/readyz', joinUrl(config.apiBaseUrl, '/readyz'), config));