完善外部生成Worker动态扩缩容
新增外部生成 controller 进程角色与 systemd 服务；补齐队列统计 procedure 与 spacetime-client 绑定；更新生产部署脚本、健康巡检和 server provision 的 worker/controller 口径；新增容器 worker smoke 脚本，并同步运维文档与团队记忆。
This commit is contained in:
@@ -20,9 +20,11 @@ const DEFAULT_PUBLIC_PATHS = [
|
||||
|
||||
// Fixed-name systemd units; main() passes each entry to checkService() in turn.
const DEFAULT_SERVICES = [
|
||||
'genarrative-api.service',
|
||||
'genarrative-external-generation-controller.service',
|
||||
'spacetimedb.service',
|
||||
'nginx.service',
|
||||
];
|
||||
// Glob covering every templated worker unit instance; handed to
// `systemctl list-units` and to `journalctl -u` instead of a fixed unit name.
const WORKER_SERVICE_PATTERN = 'genarrative-external-generation-worker@*.service';
|
||||
|
||||
function usage() {
|
||||
console.log(`Usage:
|
||||
@@ -216,6 +218,61 @@ async function checkService(serviceName, timeoutMs) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Verify that at least one templated external-generation worker unit is active.
 *
 * Runs `systemctl list-units` against WORKER_SERVICE_PATTERN, restricted to
 * active service units, and extracts the unit name from the first column of
 * every output row.
 *
 * @param {object} config - Health-check configuration; only `timeoutMs` is
 *   read here and forwarded to runCommand().
 * @returns {Promise<*>} Whatever checkResult() builds: CRITICAL when the
 *   listing command exits non-zero or no worker unit is listed, OK otherwise
 *   (with the matched unit names attached).
 */
async function checkActiveWorkerInstances(config) {
  const checkName = 'service:external-generation-workers';
  const listArgs = [
    'list-units',
    WORKER_SERVICE_PATTERN,
    '--type=service',
    '--state=active',
    '--no-legend',
    '--plain',
    '--no-pager',
  ];
  const listing = await runCommand('systemctl', listArgs, config.timeoutMs);

  // A non-zero exit means the units could not be enumerated at all.
  if (listing.code !== 0) {
    return checkResult(checkName, 'CRITICAL', '无法枚举外部生成 worker 实例', {
      command: listing.command,
      stderr: listing.stderr.trim() || listing.error,
    });
  }

  // Keep only first-column tokens that look like a worker instance unit;
  // blank rows yield an empty token and are rejected by the pattern.
  const workerUnitName = /^genarrative-external-generation-worker@.+\.service$/u;
  const services = [];
  for (const row of listing.stdout.split('\n')) {
    const [unitName] = row.trim().split(/\s+/u);
    if (workerUnitName.test(unitName)) {
      services.push(unitName);
    }
  }

  if (services.length === 0) {
    return checkResult(checkName, 'CRITICAL', '没有 active 的外部生成 worker 实例', {
      command: listing.command,
    });
  }

  return checkResult(checkName, 'OK', `${services.length} 个 worker active`, {
    command: listing.command,
    services,
  });
}
|
||||
|
||||
function requestUrl(url, timeoutMs) {
|
||||
return new Promise((resolve) => {
|
||||
const startedAt = Date.now();
|
||||
@@ -310,6 +367,10 @@ async function checkRecentJournal(config) {
|
||||
'-u',
|
||||
'genarrative-api.service',
|
||||
'-u',
|
||||
'genarrative-external-generation-controller.service',
|
||||
'-u',
|
||||
WORKER_SERVICE_PATTERN,
|
||||
'-u',
|
||||
'spacetimedb.service',
|
||||
'-u',
|
||||
'nginx.service',
|
||||
@@ -426,6 +487,7 @@ async function main() {
|
||||
for (const serviceName of DEFAULT_SERVICES) {
|
||||
checks.push(await checkService(serviceName, config.timeoutMs));
|
||||
}
|
||||
checks.push(await checkActiveWorkerInstances(config));
|
||||
|
||||
checks.push(await checkHttp('api:/healthz', joinUrl(config.apiBaseUrl, '/healthz'), config));
|
||||
checks.push(await checkHttp('api:/readyz', joinUrl(config.apiBaseUrl, '/readyz'), config));
|
||||
|
||||
Reference in New Issue
Block a user