#!/usr/bin/env node
import {execFile} from 'node:child_process';
import http from 'node:http';
import https from 'node:https';
import {mkdir, writeFile} from 'node:fs/promises';
import {dirname} from 'node:path';

/** Severity order used to pick the worst status across all checks. */
const STATUS_RANK = {
  OK: 0,
  WARNING: 1,
  CRITICAL: 2,
};

/** Public API paths probed when no --public-path flag is given. */
const DEFAULT_PUBLIC_PATHS = [
  '/api/creation-entry/config',
  '/api/runtime/puzzle/gallery',
  '/api/runtime/custom-world-gallery',
];

/** systemd units whose active state is checked on every run. */
const DEFAULT_SERVICES = [
  'genarrative-api.service',
  'spacetimedb.service',
  'nginx.service',
];

/**
 * Prints the command-line help to stdout.
 */
function usage() {
  console.log(`Usage: node scripts/ops/production-health-patrol.mjs [options]

Options:
  -h, --help                 Show this help and exit
  --api-base-url URL         API direct base URL, default http://127.0.0.1:8082
  --spacetime-base-url URL   SpacetimeDB base URL, default http://127.0.0.1:3101
  --public-base-url URL      Nginx/public base URL, default is the
                             GENARRATIVE_HEALTH_PATROL_API_BASE_URL env value,
                             else http://127.0.0.1:8082
  --public-path PATH         Public API path to probe; repeatable
  --status-file FILE         Write the last patrol result as JSON
  --timeout-ms MS            HTTP/command timeout, default 5000
  --slow-ms MS               Mark successful probes slower than this as WARNING, default 3000
  --fail-on-warning          Exit 1 when the total status is WARNING
  --skip-journal             Skip recent journal error scan
  --json                     Print JSON instead of text
`);
}

/**
 * Reads a boolean flag from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {boolean} [fallback=false] - Returned when the variable is unset or empty.
 * @returns {boolean} true when the trimmed, lower-cased value is one of
 *   '1', 'true', 'yes' or 'on'; false for any other non-empty value.
 */
function readBoolEnv(name, fallback = false) {
  const value = process.env[name];
  if (!value) {
    return fallback;
  }
  return ['1', 'true', 'yes', 'on'].includes(value.trim().toLowerCase());
}

/**
 * Parses a base-10 integer that must be strictly positive.
 *
 * @param {unknown} raw - Raw value; null/undefined are treated as an empty string.
 * @param {number} fallback - Returned when parsing fails or the result is <= 0.
 * @returns {number} The parsed integer or the fallback.
 */
function parsePositiveInt(raw, fallback) {
  const value = Number.parseInt(String(raw ?? ''), 10);
  return Number.isFinite(value) && value > 0 ? value : fallback;
}

/**
 * Builds the patrol configuration from environment variables and CLI flags.
 * CLI flags override the environment-derived values.
 *
 * @param {string[]} argv - Command-line arguments without the node/script prefix.
 * @returns {object} The resolved configuration.
 * @throws {Error} On an unknown flag, or when a value flag has no value.
 */
function parseArgs(argv) {
  // `||` is used on purpose so that empty-string env values fall back too.
  const config = {
    apiBaseUrl:
      process.env.GENARRATIVE_HEALTH_PATROL_API_BASE_URL || 'http://127.0.0.1:8082',
    spacetimeBaseUrl:
      process.env.GENARRATIVE_HEALTH_PATROL_SPACETIME_BASE_URL || 'http://127.0.0.1:3101',
    // NOTE(review): the public probes default to the API base URL rather than
    // to Nginx on port 80 — confirm this is intended.
    publicBaseUrl:
      process.env.GENARRATIVE_HEALTH_PATROL_PUBLIC_BASE_URL ||
      process.env.GENARRATIVE_HEALTH_PATROL_API_BASE_URL ||
      'http://127.0.0.1:8082',
    publicPaths: [],
    statusFile: process.env.GENARRATIVE_HEALTH_PATROL_STATUS_FILE || '',
    timeoutMs: parsePositiveInt(
      process.env.GENARRATIVE_HEALTH_PATROL_TIMEOUT_MS,
      5000,
    ),
    slowMs: parsePositiveInt(
      process.env.GENARRATIVE_HEALTH_PATROL_SLOW_MS,
      3000,
    ),
    failOnWarning: readBoolEnv('GENARRATIVE_HEALTH_PATROL_FAIL_ON_WARNING'),
    skipJournal: readBoolEnv('GENARRATIVE_HEALTH_PATROL_SKIP_JOURNAL'),
    json: false,
    webhookUrl: process.env.GENARRATIVE_HEALTH_PATROL_WEBHOOK_URL || '',
  };

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    switch (arg) {
      case '-h':
      case '--help':
        usage();
        process.exit(0);
        break;
      case '--api-base-url':
        config.apiBaseUrl = requireValue(argv, ++index, arg);
        break;
      case '--spacetime-base-url':
        config.spacetimeBaseUrl = requireValue(argv, ++index, arg);
        break;
      case '--public-base-url':
        config.publicBaseUrl = requireValue(argv, ++index, arg);
        break;
      case '--public-path':
        config.publicPaths.push(requireValue(argv, ++index, arg));
        break;
      case '--status-file':
        config.statusFile = requireValue(argv, ++index, arg);
        break;
      case '--timeout-ms':
        // An unparsable value keeps the current (env or default) timeout
        // instead of resetting it to the hard-coded default.
        config.timeoutMs = parsePositiveInt(
          requireValue(argv, ++index, arg),
          config.timeoutMs,
        );
        break;
      case '--slow-ms':
        // Same fallback rule as --timeout-ms.
        config.slowMs = parsePositiveInt(
          requireValue(argv, ++index, arg),
          config.slowMs,
        );
        break;
      case '--fail-on-warning':
        config.failOnWarning = true;
        break;
      case '--skip-journal':
        config.skipJournal = true;
        break;
      case '--json':
        config.json = true;
        break;
      default:
        throw new Error(`未知参数: ${arg}`);
    }
  }

  if (config.publicPaths.length === 0) {
    // Copy so later mutation of the config cannot alter the shared default list.
    config.publicPaths = [...DEFAULT_PUBLIC_PATHS];
  }
  return config;
}
/**
 * Returns the value that follows a flag on the command line.
 *
 * @param {string[]} argv - Full argument list.
 * @param {number} index - Position where the flag's value is expected.
 * @param {string} flag - Flag name, used in the error message.
 * @returns {string} The value at `index`.
 * @throws {Error} When the value is missing, empty, or starts with `--`.
 */
function requireValue(argv, index, flag) {
  const value = argv[index];
  if (!value || value.startsWith('--')) {
    throw new Error(`${flag} 缺少参数值`);
  }
  return value;
}

/**
 * Joins a base URL and a path with exactly one slash between them.
 * Only a single trailing slash is removed from the base.
 *
 * @param {string} baseUrl - Base URL, with or without a trailing slash.
 * @param {string} path - Path, with or without a leading slash.
 * @returns {string} The combined URL.
 */
function joinUrl(baseUrl, path) {
  const base = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl;
  const suffix = path.startsWith('/') ? path : `/${path}`;
  return `${base}${suffix}`;
}

/**
 * Returns the most severe status among the given checks.
 *
 * @param {Array<{status: string}>} checks - Check results.
 * @returns {string} 'OK' for an empty list, otherwise the highest-ranked
 *   status according to STATUS_RANK.
 */
function maxStatus(checks) {
  return checks.reduce((current, check) => {
    return STATUS_RANK[check.status] > STATUS_RANK[current]
      ? check.status
      : current;
  }, 'OK');
}

/**
 * Builds one check result record.
 *
 * @param {string} name - Check identifier.
 * @param {string} status - 'OK', 'WARNING' or 'CRITICAL'.
 * @param {string} summary - One-line human-readable outcome.
 * @param {object} [details={}] - Extra fields merged into the record; keys
 *   named like the fixed fields override them.
 * @returns {object} The result record.
 */
function checkResult(name, status, summary, details = {}) {
  return {
    name,
    status,
    summary,
    ...details,
  };
}

/**
 * Runs a command without a shell and always resolves with its outcome.
 *
 * @param {string} command - Executable name.
 * @param {string[]} args - Arguments.
 * @param {number} timeoutMs - Timeout passed to execFile.
 * @returns {Promise<object>} Resolves (never rejects) with command, code,
 *   signal, stdout, stderr, timedOut and error.
 */
function runCommand(command, args, timeoutMs) {
  return new Promise((resolve) => {
    execFile(
      command,
      args,
      {
        timeout: timeoutMs,
        windowsHide: true,
        maxBuffer: 256 * 1024,
      },
      (error, stdout, stderr) => {
        resolve({
          command: [command, ...args].join(' '),
          // error.code is a string (e.g. 'ENOENT') for spawn failures;
          // those are reported as exit code 1.
          code: typeof error?.code === 'number' ? error.code : error ? 1 : 0,
          signal: error?.signal || '',
          stdout: String(stdout || ''),
          stderr: String(stderr || ''),
          timedOut: Boolean(error?.killed),
          error: error ? error.message : '',
        });
      },
    );
  });
}

/**
 * Checks whether a systemd unit is active via `systemctl is-active`.
 *
 * @param {string} serviceName - Unit name.
 * @param {number} timeoutMs - Command timeout.
 * @returns {Promise<object>} OK when the command exits 0 and prints 'active',
 *   otherwise CRITICAL.
 */
async function checkService(serviceName, timeoutMs) {
  const result = await runCommand(
    'systemctl',
    ['is-active', serviceName],
    timeoutMs,
  );
  const state = result.stdout.trim() || result.stderr.trim() || result.error;
  if (result.code === 0 && state === 'active') {
    return checkResult(`service:${serviceName}`, 'OK', 'active', {
      command: result.command,
    });
  }
  return checkResult(
    `service:${serviceName}`,
    'CRITICAL',
    `服务状态异常: ${state || `exit ${result.code}`}`,
    {
      command: result.command,
      stderr: result.stderr.trim(),
    },
  );
}

/**
 * Performs a GET request and always resolves with its outcome.
 *
 * @param {string} url - Absolute http(s) URL.
 * @param {number} timeoutMs - Timeout passed to the request.
 * @returns {Promise<object>} Resolves (never rejects) with either
 *   {elapsedMs, statusCode, body} or {elapsedMs, error}. The body is capped
 *   at 2048 characters.
 */
function requestUrl(url, timeoutMs) {
  const maxBodyChars = 2048;
  return new Promise((resolve) => {
    const startedAt = Date.now();
    // Resolving an already settled promise is a no-op, so several failure
    // paths may safely report through this helper.
    const fail = (error) => {
      resolve({
        elapsedMs: Date.now() - startedAt,
        error: error instanceof Error ? error.message : String(error),
      });
    };

    try {
      const parsed = new URL(url);
      const client = parsed.protocol === 'https:' ? https : http;
      const request = client.request(
        parsed,
        {
          method: 'GET',
          timeout: timeoutMs,
          headers: {
            'User-Agent': 'genarrative-health-patrol/1.0',
            Accept: 'application/json,text/plain,*/*',
          },
        },
        (response) => {
          let body = '';
          response.setEncoding('utf8');
          response.on('data', (chunk) => {
            if (body.length < maxBodyChars) {
              body += chunk;
            }
          });
          response.on('end', () => {
            resolve({
              elapsedMs: Date.now() - startedAt,
              statusCode: response.statusCode || 0,
              body: body.slice(0, maxBodyChars),
            });
          });
          // A connection dropped mid-body never emits 'end'; without these
          // handlers the promise would stay pending forever.
          response.on('error', fail);
          response.on('close', () => {
            if (!response.complete) {
              fail(new Error('response aborted'));
            }
          });
        },
      );
      request.on('timeout', () => {
        request.destroy(new Error(`timeout after ${timeoutMs}ms`));
      });
      request.on('error', fail);
      request.end();
    } catch (error) {
      // Invalid URL or unsupported protocol: report it as a failed probe
      // instead of aborting the whole patrol.
      fail(error);
    }
  });
}

/**
 * Probes one HTTP endpoint and grades the outcome.
 *
 * @param {string} name - Check identifier.
 * @param {string} url - URL to request.
 * @param {{timeoutMs: number, slowMs: number}} config - Patrol configuration.
 * @returns {Promise<object>} CRITICAL on request failure or non-2xx status,
 *   WARNING when a 2xx response took longer than slowMs, OK otherwise.
 */
async function checkHttp(name, url, config) {
  const result = await requestUrl(url, config.timeoutMs);
  // Equivalent manual command, included so an operator can reproduce the probe.
  const curlCommand = `curl -fsS --max-time ${Math.ceil(config.timeoutMs / 1000)} ${url}`;

  if (result.error) {
    return checkResult(name, 'CRITICAL', `请求失败: ${result.error}`, {
      command: curlCommand,
      elapsedMs: result.elapsedMs,
    });
  }

  const ok = result.statusCode >= 200 && result.statusCode < 300;
  if (!ok) {
    return checkResult(
      name,
      'CRITICAL',
      `HTTP ${result.statusCode},耗时 ${result.elapsedMs}ms`,
      {
        command: curlCommand,
        elapsedMs: result.elapsedMs,
        body: result.body.trim(),
      },
    );
  }

  if (result.elapsedMs > config.slowMs) {
    return checkResult(
      name,
      'WARNING',
      `HTTP ${result.statusCode} 但耗时偏高: ${result.elapsedMs}ms`,
      {
        command: curlCommand,
        elapsedMs: result.elapsedMs,
      },
    );
  }

  return checkResult(name, 'OK', `HTTP ${result.statusCode} ${result.elapsedMs}ms`, {
    command: curlCommand,
    elapsedMs: result.elapsedMs,
  });
}

/**
 * Scans the journal of the monitored units for err..alert entries from the
 * last 15 minutes (at most 20 lines).
 *
 * @param {{timeoutMs: number}} config - Patrol configuration.
 * @returns {Promise<object>} OK when nothing is found; WARNING when entries
 *   exist or journalctl itself fails.
 */
async function checkRecentJournal(config) {
  const args = [
    // Same units as the service checks, so both lists cannot drift apart.
    ...DEFAULT_SERVICES.flatMap((serviceName) => ['-u', serviceName]),
    '--since',
    '15 minutes ago',
    '-p',
    'err..alert',
    '--no-pager',
    '-o',
    'short-iso',
    '-n',
    '20',
  ];
  const result = await runCommand('journalctl', args, config.timeoutMs);
  if (result.code !== 0) {
    return checkResult('journal:recent-errors', 'WARNING', '无法读取最近错误日志', {
      command: result.command,
      stderr: result.stderr.trim() || result.error,
    });
  }

  const lines = result.stdout
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line && line !== '-- No entries --');
  if (lines.length === 0) {
    return checkResult('journal:recent-errors', 'OK', '最近 15 分钟无 err..alert 日志', {
      command: result.command,
    });
  }

  return checkResult(
    'journal:recent-errors',
    'WARNING',
    `最近 15 分钟有 ${lines.length} 条 err..alert 日志`,
    {
      command: result.command,
      lines,
    },
  );
}

/**
 * Writes the patrol result as pretty-printed JSON, creating the parent
 * directory when needed. Does nothing when no status file is configured.
 *
 * @param {string} statusFile - Target path; empty disables the write.
 * @param {object} payload - Patrol result.
 * @returns {Promise<void>}
 */
async function writeStatusFile(statusFile, payload) {
  if (!statusFile) {
    return;
  }
  await mkdir(dirname(statusFile), {recursive: true});
  await writeFile(statusFile, `${JSON.stringify(payload, null, 2)}\n`, 'utf8');
}

/**
 * Posts a non-OK patrol result to the configured webhook.
 *
 * Delivery is best-effort: every failure is logged to stderr and the
 * returned promise still resolves, so a broken webhook can neither hide nor
 * block the patrol result.
 *
 * @param {{webhookUrl: string, timeoutMs: number}} config - Patrol configuration.
 * @param {{status: string}} payload - Patrol result.
 * @returns {Promise<void>}
 */
async function notifyWebhook(config, payload) {
  if (!config.webhookUrl || payload.status === 'OK') {
    return;
  }

  const body = JSON.stringify(payload);
  await new Promise((resolve) => {
    let settled = false;
    const finish = () => {
      settled = true;
      resolve();
    };
    // Several events can report the same failure; log only the first one.
    const reportFailure = (error) => {
      if (settled) {
        return;
      }
      const message = error instanceof Error ? error.message : String(error);
      console.error(`[health-patrol] webhook notify failed: ${message}`);
      finish();
    };

    try {
      const parsed = new URL(config.webhookUrl);
      const client = parsed.protocol === 'https:' ? https : http;
      const request = client.request(
        parsed,
        {
          method: 'POST',
          timeout: config.timeoutMs,
          headers: {
            'Content-Type': 'application/json',
            'Content-Length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const statusCode = response.statusCode || 0;
          if (statusCode < 200 || statusCode >= 300) {
            console.error(
              `[health-patrol] webhook responded with HTTP ${statusCode}`,
            );
          }
          // Drain the body so 'end' fires and the socket is released.
          response.resume();
          response.on('end', finish);
          response.on('error', reportFailure);
          response.on('close', () => {
            if (!response.complete) {
              reportFailure(new Error('response aborted'));
            }
          });
        },
      );
      request.on('timeout', () => {
        request.destroy(new Error(`timeout after ${config.timeoutMs}ms`));
      });
      request.on('error', reportFailure);
      request.end(body);
    } catch (error) {
      // Invalid webhook URL or unsupported protocol.
      reportFailure(error);
    }
  });
}

/**
 * Prints the patrol result in human-readable form: a header line, then one
 * line per check followed by its optional details.
 *
 * @param {{status: string, checkedAt: string, checks: object[]}} payload - Patrol result.
 */
function printText(payload) {
  console.log(`[health-patrol] ${payload.status} ${payload.checkedAt}`);
  for (const check of payload.checks) {
    console.log(`[${check.status}] ${check.name}: ${check.summary}`);
    // The reproduction command is only shown for checks that need attention.
    if (check.command && check.status !== 'OK') {
      console.log(`  command: ${check.command}`);
    }
    if (check.stderr) {
      console.log(`  stderr: ${check.stderr}`);
    }
    if (check.body) {
      console.log(`  body: ${check.body}`);
    }
    if (Array.isArray(check.lines) && check.lines.length > 0) {
      for (const line of check.lines) {
        console.log(`    ${line}`);
      }
    }
  }
}

/**
 * Runs every check in sequence, persists and reports the result, and sets
 * the process exit status: 2 for CRITICAL, 1 for WARNING when
 * --fail-on-warning is active, 0 otherwise.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const config = parseArgs(process.argv.slice(2));
  const checks = [];

  for (const serviceName of DEFAULT_SERVICES) {
    checks.push(await checkService(serviceName, config.timeoutMs));
  }

  checks.push(await checkHttp('api:/healthz', joinUrl(config.apiBaseUrl, '/healthz'), config));
  checks.push(await checkHttp('api:/readyz', joinUrl(config.apiBaseUrl, '/readyz'), config));
  checks.push(
    await checkHttp(
      'spacetimedb:/v1/ping',
      joinUrl(config.spacetimeBaseUrl, '/v1/ping'),
      config,
    ),
  );

  for (const path of config.publicPaths) {
    checks.push(
      await checkHttp(`public:${path}`, joinUrl(config.publicBaseUrl, path), config),
    );
  }

  if (!config.skipJournal) {
    checks.push(await checkRecentJournal(config));
  }

  // Loaded locally so this function does not depend on a file-level import.
  const {hostname} = await import('node:os');
  const payload = {
    status: maxStatus(checks),
    checkedAt: new Date().toISOString(),
    // Fall back to the OS-reported name when HOSTNAME is unset or empty.
    host: process.env.HOSTNAME || hostname(),
    checks,
  };

  await writeStatusFile(config.statusFile, payload);
  await notifyWebhook(config, payload);

  if (config.json) {
    console.log(JSON.stringify(payload, null, 2));
  } else {
    printText(payload);
  }

  if (payload.status === 'CRITICAL') {
    process.exit(2);
  }
  if (payload.status === 'WARNING' && config.failOnWarning) {
    process.exit(1);
  }
}

// Any unexpected failure of the patrol itself is reported as CRITICAL.
main().catch((error) => {
  console.error(`[health-patrol] CRITICAL ${error instanceof Error ? error.message : String(error)}`);
  process.exit(2);
});