完善外部生成Worker动态扩缩容
新增外部生成 controller 进程角色与 systemd 服务;补齐队列统计 procedure 与 spacetime-client 绑定;更新生产部署脚本、健康巡检和 server provision 的 worker/controller 口径;新增容器 worker smoke 脚本,并同步运维文档与团队记忆。
This commit is contained in:
@@ -5,11 +5,11 @@ set -euo pipefail
|
||||
usage() {
|
||||
cat <<'EOF'
|
||||
用法:
|
||||
./scripts/deploy/production-api-deploy.sh --source-dir build/<version> [--version <version>] [--release-root /opt/genarrative/releases] [--current-link /opt/genarrative/current] [--service genarrative-api.service] [--worker-service-pattern 'genarrative-external-generation-worker@*.service'] [--no-worker-services] [--health-url http://127.0.0.1:8082/readyz] [--api-env-file /etc/genarrative/api-server.env] [--database genarrative-prod] [--spacetime-server-url http://127.0.0.1:3101]
|
||||
./scripts/deploy/production-api-deploy.sh --source-dir build/<version> [--version <version>] [--release-root /opt/genarrative/releases] [--current-link /opt/genarrative/current] [--service genarrative-api.service] [--worker-service-pattern 'genarrative-external-generation-worker@*.service'] [--no-worker-services] [--worker-controller-service genarrative-external-generation-controller.service] [--no-worker-controller] [--health-url http://127.0.0.1:8082/readyz] [--api-env-file /etc/genarrative/api-server.env] [--database genarrative-prod] [--spacetime-server-url http://127.0.0.1:3101]
|
||||
|
||||
说明:
|
||||
进入维护模式,校验并发布 api-server 单文件,更新 current 链接,重启 systemd 服务并执行 readiness 检查。
|
||||
默认同时重启已加载的外部生成 worker 实例;未启用 worker 单元时会自动跳过。
|
||||
默认同时重启外部生成 worker controller 和已加载的 worker 实例;未启用 worker 单元时会自动跳过。
|
||||
若传入 --database,会在重启前把 GENARRATIVE_SPACETIME_DATABASE 写入 api-server 环境文件,避免服务继续读取旧库。
|
||||
失败时保留维护模式。
|
||||
EOF
|
||||
@@ -317,6 +317,43 @@ wait_for_worker_services() {
|
||||
return 1
|
||||
}
|
||||
|
||||
#######################################
# Enable and restart the external-generation worker controller unit.
# Arguments:
#   $1 - systemd unit name; an empty string skips the step entirely.
# Outputs:
#   Progress line on stdout; missing-unit error on stderr.
# Returns:
#   0 when skipped or when the restart succeeds,
#   1 when `systemctl cat` cannot find the unit,
#   otherwise the exit status of `systemctl restart`.
#######################################
ensure_worker_controller_service() {
  local service="$1"

  # Empty name: the caller disabled controller handling, nothing to do.
  if [[ -z "${service}" ]]; then
    return 0
  fi

  # `systemctl cat` is used purely as an existence probe; its output is
  # discarded and only the exit status matters.
  if ! systemctl cat "${service}" >/dev/null 2>&1; then
    echo "[production-api-deploy] 缺少外部生成 worker controller systemd 单元: ${service}" >&2
    return 1
  fi

  echo "[production-api-deploy] 启用并重启外部生成 worker controller: ${service}"
  systemctl enable "${service}"
  systemctl restart "${service}"
}
|
||||
|
||||
#######################################
# Poll until the external-generation worker controller unit is active.
# Arguments:
#   $1 - systemd unit name; an empty string skips the wait entirely.
# Outputs:
#   Progress line and (on timeout) `systemctl status` output on stdout;
#   timeout error on stderr.
# Returns:
#   0 when skipped or as soon as `systemctl is-active` succeeds,
#   1 when the unit is still not active after 30 polls.
#######################################
wait_for_worker_controller_service() {
  local service="$1"

  # Empty name: the caller disabled controller handling, nothing to wait for.
  if [[ -z "${service}" ]]; then
    return 0
  fi

  echo "[production-api-deploy] 等待外部生成 worker controller active: ${service}"

  # Up to 30 polls with a 2-second pause after each unsuccessful one.
  for _ in {1..30}; do
    if systemctl is-active --quiet "${service}"; then
      return 0
    fi
    sleep 2
  done

  # Dump unit status for diagnosis; `|| true` keeps a non-zero status exit
  # code (expected for a unit that is not active) from masking the error
  # reported below.
  systemctl --no-pager --full status "${service}" || true
  echo "[production-api-deploy] 外部生成 worker controller 未在超时时间内进入 active,发布失败。" >&2
  return 1
}
|
||||
|
||||
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
|
||||
SOURCE_DIR=""
|
||||
VERSION=""
|
||||
@@ -324,6 +361,7 @@ RELEASE_ROOT="/opt/genarrative/releases"
|
||||
CURRENT_LINK="/opt/genarrative/current"
|
||||
SERVICE_NAME="genarrative-api.service"
|
||||
WORKER_SERVICE_PATTERN="genarrative-external-generation-worker@*.service"
|
||||
WORKER_CONTROLLER_SERVICE="genarrative-external-generation-controller.service"
|
||||
HEALTH_URL="http://127.0.0.1:8082/readyz"
|
||||
API_ENV_FILE="/etc/genarrative/api-server.env"
|
||||
DATABASE=""
|
||||
@@ -364,6 +402,14 @@ while [[ $# -gt 0 ]]; do
|
||||
WORKER_SERVICE_PATTERN=""
|
||||
shift
|
||||
;;
|
||||
--worker-controller-service)
|
||||
WORKER_CONTROLLER_SERVICE="${2:?缺少 --worker-controller-service 的值}"
|
||||
shift 2
|
||||
;;
|
||||
--no-worker-controller)
|
||||
WORKER_CONTROLLER_SERVICE=""
|
||||
shift
|
||||
;;
|
||||
--health-url)
|
||||
HEALTH_URL="${2:?缺少 --health-url 的值}"
|
||||
shift 2
|
||||
@@ -488,6 +534,8 @@ echo "[production-api-deploy] 重启服务: ${SERVICE_NAME}"
|
||||
systemctl restart "${SERVICE_NAME}"
|
||||
restart_worker_services "${WORKER_SERVICE_PATTERN}"
|
||||
wait_for_worker_services "${WORKER_SERVICE_PATTERN}"
|
||||
ensure_worker_controller_service "${WORKER_CONTROLLER_SERVICE}"
|
||||
wait_for_worker_controller_service "${WORKER_CONTROLLER_SERVICE}"
|
||||
|
||||
echo "[production-api-deploy] 等待 readiness: ${HEALTH_URL}"
|
||||
for _ in {1..30}; do
|
||||
|
||||
Reference in New Issue
Block a user