diff --git a/.hermes/shared-memory/development-workflow.md b/.hermes/shared-memory/development-workflow.md index eca1d5b0..3710e85b 100644 --- a/.hermes/shared-memory/development-workflow.md +++ b/.hermes/shared-memory/development-workflow.md @@ -95,7 +95,7 @@ npm run dev:admin-web 开发态 `npm run dev` / `npm run dev:api-server` 默认打开 `GENARRATIVE_DEV_PASSWORD_ENTRY_AUTO_REGISTER_ENABLED=true`,密码入口可以直接注册未知手机号账号;生产默认仍关闭该开关。 -生产 `Genarrative-Stdb-Module-Publish` 的备份默认使用 `DATABASE_BACKUP_MODE=async`:流水线在 publish 成功并退出维护模式后只触发服务器上的 `genarrative-database-backup.service`,避免低带宽 OSS 上传长时间占住部署窗口。需要强制在 publish 前等待备份并让失败阻断发布时,手动选择 `DATABASE_BACKUP_MODE=sync`;已有其他备份窗口且明确接受风险时才选择 `skip`。 +生产 `Genarrative-Stdb-Module-Publish` 的备份默认使用 `DATABASE_BACKUP_MODE=async`:流水线在 publish 前先生成本地冷备份,随后继续 publish,并把同一份发布前备份交给后台 Node 进程上传 OSS,避免低带宽 OSS 上传长时间占住部署窗口。需要强制在 publish 前等待打包和上传并让失败阻断发布时,手动选择 `DATABASE_BACKUP_MODE=sync`;已有其他备份窗口且明确接受风险时才选择 `skip`。 查看本地 Rust/SpacetimeDB 日志: diff --git a/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md b/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md index 8671e006..be23148d 100644 --- a/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md +++ b/docs/【开发运维】本地开发验证与生产运维-2026-05-15.md @@ -201,7 +201,7 @@ UI 相关修改要重点验证: npm run database:backup:oss -- --data-dir /stdb --stop-service spacetimedb.service ``` -脚本会将数据目录打包成 `tar.gz`,上传到 `oss://///-.tar.gz`。生产建议做冷备份:传入 `--stop-service spacetimedb.service`,脚本会在打包前停止服务、打包后恢复服务,再上传 OSS。由于 OSS 上传可能受服务器带宽限制,`Genarrative-Stdb-Module-Publish` 默认使用 `DATABASE_BACKUP_MODE=async`,在 publish 成功并退出维护模式后通过 `systemctl start --no-block genarrative-database-backup.service` 触发服务器后台备份,不等待上传完成,也不因备份上传耗时阻塞发布;需要强一致发布闸门时改用 `DATABASE_BACKUP_MODE=sync`(等价脚本参数 `--backup-mode sync`),备份会在 publish 前同步执行,失败会阻断 publish;确认已有其他备份窗口时才使用 `DATABASE_BACKUP_MODE=skip`(兼容脚本参数 `--skip-backup`)。若业务不能接受停机窗口,应先规划 SpacetimeDB 原生快照或主备策略,不要直接在写入中的数据目录上做热拷贝并当作强一致备份。 +脚本会将数据目录打包成 `tar.gz`,上传到 `oss://///-.tar.gz`。生产建议做冷备份:传入 `--stop-service spacetimedb.service`,脚本会在打包前停止服务、打包后恢复服务,再上传 OSS。由于 OSS 上传可能受服务器带宽限制,`Genarrative-Stdb-Module-Publish` 默认使用 `DATABASE_BACKUP_MODE=async`:先在 publish 前用 `--defer-upload` 生成本地冷备份和 `.manifest.json`,随后继续执行 publish;发布脚本退出前会用后台 `node ... --upload-archive ` 上传同一份发布前备份,不等待上传完成。需要强一致发布闸门时改用 `DATABASE_BACKUP_MODE=sync`(等价脚本参数 `--backup-mode sync`),备份会在 publish 前同步打包并上传,失败会阻断 publish;确认已有其他备份窗口时才使用 `DATABASE_BACKUP_MODE=skip`(兼容脚本参数 `--skip-backup`)。若业务不能接受停机窗口,应先规划 SpacetimeDB 原生快照或主备策略,不要直接在写入中的数据目录上做热拷贝并当作强一致备份。 生产环境变量模板在 `deploy/env/api-server.env.example`: diff --git a/jenkins/Jenkinsfile.production-stdb-module-publish b/jenkins/Jenkinsfile.production-stdb-module-publish index 8fa44889..ab6cad21 100644 --- a/jenkins/Jenkinsfile.production-stdb-module-publish +++ b/jenkins/Jenkinsfile.production-stdb-module-publish @@ -27,7 +27,7 @@ pipeline { string(name: 'SPACETIME_ROOT_DIR', defaultValue: '/stdb', description: 'spacetime CLI root-dir;需与自托管 spacetimedb.service 一致') string(name: 'SPACETIME_RUN_AS_USER', defaultValue: 'spacetimedb', description: '执行 spacetime publish 的本机用户,默认使用自托管服务用户') booleanParam(name: 'CLEAR_DATABASE', defaultValue: false, description: '是否清空数据库后发布') - choice(name: 'DATABASE_BACKUP_MODE', choices: ['async', 'sync', 'skip'], description: '数据库备份策略:async 在 publish 成功后触发服务器 systemd 备份并继续;sync 在 publish 前等待备份完成且失败阻断;skip 跳过') + choice(name: 'DATABASE_BACKUP_MODE', choices: ['async', 'sync', 'skip'], description: '数据库备份策略:async 在 publish 前生成本地冷备份、后台上传 OSS;sync 在 publish 前等待上传完成且失败阻断;skip 跳过') } stages { @@ -143,7 +143,18 @@ pipeline { if (!(backupMode in ['async', 'sync', 'skip'])) { error("DATABASE_BACKUP_MODE 只能是 async、sync 或 skip: ${backupMode}") } - def backupArg = "--backup-mode \"${backupMode}\"" + def publishScriptPath = 'scripts/deploy/production-stdb-publish.sh' + def publishScriptText = readFile(file: publishScriptPath, encoding: 'UTF-8') + def publishScriptSupportsBackupMode = publishScriptText.contains('--backup-mode') + def backupArg = '' + if (publishScriptSupportsBackupMode) { + backupArg = "--backup-mode \"${backupMode}\"" + } else if (backupMode == 'sync') { + error('当前工作区的 scripts/deploy/production-stdb-publish.sh 还不支持 --backup-mode,无法保证 sync 语义。请先更新工作区脚本后再运行。') + } else { + echo "[Jenkins] 当前工作区脚本还不支持 --backup-mode,async/skip 自动降级为 --skip-backup,避免参数不兼容导致发布失败。" + backupArg = '--skip-backup' + } def rootArg = "--root-dir \"${params.SPACETIME_ROOT_DIR?.trim() ? params.SPACETIME_ROOT_DIR.trim() : '/stdb'}\"" def runAsArg = params.SPACETIME_RUN_AS_USER?.trim() ? "--run-as-user \"${params.SPACETIME_RUN_AS_USER.trim()}\"" diff --git a/scripts/database-backup-to-oss.mjs b/scripts/database-backup-to-oss.mjs index 9e7bf2e7..5eac405b 100644 --- a/scripts/database-backup-to-oss.mjs +++ b/scripts/database-backup-to-oss.mjs @@ -20,10 +20,12 @@ const UNSIGNED_PAYLOAD = 'UNSIGNED-PAYLOAD'; function usage() { console.log(`用法: npm run database:backup:oss -- [--data-dir ] [--work-dir ] [--bucket ] [--object-prefix ] [--keep-local] - node scripts/database-backup-to-oss.mjs [--stop-service spacetimedb.service] + node scripts/database-backup-to-oss.mjs [--stop-service spacetimedb.service] [--defer-upload] + node scripts/database-backup-to-oss.mjs --upload-archive 说明: 将 SpacetimeDB 数据目录打包成 .tar.gz,并上传到阿里云 OSS 指定 bucket。 + --defer-upload 只生成本地冷备份和 manifest,不上传;后续用 --upload-archive 异步上传。 默认读取 .env / .env.local / .env.secrets.local;生产服务可传 --env-file /etc/genarrative/api-server.env。 shell 环境变量优先级最高,不会被 env 文件覆盖。 @@ -100,6 +102,11 @@ function parseArgs(argv) { stopService: '', database: '', dryRun: false, + deferUpload: false, + uploadArchive: '', + manifestFile: '', + objectKey: '', + resultFile: '', }; for (let index = 0; index < argv.length; index += 1) { @@ -134,6 +141,9 @@ function parseArgs(argv) { case '--object-prefix': options.objectPrefix = readValue(); break; + case '--object-key': + options.objectKey = readValue(); + break; case '--access-key-id': options.accessKeyId = readValue(); break; @@ -155,6 +165,19 @@ function parseArgs(argv) { case '--dry-run': options.dryRun = true; break; + case '--defer-upload': + options.deferUpload = true; + options.keepLocal = true; + break; + case '--upload-archive': + options.uploadArchive = readValue(); + break; + case '--manifest-file': + options.manifestFile = readValue(); + break; + case '--result-file': + options.resultFile = readValue(); + break; default: throw new Error(`未知参数: ${arg}`); } @@ -260,6 +283,17 @@ function createArchive({dataDir, workDir, fileName}) { return archivePath; } +function writeManifest({manifestPath, payload}) { + writeFileSync(manifestPath, `${JSON.stringify(payload, null, 2)}\n`, 'utf8'); +} + +function readManifest(manifestPath) { + if (!existsSync(manifestPath)) { + throw new Error(`备份清单不存在: ${manifestPath}`); + } + return JSON.parse(readFileSync(manifestPath, 'utf8')); +} + function hmac(key, content, encoding) { return createHmac('sha256', key).update(content).digest(encoding); } @@ -372,6 +406,59 @@ async function uploadArchive({archivePath, bucket, endpoint, objectKey, accessKe }; } +async function uploadExistingArchive({args, env, bucket, endpoint, accessKeyId, accessKeySecret, objectPrefix}) { + const archivePath = resolvePath(args.uploadArchive); + if (!existsSync(archivePath)) { + throw new Error(`待上传备份文件不存在: ${archivePath}`); + } + + const manifestPath = resolvePath(args.manifestFile || `${archivePath}.manifest.json`); + const manifest = existsSync(manifestPath) ? readManifest(manifestPath) : {}; + const dataDir = firstNonEmpty(manifest.dataDir, env.GENARRATIVE_DATABASE_BACKUP_DATA_DIR, DEFAULT_PRODUCTION_DATA_DIR); + const database = firstNonEmpty(args.database, manifest.database, env.GENARRATIVE_SPACETIME_DATABASE, basename(dataDir)); + const objectKey = firstNonEmpty(args.objectKey, manifest.objectKey, buildBackupNames({database, dataDir, objectPrefix}).objectKey); + + console.log(`[database-backup] 上传已有备份: ${archivePath}`); + console.log(`[database-backup] 目标对象: oss://${bucket}/${objectKey}`); + + if (args.dryRun) { + console.log('[database-backup] dry-run,仅校验上传配置。'); + return; + } + + const result = await uploadArchive({archivePath, bucket, endpoint, objectKey, accessKeyId, accessKeySecret}); + console.log(`[database-backup] 上传完成: ${JSON.stringify(result)}`); + + const uploadedAt = new Date().toISOString(); + writeManifest({ + manifestPath, + payload: { + ...manifest, + database, + bucket: result.bucket, + objectKey: result.objectKey, + contentLength: result.contentLength, + etag: result.etag, + uploadedAt, + uploadStatus: 'uploaded', + }, + }); + + if (args.resultFile) { + writeFileSync(resolvePath(args.resultFile), `${JSON.stringify({archivePath, manifestPath, ...result, uploadedAt}, null, 2)}\n`, 'utf8'); + } + + const keepLocal = args.keepLocal || String(env.GENARRATIVE_DATABASE_BACKUP_KEEP_LOCAL ?? '').trim().toLowerCase() === 'true'; + if (!keepLocal) { + rmSync(archivePath, {force: true}); + rmSync(manifestPath, {force: true}); + console.log('[database-backup] 已删除本地临时备份文件;如需保留请设置 --keep-local。'); + } else { + console.log(`[database-backup] 已保留本地备份: ${archivePath}`); + console.log(`[database-backup] 已保留备份清单: ${manifestPath}`); + } +} + async function main() { const args = parseArgs(process.argv.slice(2)); const env = loadEffectiveEnv(args.envFiles); @@ -400,6 +487,11 @@ async function main() { } } + if (args.uploadArchive) { + await uploadExistingArchive({args, env, bucket, endpoint, accessKeyId, accessKeySecret, objectPrefix}); + return; + } + const {fileName, objectKey} = buildBackupNames({database, dataDir, objectPrefix}); console.log(`[database-backup] 数据目录: ${dataDir}`); console.log(`[database-backup] 本地临时目录: ${workDir}`); @@ -419,22 +511,47 @@ async function main() { startServiceIfNeeded(args.stopService || firstNonEmpty(env.GENARRATIVE_DATABASE_BACKUP_STOP_SERVICE), serviceStopped); } + const manifestPath = `${archivePath}.manifest.json`; + writeManifest({ + manifestPath, + payload: { + createdAt: new Date().toISOString(), + database, + dataDir, + bucket, + objectKey, + archivePath, + uploadStatus: args.deferUpload ? 'deferred' : 'pending', + }, + }); + + if (args.deferUpload) { + console.log(`[database-backup] 已生成本地冷备份,延后上传: ${archivePath}`); + console.log(`[database-backup] 已写入备份清单: ${manifestPath}`); + if (args.resultFile) { + writeFileSync(resolvePath(args.resultFile), `${JSON.stringify({archivePath, manifestPath, bucket, objectKey}, null, 2)}\n`, 'utf8'); + } + return; + } + const result = await uploadArchive({archivePath, bucket, endpoint, objectKey, accessKeyId, accessKeySecret}); console.log(`[database-backup] 上传完成: ${JSON.stringify(result)}`); - const manifestPath = `${archivePath}.manifest.json`; - writeFileSync( + writeManifest({ manifestPath, - `${JSON.stringify({ + payload: { createdAt: new Date().toISOString(), + database, dataDir, bucket: result.bucket, objectKey: result.objectKey, + archivePath, contentLength: result.contentLength, etag: result.etag, - }, null, 2)}\n`, - 'utf8', - ); + uploadedAt: new Date().toISOString(), + uploadStatus: 'uploaded', + }, + }); if (!keepLocal) { rmSync(archivePath, {force: true}); diff --git a/scripts/deploy/production-stdb-publish.sh b/scripts/deploy/production-stdb-publish.sh index 472a075c..21b4e68f 100644 --- a/scripts/deploy/production-stdb-publish.sh +++ b/scripts/deploy/production-stdb-publish.sh @@ -12,7 +12,7 @@ usage() { 默认使用 http://127.0.0.1:3101,避免与部署机本机 Git/Web 服务的 3000 端口冲突。 默认使用 /stdb 作为 spacetime CLI root-dir,并以 spacetimedb 用户发布,避免 root CLI 身份污染自托管实例。 发布时固定追加 --no-config,只使用显式参数,避免工作区或用户目录里的 spacetime 配置干扰目标。 - 默认在 publish 成功后异步触发 genarrative-database-backup.service,避免低带宽 OSS 上传阻塞部署。 + async 模式会在 publish 前先做本地冷备份,再在 publish 完成后后台上传 OSS,避免低带宽上传阻塞部署。 如需强制等待备份完成并在失败时阻断 publish,传入 --backup-mode sync。 失败时保留维护模式。 EOF @@ -48,6 +48,11 @@ CLEAR_DATABASE=0 BACKUP_MODE="${GENARRATIVE_STDB_PUBLISH_BACKUP_MODE:-async}" DEPLOY_COMPLETED=0 PUBLISH_TMP_DIR="" +ASYNC_BACKUP_STATUS_FILE="" +ASYNC_BACKUP_SCRIPT="" +ASYNC_BACKUP_ARCHIVE="" +ASYNC_BACKUP_MANIFEST="" +ASYNC_BACKUP_LOG="" while [[ $# -gt 0 ]]; do case "$1" in @@ -137,6 +142,9 @@ fi on_exit() { local exit_code=$? + if [[ "${BACKUP_MODE}" == "async" && -n "${ASYNC_BACKUP_STATUS_FILE}" && -f "${ASYNC_BACKUP_STATUS_FILE}" ]]; then + start_async_backup_upload || true + fi if [[ -n "${PUBLISH_TMP_DIR}" && -d "${PUBLISH_TMP_DIR}" ]]; then rm -rf "${PUBLISH_TMP_DIR}" fi @@ -148,25 +156,58 @@ on_exit() { trap on_exit EXIT -trigger_async_backup() { - # Jenkins 发布路径不能被低带宽 OSS 上传长时间占住;默认只把已安装的 systemd - # oneshot 备份任务排队启动。必须放在 publish 成功后,避免冷备份停止 SpacetimeDB - # 与 spacetime publish 同时争用 spacetimedb.service。 - if command -v systemctl >/dev/null 2>&1 && systemctl list-unit-files genarrative-database-backup.service --no-legend | grep -q '^genarrative-database-backup\.service'; then - echo "[production-stdb-publish] 异步触发数据库 OSS 备份,不等待上传完成" - if ! systemctl start --no-block genarrative-database-backup.service; then - echo "[production-stdb-publish] 警告:异步触发数据库备份失败;继续发布,请检查 genarrative-database-backup.service 日志" >&2 - fi - else - echo "[production-stdb-publish] 警告:未找到 genarrative-database-backup.service,跳过异步备份触发" >&2 +prepare_async_backup() { + ASYNC_BACKUP_SCRIPT="${SCRIPT_DIR}/../database-backup-to-oss.mjs" + if [[ ! -f "${ASYNC_BACKUP_SCRIPT}" ]]; then + ASYNC_BACKUP_SCRIPT="${SOURCE_DIR}/scripts/database-backup-to-oss.mjs" fi + if [[ ! -f "${ASYNC_BACKUP_SCRIPT}" ]]; then + echo "[production-stdb-publish] 缺少数据库备份脚本: ${ASYNC_BACKUP_SCRIPT}" >&2 + exit 1 + fi + + ASYNC_BACKUP_STATUS_FILE="$(mktemp /tmp/genarrative-stdb-backup-status.XXXXXX.json)" + echo "[production-stdb-publish] publish 前生成本地冷备份,随后会异步上传 OSS" + node "${ASYNC_BACKUP_SCRIPT}" \ + --env-file /etc/genarrative/api-server.env \ + --data-dir "${SPACETIME_ROOT_DIR}" \ + --database "${DATABASE}" \ + --stop-service spacetimedb.service \ + --defer-upload \ + --result-file "${ASYNC_BACKUP_STATUS_FILE}" +} + +start_async_backup_upload() { + if [[ -z "${ASYNC_BACKUP_STATUS_FILE}" || ! -f "${ASYNC_BACKUP_STATUS_FILE}" ]]; then + echo "[production-stdb-publish] 警告:未找到可上传的本地备份状态文件,跳过异步上传" >&2 + return 0 + fi + + ASYNC_BACKUP_ARCHIVE="$(node -e 'const fs=require("node:fs"); const p=process.argv[1]; const o=JSON.parse(fs.readFileSync(p,"utf8")); process.stdout.write(o.archivePath || "");' "${ASYNC_BACKUP_STATUS_FILE}")" + ASYNC_BACKUP_MANIFEST="$(node -e 'const fs=require("node:fs"); const p=process.argv[1]; const o=JSON.parse(fs.readFileSync(p,"utf8")); process.stdout.write(o.manifestPath || "");' "${ASYNC_BACKUP_STATUS_FILE}")" + if [[ -z "${ASYNC_BACKUP_ARCHIVE}" || -z "${ASYNC_BACKUP_MANIFEST}" ]]; then + echo "[production-stdb-publish] 警告:备份状态文件缺少 archivePath 或 manifestPath,跳过异步上传" >&2 + return 0 + fi + + mkdir -p "$(dirname "${ASYNC_BACKUP_ARCHIVE}")" + ASYNC_BACKUP_LOG="$(dirname "${ASYNC_BACKUP_ARCHIVE}")/${DATABASE}-upload.log" + echo "[production-stdb-publish] 后台上传本地备份到 OSS: ${ASYNC_BACKUP_ARCHIVE}" + nohup node "${ASYNC_BACKUP_SCRIPT}" \ + --env-file /etc/genarrative/api-server.env \ + --upload-archive "${ASYNC_BACKUP_ARCHIVE}" \ + --manifest-file "${ASYNC_BACKUP_MANIFEST}" \ + >"${ASYNC_BACKUP_LOG}" 2>&1 & + echo "[production-stdb-publish] OSS 后台上传日志: ${ASYNC_BACKUP_LOG}" + rm -f "${ASYNC_BACKUP_STATUS_FILE}" + ASYNC_BACKUP_STATUS_FILE="" } "${SCRIPT_DIR}/maintenance-on.sh" "spacetime module publish ${DATABASE}" case "${BACKUP_MODE}" in async) - echo "[production-stdb-publish] 将在 publish 成功后异步触发数据库 OSS 备份" + prepare_async_backup ;; sync) BACKUP_SCRIPT="${SCRIPT_DIR}/../database-backup-to-oss.mjs" @@ -253,8 +294,4 @@ fi "${SCRIPT_DIR}/maintenance-off.sh" DEPLOY_COMPLETED=1 -if [[ "${BACKUP_MODE}" == "async" ]]; then - trigger_async_backup -fi - echo "[production-stdb-publish] 完成"