fix(jenkins): upload otelcol archive for server provision

This commit is contained in:
2026-05-19 15:58:04 +08:00
parent f7edc8b1ec
commit 79af97dedd
4 changed files with 69 additions and 7 deletions

View File

@@ -95,6 +95,14 @@
- 验证方式Jenkins 构建机可完成工具包准备release 部署 agent 只消费工作区文件;目标机不再依赖 GitHub 外网下载。
- 关联文档:`docs/【开发运维】本地开发验证与生产运维-2026-05-15.md`
## 2026-05-19 otelcol-contrib 改为 Jenkins 手动上传归档再解包
- 背景Genarrative-Server-Provision 中 `otelcol-contrib` 的构建机下载步骤耗时较长,且本机已经提前准备好安装包。
- 决策:`jenkins/Jenkinsfile.production-server-provision` 新增 `OTELCOL_CONTRIB_ARCHIVE` 手动上传参数,默认要求上传 `otelcol-contrib_0.151.0_linux_amd64.tar.gz``scripts/prepare-server-provision-tools.sh` 优先从上传归档解包生成 `provision-tools/otelcol-contrib`,不再默认联网下载 OpenTelemetry release 包。
- 影响范围:`jenkins/Jenkinsfile.production-server-provision``scripts/prepare-server-provision-tools.sh``docs/【开发运维】本地开发验证与生产运维-2026-05-15.md`
- 验证方式Jenkins 日志应显示使用手动上传的 otelcol 包,`MANIFEST.txt` 记录 source 为 manual archive`ENABLE_OTELCOL=false` 时可以跳过 collector 工具包准备。
- 关联文档:`docs/【开发运维】本地开发验证与生产运维-2026-05-15.md`
## 2026-05-19 公开 gallery 入口发布限流以快拒绝保护后端
- 背景:容器 2C / 2G 压测中,公开作品列表在约 5000 HTTP req/s 目标下可以保持 200 请求低延迟,但 SpacetimeDB 内存会随 api-server 重连和高压请求累积到容器上限附近。

View File

@@ -160,7 +160,7 @@ Windows Stdb module 构建流水线运行在 Jenkins `windows` 节点上。该
- `api-server` 生产模板默认 `GENARRATIVE_API_LISTEN_BACKLOG=1024``GENARRATIVE_API_WORKER_THREADS=4`;本地未设置 worker threads 时继续使用 Tokio 默认值。
- `GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512` 开启应用内 HTTP 并发背压;`GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS=320``GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS=64``GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS=16` 分别限制公开列表、公开详情和后台 API 热路径。超过许可时直接返回 `429 Too Many Requests``Retry-After: 1``/healthz` 不受该限制。这些值不是 RPS 限速;如果压测中 429 上升但内存和 p95 收敛,说明背压正在保护进程。直连 `api-server` 的极高 RPS 压测若出现 `connection refused`,通常已经打到 TCP 监听 / accept 层,应同时检查 backlog、Nginx upstream keepalive 和前置限流。
- `genarrative-api.service` 设置 `LimitNOFILE=65535``TasksMax=2048`;上线后用 `systemctl show genarrative-api.service -p LimitNOFILE -p TasksMax``cat /proc/$(pidof api-server)/limits` 核对。
- Server provision 不在目标机下载 SpacetimeDB 或 `otelcol-contrib`。Jenkins 的 `Prepare Provision Tools` 阶段在 `linux && genarrative-build` 构建机执行 `scripts/prepare-server-provision-tools.sh`,通过官方 SpacetimeDB 安装入口 `https://install.spacetimedb.com` 和 OpenTelemetry release 包生成 `provision-tools/`,再通过 `stash/unstash` 上传到 release 部署 agent。目标机上的 `scripts/jenkins-server-provision.sh` 只从该工作区工具包安装 `/stdb/spacetime``/stdb/bin/current/*``/usr/local/bin/otelcol-contrib`。注意 `scripts/jenkins-checkout-source.sh` 会执行 `git reset --hard` / `git clean`,因此被直接执行的新增脚本必须以 Git `100755` 模式提交,或在二次 checkout 之后再补 `chmod +x`
- Server provision 不在目标机下载 SpacetimeDB 或 `otelcol-contrib`。Jenkins 的 `Prepare Provision Tools` 阶段在 `linux && genarrative-build` 构建机执行 `scripts/prepare-server-provision-tools.sh`SpacetimeDB 仍通过官方安装入口 `https://install.spacetimedb.com` 准备;`otelcol-contrib` 默认要求在 Jenkins 参数 `OTELCOL_CONTRIB_ARCHIVE` 手动上传 `otelcol-contrib_0.151.0_linux_amd64.tar.gz`,再从上传包解出 `provision-tools/otelcol-contrib`。最终工具包通过 `stash/unstash` 上传到 release 部署 agent。目标机上的 `scripts/jenkins-server-provision.sh` 只从该工作区工具包安装 `/stdb/spacetime``/stdb/bin/current/*``/usr/local/bin/otelcol-contrib`。注意 `scripts/jenkins-checkout-source.sh` 会执行 `git reset --hard` / `git clean`,因此被直接执行的新增脚本必须以 Git `100755` 模式提交,或在二次 checkout 之后再补 `chmod +x`
- `otelcol-contrib.service` 作为可选系统服务加入 provision默认监听 `127.0.0.1:4317/4318` 并使用 `deploy/otelcol/genarrative-debug.yaml`。api-server 是否发送 OTLP 仍由 `GENARRATIVE_OTEL_ENABLED` 控制,服务 unit 见 `deploy/systemd/otelcol-contrib.service`
- Nginx `/api/``/admin/api/` 通过 `genarrative_api` upstream 代理到 `127.0.0.1:8082`upstream keepalive 为 64`limit_conn` 负责连接 / 并发保护,`limit_req` 负责入口 RPS 快拒绝。当前模板把公开 gallery list 单独放到 `genarrative_gallery_rps`,默认 `rate=5000r/s``burst=4096``limit_conn=320`;公开详情和普通 API 放到 `genarrative_api_rps`,后台 API 放到 `genarrative_admin_rps`。压测时看 `/var/log/nginx/genarrative.access.log` 中的 `request_time``upstream_connect_time``upstream_header_time``upstream_response_time``upstream_status``request_id`
- 作品列表 K6 脚本一次 iteration 默认请求两个公开接口,因此约 50 HTTP req/s 的目标命令使用 `SCENARIO=spike START_RPS=5 PEAK_RPS=25 HOLD=60s END_RPS=5 DETAIL_RATIO=0 npm run loadtest:k6:works`

View File

@@ -34,6 +34,7 @@ pipeline {
booleanParam(name: 'ENABLE_SERVICES', defaultValue: true, description: '启用并启动 spacetimedb 与 api-server systemd 服务')
booleanParam(name: 'ENABLE_OTELCOL', defaultValue: true, description: '安装并启用本机 OpenTelemetry Collectorapi-server 模板默认开启 OTLP如需关闭请在 API_ENV_FILE 中将 GENARRATIVE_OTEL_ENABLED 改为 false')
string(name: 'OTELCOL_VERSION', defaultValue: '0.151.0', description: 'otelcol-contrib 版本')
stashedFile 'OTELCOL_CONTRIB_ARCHIVE'
}
stages {
@@ -72,6 +73,19 @@ pipeline {
if (!(params.OTELCOL_VERSION?.trim() ==~ /^[0-9]+\.[0-9]+\.[0-9]+$/)) {
error("OTELCOL_VERSION 格式应为 x.y.z: ${params.OTELCOL_VERSION}")
}
def otelcolArchiveFilename = env.OTELCOL_CONTRIB_ARCHIVE_FILENAME?.trim()
def expectedOtelcolArchiveFilename = "otelcol-contrib_${params.OTELCOL_VERSION.trim()}_linux_amd64.tar.gz"
if (params.ENABLE_OTELCOL) {
if (!otelcolArchiveFilename) {
error("ENABLE_OTELCOL=true 时必须在 OTELCOL_CONTRIB_ARCHIVE 上传 ${expectedOtelcolArchiveFilename}。")
}
if (otelcolArchiveFilename != expectedOtelcolArchiveFilename) {
error("OTELCOL_CONTRIB_ARCHIVE 文件名必须是 ${expectedOtelcolArchiveFilename},当前上传: ${otelcolArchiveFilename}")
}
}
if (!params.ENABLE_OTELCOL && otelcolArchiveFilename) {
echo "ENABLE_OTELCOL=false已上传的 OTELCOL_CONTRIB_ARCHIVE 将不会被安装。"
}
if (!params.SPACETIME_DOWNLOAD_ROOT?.trim()) {
error('SPACETIME_DOWNLOAD_ROOT 不能为空。')
}
@@ -129,9 +143,28 @@ pipeline {
# jenkins-checkout-source.sh 会 reset/clean 到目标 commit前面的临时 chmod 可能被 Git mode 还原;
# 直接执行脚本前在二次 checkout 之后再补执行位,避免 Linux agent 报 Permission denied。
chmod +x scripts/prepare-server-provision-tools.sh
BASH
'''
script {
if (params.ENABLE_OTELCOL) {
echo "准备使用手动上传的 otelcol-contrib 包: ${env.OTELCOL_CONTRIB_ARCHIVE_FILENAME}"
sh 'bash -lc "rm -rf manual-provision-tool-upload && mkdir -p manual-provision-tool-upload"'
dir('manual-provision-tool-upload') {
unstash 'OTELCOL_CONTRIB_ARCHIVE'
}
env.OTELCOL_ARCHIVE_SOURCE = 'manual-provision-tool-upload/OTELCOL_CONTRIB_ARCHIVE'
} else {
env.OTELCOL_ARCHIVE_SOURCE = ''
}
}
sh '''
bash <<'BASH'
set -euo pipefail
PROVISION_TOOLS_DIR="${PROVISION_TOOLS_DIR:-provision-tools}" \
OTELCOL_VERSION="${OTELCOL_VERSION:-0.151.0}" \
PREPARE_OTELCOL="${ENABLE_OTELCOL:-true}" \
OTELCOL_ARCHIVE_SOURCE="${OTELCOL_ARCHIVE_SOURCE:-}" \
SPACETIME_DOWNLOAD_ROOT="${SPACETIME_DOWNLOAD_ROOT:-https://github.com/clockworklabs/SpacetimeDB/releases/latest/download}" \
scripts/prepare-server-provision-tools.sh
BASH
@@ -195,8 +228,10 @@ BASH
sh '''
bash <<'BASH'
set -euo pipefail
chmod +x "${PROVISION_TOOLS_DIR:-provision-tools}/otelcol-contrib" \
"${PROVISION_TOOLS_DIR:-provision-tools}/spacetime/spacetime" \
if [[ "${ENABLE_OTELCOL:-true}" == "true" ]]; then
chmod +x "${PROVISION_TOOLS_DIR:-provision-tools}/otelcol-contrib"
fi
chmod +x "${PROVISION_TOOLS_DIR:-provision-tools}/spacetime/spacetime" \
"${PROVISION_TOOLS_DIR:-provision-tools}/spacetime/bin/current/spacetimedb-cli" \
"${PROVISION_TOOLS_DIR:-provision-tools}/spacetime/bin/current/spacetimedb-standalone"
chmod +x scripts/jenkins-server-provision.sh

View File

@@ -3,11 +3,14 @@ set -euo pipefail
PROVISION_TOOLS_DIR="${PROVISION_TOOLS_DIR:-provision-tools}"
OTELCOL_VERSION="${OTELCOL_VERSION:-0.151.0}"
PREPARE_OTELCOL="${PREPARE_OTELCOL:-${ENABLE_OTELCOL:-true}}"
OTELCOL_ARCHIVE_SOURCE="${OTELCOL_ARCHIVE_SOURCE:-}"
OTELCOL_DOWNLOAD_ROOT="${OTELCOL_DOWNLOAD_ROOT:-https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download}"
SPACETIME_INSTALLER_URL="${SPACETIME_INSTALLER_URL:-https://install.spacetimedb.com}"
SPACETIME_DOWNLOAD_ROOT="${SPACETIME_DOWNLOAD_ROOT:-https://github.com/clockworklabs/SpacetimeDB/releases/latest/download}"
PROVISION_TOOLS_TMP_PARENT="${PROVISION_TOOLS_TMP_PARENT:-${WORKSPACE:-$(pwd)}/.tmp/server-provision-tools}"
TMP_DIR_TO_CLEAN=""
OTELCOL_SOURCE_DESCRIPTION="skipped"
cleanup_tmp_dir() {
if [[ -n "${TMP_DIR_TO_CLEAN}" ]]; then
@@ -54,13 +57,25 @@ prepare_otelcol() {
local archive="${tmp_dir}/otelcol-contrib.tar.gz"
local extract_dir="${tmp_dir}/otelcol-contrib"
local url="${OTELCOL_DOWNLOAD_ROOT}/v${OTELCOL_VERSION}/otelcol-contrib_${OTELCOL_VERSION}_linux_amd64.tar.gz"
local source_archive="${OTELCOL_ARCHIVE_SOURCE}"
local target="${PROVISION_TOOLS_DIR}/otelcol-contrib"
require_cmd tar
echo "[prepare-provision-tools] 下载 otelcol-contrib: ${url}"
mkdir -p "${extract_dir}"
download_file "${url}" "${archive}"
if [[ -n "${source_archive}" ]]; then
if [[ ! -f "${source_archive}" ]]; then
echo "[prepare-provision-tools] 上传的 otelcol-contrib 包不存在: ${source_archive}" >&2
exit 1
fi
echo "[prepare-provision-tools] 使用手动上传的 otelcol-contrib 包: ${source_archive}"
cp "${source_archive}" "${archive}"
OTELCOL_SOURCE_DESCRIPTION="manual archive ${source_archive}"
else
echo "[prepare-provision-tools] 下载 otelcol-contrib: ${url}"
download_file "${url}" "${archive}"
OTELCOL_SOURCE_DESCRIPTION="download ${url}"
fi
tar -xzf "${archive}" -C "${extract_dir}"
if [[ ! -x "${extract_dir}/otelcol-contrib" ]]; then
@@ -116,11 +131,15 @@ main() {
rm -rf "${PROVISION_TOOLS_DIR}"
mkdir -p "${PROVISION_TOOLS_DIR}"
prepare_otelcol "${tmp_dir}"
if [[ "${PREPARE_OTELCOL}" == "true" ]]; then
prepare_otelcol "${tmp_dir}"
else
echo "[prepare-provision-tools] PREPARE_OTELCOL=${PREPARE_OTELCOL},跳过 otelcol-contrib 工具包准备。"
fi
prepare_spacetime "${tmp_dir}"
cat >"${PROVISION_TOOLS_DIR}/MANIFEST.txt" <<EOF
otelcol-contrib ${OTELCOL_VERSION}
otelcol-contrib ${OTELCOL_VERSION} ${OTELCOL_SOURCE_DESCRIPTION}
spacetime installer ${SPACETIME_INSTALLER_URL}
spacetime download root ${SPACETIME_DOWNLOAD_ROOT}
EOF