feat(api-server): add container loadtest observability

This commit is contained in:
kdletters
2026-05-17 20:52:15 +08:00
parent 73f937d78a
commit 5a4a8a4892
36 changed files with 1325 additions and 30 deletions

132
deploy/container/README.md Normal file
View File

@@ -0,0 +1,132 @@
# Genarrative 容器化压测与隔离部署方案
本目录只服务本机或预发的容器化模拟压测,不替换当前生产 `systemd + Nginx + Jenkins` 发布路径。生产服务器仍以 `deploy/systemd/`、`deploy/nginx/`、`scripts/jenkins-*.sh`、`scripts/deploy/production-api-deploy.sh` 为准。
## 拓扑
```text
Docker Compose
├─ nginx :80 -> api-server:8082,负责静态站点、/admin/、/api/ 反代、upstream timing log、连接限制
├─ api-server :8082,Linux release 构建,连接外部 SpacetimeDB
├─ otelcol :4317/4318,debug exporter,接收 traces / metrics / logs
└─ k6 profile=loadtest 时临时启动,在 compose 网络内压 nginx
```
默认 host 端口:
- `http://127.0.0.1:18080`:容器 Nginx。
- `127.0.0.1:4317` / `127.0.0.1:4318`:容器 Collector OTLP gRPC / HTTP。
如端口冲突,可设置:
```powershell
$env:GENARRATIVE_CONTAINER_HTTP_PORT="18081"
$env:GENARRATIVE_CONTAINER_OTLP_HTTP_PORT="14318"
$env:GENARRATIVE_CONTAINER_OTLP_GRPC_PORT="14317"
```
## 初始化
```bash
npm run container:init
```
该命令会从 `deploy/container/api-server.env.example` 生成本地 `deploy/container/api-server.env`。真实 token、库名和外部服务密钥只写本地 env 文件,不提交 Git。
Docker Desktop 下默认通过 `host.docker.internal:3101` 连接宿主机上 `npm run dev` 启动的 SpacetimeDB:
```env
GENARRATIVE_SPACETIME_SERVER_URL=http://host.docker.internal:3101
GENARRATIVE_SPACETIME_DATABASE=genarrative-loadtest
GENARRATIVE_SPACETIME_TOKEN=
```
Linux Docker Engine 如果不能解析 `host.docker.internal`,Compose 已配置 `host-gateway`;仍不通时把 `GENARRATIVE_SPACETIME_SERVER_URL` 改成宿主机网关 IP 或同网络内的 SpacetimeDB 地址。
## 启动与验证
```bash
npm run container:config
npm run container:build
npm run container:up
npm run container:ps
curl -sS http://127.0.0.1:18080/api/runtime/puzzle/gallery
```
查看日志:
```bash
npm run container:logs -- nginx
npm run container:logs -- api-server
npm run container:logs -- otelcol
```
`npm run container:config` 默认只校验配置,不打印完整 env。排查 compose 展开结果时可临时使用:
```bash
npm run container:config -- --print
```
如果 `deploy/container/api-server.env` 已写入真实 token,不要把完整展开结果贴到公开渠道。
停止:
```bash
npm run container:down
```
如需同时清理容器卷:
```bash
npm run container:down -- -v
```
## 压测
k6 在 compose 网络内访问 `http://nginx`,避免 Windows 本机直连连接模型干扰 Linux 容器结果:
```bash
npm run container:k6
```
作品列表脚本一次 iteration 默认请求两个公开列表接口,因此目标 500 HTTP req/s 对应 `PEAK_RPS=250`:
```powershell
$env:SCENARIO="spike"
$env:START_RPS="25"
$env:PEAK_RPS="250"
$env:HOLD="60s"
$env:END_RPS="25"
$env:PREALLOCATED_VUS="100"
$env:MAX_VUS="500"
$env:DETAIL_RATIO="0"
npm run container:k6
```
如果要压 1000 HTTP req/s,将 `PEAK_RPS` 调到 `500`;如果要压 5000 HTTP req/s,将 `PEAK_RPS` 调到 `2500`,并同时提高 `PREALLOCATED_VUS` / `MAX_VUS`,观察是否先被带宽、Nginx `limit_conn` 或 api-server 背压限制。注意 k6 容器的全部请求来自同一个源 IP,共用同一个 `limit_conn` 计数,`/api` 并发连接超过 64 时 Nginx 会直接返回 429。
## OTLP
容器内 `otelcol` 默认使用 debug exporter。开启 api-server OTEL:
```env
GENARRATIVE_OTEL_ENABLED=true
OTEL_EXPORTER_OTLP_ENDPOINT=http://otelcol:4318
```
然后重建或重启容器:
```bash
npm run container:up
npm run container:logs -- otelcol
```
Collector 日志会输出 traces / metrics / logs。接 Rider、Jaeger、Tempo、Prometheus、Grafana 或托管平台时,另建独立 Collector 配置,不直接改生产 systemd 或 Nginx 模板。
## 隔离边界
- 不改生产 systemd 单元。
- 不改 Jenkins 发布主流程。
- 不要求真实 HTTPS 证书。
- 不把真实 `.env`、`.env.local`、`.env.secrets.local`、`deploy/container/api-server.env` 放入 Docker build context。
- 不在容器镜像里内置 SpacetimeDB 数据或 token。

View File

@@ -0,0 +1,49 @@
# ---------------------------------------------------------------------------
# Stage rust-builder: compile the api-server release binary.
# ---------------------------------------------------------------------------
FROM rust:1.88-bookworm AS rust-builder
WORKDIR /workspace
COPY server-rs ./server-rs
# The binary is copied to /tmp so the runtime stage can pick it up from a
# fixed path outside the cargo target directory.
RUN cargo build --release -p api-server --manifest-path server-rs/Cargo.toml \
 && cp server-rs/target/release/api-server /tmp/api-server

# ---------------------------------------------------------------------------
# Stage api-runtime: slim Debian image that runs api-server as a non-root user.
# ---------------------------------------------------------------------------
FROM debian:bookworm-slim AS api-runtime
WORKDIR /srv/genarrative
# Install CA certificates and curl, drop the apt package lists, then create
# the unprivileged "genarrative" system account.
RUN apt-get update \
 && apt-get install -y --no-install-recommends ca-certificates curl \
 && rm -rf /var/lib/apt/lists/* \
 && useradd --system --create-home --home-dir /srv/genarrative --shell /usr/sbin/nologin genarrative
COPY --from=rust-builder /tmp/api-server /usr/local/bin/api-server
# Create the auth-store directory and hand it to the service account before
# dropping privileges.
RUN mkdir -p /var/lib/genarrative/auth \
 && chown -R genarrative:genarrative /srv/genarrative /var/lib/genarrative
USER genarrative
EXPOSE 8082
# Image-level defaults; the api-server listens on all interfaces, port 8082.
ENV GENARRATIVE_ENV=container \
    GENARRATIVE_API_HOST=0.0.0.0 \
    GENARRATIVE_API_PORT=8082 \
    GENARRATIVE_AUTH_STORE_PATH=/var/lib/genarrative/auth/auth-store.json
CMD ["api-server"]

# ---------------------------------------------------------------------------
# Stage web-builder: build the main site and the admin web bundle.
# ---------------------------------------------------------------------------
FROM node:22-bookworm-slim AS web-builder
WORKDIR /workspace
# Package manifests are copied and installed before the sources are added.
COPY package.json package-lock.json ./
COPY apps/admin-web/package.json ./apps/admin-web/package.json
RUN npm ci
COPY index.html metadata.json tsconfig.json vite.config.ts ./
COPY src ./src
COPY public ./public
COPY media ./media
COPY packages ./packages
COPY apps/admin-web ./apps/admin-web
RUN npm run build:raw && npm run admin-web:build

# ---------------------------------------------------------------------------
# Stage nginx-runtime: serve the built static files with the container
# nginx.conf. The admin bundle is placed under web/admin.
# ---------------------------------------------------------------------------
FROM nginx:1.27-alpine AS nginx-runtime
COPY --from=web-builder /workspace/dist /srv/genarrative/web
COPY --from=web-builder /workspace/apps/admin-web/dist /srv/genarrative/web/admin
COPY deploy/container/nginx.conf /etc/nginx/nginx.conf

View File

@@ -0,0 +1,35 @@
# Copy this file to deploy/container/api-server.env and fill in local or staging values.
# It is only used by the isolated container setup and takes no part in the systemd/Jenkins production deployment.
# Do not write real tokens here and then commit them to Git.
# --- Runtime and listener settings ---
GENARRATIVE_ENV=container
GENARRATIVE_API_HOST=0.0.0.0
GENARRATIVE_API_PORT=8082
GENARRATIVE_API_LOG=info,tower_http=info
GENARRATIVE_API_LISTEN_BACKLOG=1024
GENARRATIVE_API_WORKER_THREADS=4
GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512
# --- OpenTelemetry export (disabled by default; endpoint is the compose otelcol service) ---
GENARRATIVE_OTEL_ENABLED=false
OTEL_SERVICE_NAME=genarrative-api
OTEL_EXPORTER_OTLP_ENDPOINT=http://otelcol:4318
OTEL_RESOURCE_ATTRIBUTES=deployment.environment=container,service.namespace=genarrative
# --- Auth settings; the CHANGE_ME values are placeholders to replace locally ---
GENARRATIVE_INTERNAL_API_SECRET=CHANGE_ME_FOR_CONTAINER
GENARRATIVE_JWT_ISSUER=genarrative-container
GENARRATIVE_JWT_SECRET=CHANGE_ME_FOR_CONTAINER
# NOTE(review): presumably false because the container stack serves plain HTTP on port 80 — confirm.
AUTH_REFRESH_COOKIE_SECURE=false
GENARRATIVE_AUTH_STORE_PATH=/var/lib/genarrative/auth/auth-store.json
# On Docker Desktop, connect to the SpacetimeDB started on the host by `npm run dev`.
# On Linux Docker Engine, change this to the host gateway IP, or attach a SpacetimeDB on the same network in compose.
GENARRATIVE_SPACETIME_SERVER_URL=http://host.docker.internal:3101
GENARRATIVE_SPACETIME_DATABASE=genarrative-loadtest
GENARRATIVE_SPACETIME_TOKEN=
GENARRATIVE_SPACETIME_POOL_SIZE=8
GENARRATIVE_SPACETIME_PROCEDURE_TIMEOUT_SECONDS=45
# --- LLM provider; URL, key and model are left empty in this template ---
GENARRATIVE_LLM_PROVIDER=openai-compatible
GENARRATIVE_LLM_BASE_URL=
GENARRATIVE_LLM_API_KEY=
GENARRATIVE_LLM_MODEL=

View File

@@ -0,0 +1,85 @@
# Compose project for the containerized load-test stack:
# nginx -> api-server, an OpenTelemetry Collector, and an on-demand k6 runner.
name: genarrative-container-loadtest

services:
  # API server built from the api-runtime stage of the shared Dockerfile.
  api-server:
    build:
      context: ../..
      dockerfile: deploy/container/api-server.Dockerfile
      target: api-runtime
    env_file:
      - ./api-server.env
    # Values set here take precedence over the same keys in env_file.
    environment:
      GENARRATIVE_API_HOST: 0.0.0.0
      GENARRATIVE_API_PORT: 8082
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4318
    # Makes host.docker.internal resolvable on Linux Docker Engine as well.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - api-auth-store:/var/lib/genarrative/auth
    depends_on:
      otelcol:
        condition: service_started
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8082/healthz"]
      interval: 10s
      timeout: 3s
      retries: 12
      start_period: 20s

  # Static site + reverse proxy built from the nginx-runtime stage.
  nginx:
    build:
      context: ../..
      dockerfile: deploy/container/api-server.Dockerfile
      target: nginx-runtime
    depends_on:
      api-server:
        condition: service_healthy
    ports:
      - "${GENARRATIVE_CONTAINER_HTTP_PORT:-18080}:80"
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - nginx-logs:/var/log/nginx
    # Probes a proxied API route, so the check passes only when the path
    # through nginx to the api-server answers successfully.
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://127.0.0.1/api/runtime/puzzle/gallery"]
      interval: 10s
      timeout: 5s
      retries: 12
      start_period: 20s

  # OpenTelemetry Collector using the local otelcol.yaml configuration.
  otelcol:
    image: otel/opentelemetry-collector-contrib:0.125.0
    command: ["--config=/etc/otelcol/config.yaml"]
    volumes:
      - ./otelcol.yaml:/etc/otelcol/config.yaml:ro
    ports:
      - "${GENARRATIVE_CONTAINER_OTLP_GRPC_PORT:-4317}:4317"
      - "${GENARRATIVE_CONTAINER_OTLP_HTTP_PORT:-4318}:4318"

  # k6 load generator; only created when the "loadtest" profile is enabled.
  k6:
    image: grafana/k6:0.52.0
    profiles: ["loadtest"]
    depends_on:
      nginx:
        condition: service_healthy
    # BASE_URL targets the nginx service by its compose service name; the
    # remaining values can be overridden from the invoking shell.
    environment:
      BASE_URL: http://nginx
      WORKS_DATA: data/works-list.sample.json
      SCENARIO: ${SCENARIO:-spike}
      START_RPS: ${START_RPS:-5}
      PEAK_RPS: ${PEAK_RPS:-250}
      HOLD: ${HOLD:-60s}
      END_RPS: ${END_RPS:-5}
      PREALLOCATED_VUS: ${PREALLOCATED_VUS:-100}
      MAX_VUS: ${MAX_VUS:-500}
      DETAIL_RATIO: ${DETAIL_RATIO:-0}
      SLEEP_MIN_SECONDS: ${SLEEP_MIN_SECONDS:-0}
      SLEEP_MAX_SECONDS: ${SLEEP_MAX_SECONDS:-0}
    volumes:
      - ../../scripts/loadtest:/scripts/loadtest:ro
    working_dir: /scripts/loadtest
    command: ["run", "k6-works-list.js"]

volumes:
  api-auth-store:
  nginx-logs:

133
deploy/container/nginx.conf Normal file
View File

@@ -0,0 +1,133 @@
# Container nginx for the load-test stack: serves the built web bundles and
# reverse-proxies API traffic to the api-server container.
worker_processes auto;

events {
    worker_connections 4096;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Access-log format that adds request/upstream timings and the request id
    # to the usual combined-style fields.
    log_format genarrative_upstream
        '$remote_addr - $remote_user [$time_local] "$request" '
        '$status $body_bytes_sent "$http_referer" "$http_user_agent" '
        'request_time=$request_time upstream_connect_time=$upstream_connect_time '
        'upstream_header_time=$upstream_header_time upstream_response_time=$upstream_response_time '
        'upstream_status=$upstream_status request_id=$request_id';

    upstream genarrative_api {
        server api-server:8082;
        keepalive 64;
    }

    # Connection counter keyed by client address; applied in the API locations.
    limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;

    sendfile on;
    keepalive_timeout 65;

    gzip on;
    gzip_vary on;
    gzip_proxied any;
    gzip_comp_level 5;
    gzip_min_length 1024;
    gzip_types
        text/plain
        text/css
        text/javascript
        application/javascript
        application/json
        application/xml
        application/xml+rss
        image/svg+xml;

    server {
        listen 80;
        server_name _;

        access_log /var/log/nginx/genarrative.access.log genarrative_upstream;
        error_log /var/log/nginx/genarrative.error.log warn;

        # Requests rejected by limit_conn are answered with 429.
        limit_conn_status 429;
        limit_conn_log_level warn;

        root /srv/genarrative/web;
        index index.html;

        # Admin API: proxied to the api-server over keepalive connections
        # (HTTP/1.1 with the Connection header cleared).
        location ^~ /admin/api/ {
            default_type application/json;
            limit_conn genarrative_api_conn 64;
            proxy_pass http://genarrative_api/admin/api/;
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Request-Id $request_id;
        }

        location = /admin {
            return 301 /admin/;
        }

        # Missing admin assets are a 404, not the admin index fallback.
        location ^~ /admin/assets/ {
            try_files $uri =404;
        }

        # Admin single-page app: unknown paths fall back to its index.html.
        location ^~ /admin/ {
            try_files $uri $uri/ /admin/index.html;
        }

        # Missing site assets are a 404, not the site index fallback.
        location ^~ /assets/ {
            try_files $uri =404;
        }

        # Main-site API: response buffering is off and the timeouts are one
        # hour, so long-lived responses are passed through as they arrive.
        location ~ ^/api(?:/|$) {
            default_type application/json;
            limit_conn genarrative_api_conn 64;
            proxy_pass http://genarrative_api;
            proxy_http_version 1.1;
            proxy_buffering off;
            proxy_read_timeout 3600s;
            proxy_send_timeout 3600s;
            add_header X-Accel-Buffering no always;
            proxy_set_header Connection "";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Request-Id $request_id;
        }

        # Paths starting with /generated- or /healthz get a 404 here instead
        # of reaching the index.html fallback of "location /".
        location ~ ^/(generated-|healthz) {
            return 404;
        }

        # SpacetimeDB subscribe endpoint, proxied to the host with the
        # WebSocket upgrade headers.
        location ~ ^/v1/database/[^/]+/subscribe$ {
            proxy_pass http://host.docker.internal:3101;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "Upgrade";
            proxy_set_header Host $host;
            proxy_read_timeout 3600s;
        }

        location ^~ /v1/identity {
            proxy_pass http://host.docker.internal:3101;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "Upgrade";
            proxy_set_header Host $host;
        }

        # Every other /v1/ path is rejected. This must be a plain prefix
        # location: with the "^~" modifier nginx skips regex locations once
        # this prefix is the longest match, which made the subscribe regex
        # location above unreachable (those requests got 404).
        location /v1/ {
            return 404;
        }

        # Main single-page app: unknown paths fall back to index.html.
        location / {
            try_files $uri $uri/ /index.html;
        }
    }
}

View File

@@ -0,0 +1,23 @@
# OpenTelemetry Collector configuration for the container load-test stack:
# accept OTLP over gRPC and HTTP and print everything through the debug exporter.
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

exporters:
  debug:
    verbosity: detailed

# One pipeline per signal type, each wired from the OTLP receiver to the
# debug exporter.
service:
  pipelines:
    traces:
      receivers: [otlp]
      exporters: [debug]
    metrics:
      receivers: [otlp]
      exporters: [debug]
    logs:
      receivers: [otlp]
      exporters: [debug]

View File

@@ -13,11 +13,15 @@ upstream genarrative_api {
keepalive 64;
}
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
server {
listen 80;
server_name genarrative.example.com;
access_log /var/log/nginx/genarrative.access.log genarrative_upstream;
error_log /var/log/nginx/genarrative.error.log warn;
limit_conn_status 429;
limit_conn_log_level warn;
gzip on;
gzip_vary on;
@@ -43,6 +47,7 @@ server {
location ^~ /admin/api/ {
default_type application/json;
limit_conn genarrative_api_conn 64;
if ($genarrative_maintenance) {
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
@@ -83,6 +88,7 @@ server {
# 临时兼容主站仍在使用的 /api/* HTTP facade,前端完成 SpacetimeDB SDK 迁移后删除。
location ~ ^/api(?:/|$) {
default_type application/json;
limit_conn genarrative_api_conn 64;
if ($genarrative_maintenance) {
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';

View File

@@ -11,11 +11,15 @@ upstream genarrative_api {
keepalive 64;
}
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
server {
listen 80;
server_name genarrative.example.com;
access_log /var/log/nginx/genarrative.access.log genarrative_upstream;
error_log /var/log/nginx/genarrative.error.log warn;
limit_conn_status 429;
limit_conn_log_level warn;
location /.well-known/acme-challenge/ {
root /var/www/html;
@@ -59,6 +63,7 @@ server {
location ^~ /admin/api/ {
default_type application/json;
limit_conn genarrative_api_conn 64;
if ($genarrative_maintenance) {
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
@@ -99,6 +104,7 @@ server {
# 临时兼容主站仍在使用的 /api/* HTTP facade,前端完成 SpacetimeDB SDK 迁移后删除。
location ~ ^/api(?:/|$) {
default_type application/json;
limit_conn genarrative_api_conn 64;
if ($genarrative_maintenance) {
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';