feat: workerize external generation

2026-06-05 17:29:08 +08:00
parent 5150925947
commit 8d54ea3374
60 changed files with 5285 additions and 700 deletions
--- a/deploy/container/README.md
+++ b/deploy/container/README.md
@@ -9,11 +9,12 @@ Docker Compose
 ├─ spacetimedb  :3101，独立数据卷，供 api-server 连接
 ├─ nginx        :80 -> api-server:8082，负责静态站点、/admin/、/api/ 反代、upstream timing log、连接限制
 ├─ api-server   :8082，Linux release 构建，连接 compose 内 SpacetimeDB
+├─ external-generation-worker，独立 worker 进程，消费 external_generation_job 队列
 ├─ otelcol      :4317/4318，debug exporter，接收 traces / metrics / logs
 └─ k6           profile=loadtest 时临时启动，在 compose 网络内压 nginx
 ```

-当前容器模拟参数按 `genarrative-release` 服务器采样值收口为 2 vCPU / 2 GiB RAM / 4096 soft nofile / 768 worker_connections，并已在 compose 里落实到 `spacetimedb cpus=1.0 mem_limit=896m`、`api-server cpus=2.0 mem_limit=1g`、`nginx cpus=0.5 mem_limit=128m`、`otelcol cpus=0.25 mem_limit=128m`、`k6 cpus=1.0 mem_limit=512m`。SpacetimeDB 同时设置 `--page_pool_max_size=402653184`，给 reducer、订阅与运行时保留更多非 page pool 内存。
+当前容器模拟参数按 `genarrative-release` 服务器采样值收口为 2 vCPU / 2 GiB RAM / 4096 soft nofile / 768 worker_connections，并已在 compose 里落实到 `spacetimedb cpus=1.0 mem_limit=896m`、`api-server cpus=2.0 mem_limit=1g`、`external-generation-worker cpus=2.0 mem_limit=1g`、`nginx cpus=0.5 mem_limit=128m`、`otelcol cpus=0.25 mem_limit=128m`、`k6 cpus=1.0 mem_limit=512m`。SpacetimeDB 同时设置 `--page_pool_max_size=402653184`，给 reducer、订阅与运行时保留更多非 page pool 内存。
 容器 `api-server` 默认 `GENARRATIVE_API_WORKER_THREADS=4`，用于让 Tokio 在 2 vCPU 配额内有更多 I/O 调度 worker；该值不会突破 compose 里的 `cpus=2.0` CPU 上限。
 Collector 镜像使用 `otel/opentelemetry-collector-contrib:0.151.0`。
 生产服务器若启用 Collector，则由 `deploy/systemd/otelcol-contrib.service` 和 `deploy/otelcol/genarrative-debug.yaml` 托管，不走容器镜像。
@@ -74,6 +75,7 @@ curl -sS http://127.0.0.1:18080/api/runtime/puzzle/gallery
 ```bash
 npm run container:logs -- nginx
 npm run container:logs -- api-server
+npm run container:logs -- external-generation-worker
 npm run container:logs -- otelcol
 ```

--- a/deploy/container/api-server.env.example
+++ b/deploy/container/api-server.env.example
@@ -8,6 +8,12 @@ GENARRATIVE_API_PORT=8082
 GENARRATIVE_API_LOG=info,tower_http=info
 GENARRATIVE_API_LISTEN_BACKLOG=1024
 GENARRATIVE_API_WORKER_THREADS=4
+# 容器 smoke 可临时设 all；压测或预发按 api / external-generation-worker 拆进程。
+GENARRATIVE_PROCESS_ROLE=api
+GENARRATIVE_EXTERNAL_GENERATION_WORKER_ID=
+GENARRATIVE_EXTERNAL_GENERATION_WORKER_CONCURRENCY=2
+GENARRATIVE_EXTERNAL_GENERATION_WORKER_POLL_INTERVAL_MS=2000
+GENARRATIVE_EXTERNAL_GENERATION_WORKER_LEASE_SECONDS=3600
 GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512
 GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS=320
 GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS=64
--- a/deploy/container/docker-compose.loadtest.yml
+++ b/deploy/container/docker-compose.loadtest.yml
@@ -69,6 +69,32 @@ services:
      retries: 12
      start_period: 20s

+  external-generation-worker:  # Standalone worker process; per the container README it consumes the external_generation_job queue.
+    build:
+      context: ../..
+      dockerfile: deploy/container/api-server.Dockerfile  # Built from the api-server Dockerfile; the process role is selected via environment below.
+      target: api-runtime
+    cpus: "2.0"  # CPU and memory caps match the worker values listed in the container README.
+    mem_limit: 1g
+    env_file:
+      - ./api-server.env  # Shared env file; keys under `environment` below take precedence over it.
+    environment:
+      GENARRATIVE_PROCESS_ROLE: external-generation-worker  # Overrides the role from the env file (api-server.env.example defaults it to `api`).
+      GENARRATIVE_TRACKING_OUTBOX_DIR: /var/lib/genarrative/tracking-outbox-worker  # NOTE(review): presumably a worker-specific outbox path so it is not shared with api-server; confirm.
+      OTEL_EXPORTER_OTLP_ENDPOINT: http://otelcol:4318  # Points at the otelcol service; the README lists the collector on 4317/4318.
+      OTEL_SERVICE_NAME: genarrative-external-generation-worker
+    extra_hosts:
+      - "host.docker.internal:host-gateway"  # Resolves host.docker.internal to the Docker host gateway.
+    ulimits:
+      nofile:
+        soft: 4096  # Same 4096 soft nofile figure quoted in the container README.
+        hard: 4096
+    depends_on:
+      spacetimedb:
+        condition: service_healthy  # Start only after spacetimedb reports healthy.
+      otelcol:
+        condition: service_started  # Only requires otelcol to have been started, not to be healthy.
+
  nginx:
    build:
      context: ../..