perf(api-server): tune gallery load shedding
This commit is contained in:
@@ -107,6 +107,22 @@
|
|||||||
- 验证:对照打 `/api/runtime/puzzle/gallery` 与 `/healthz`;对比 `PREALLOCATED_VUS=300 MAX_VUS=800` 和 `PREALLOCATED_VUS=20 MAX_VUS=40`;压测结束后继续采样 10 秒确认 private memory 回落。
|
- 验证:对照打 `/api/runtime/puzzle/gallery` 与 `/healthz`;对比 `PREALLOCATED_VUS=300 MAX_VUS=800` 和 `PREALLOCATED_VUS=20 MAX_VUS=40`;压测结束后继续采样 10 秒确认 private memory 回落。
|
||||||
- 关联:`scripts/loadtest/README.md`、`docs/【开发运维】本地开发验证与生产运维-2026-05-15.md`、`server-rs/crates/api-server/src/process_metrics.rs`、`server-rs/crates/api-server/src/telemetry.rs`。
|
- 关联:`scripts/loadtest/README.md`、`docs/【开发运维】本地开发验证与生产运维-2026-05-15.md`、`server-rs/crates/api-server/src/process_metrics.rs`、`server-rs/crates/api-server/src/telemetry.rs`。
|
||||||
|
|
||||||
|
## 容器高 VU 下 `/healthz` RSS 尖峰先查 Axum state 深拷贝
|
||||||
|
|
||||||
|
- 现象:容器 Linux release `api-server` 打 `/healthz`,500 HTTP req/s、`PREALLOCATED_VUS=100` 只跑 1 秒也能把 RSS 推到约 1 GiB;该问题与作品列表、SpacetimeDB procedure、业务 cache 和请求日志等级无关。
|
||||||
|
- 原因:`AppState` 曾直接 `#[derive(Clone)]` 大结构体,里面包含配置、SpacetimeDB client、平台服务、认证服务和多组 cache。Axum/Hyper 会在 router/service/connection 路径频繁 clone state,高并发 keepalive 下会放大为状态深拷贝高水位。
|
||||||
|
- 处理:`server-rs/crates/api-server/src/state.rs` 的 `AppState` 必须保持 `Arc<AppStateInner>` 浅拷贝壳;新增共享状态字段时放入 `AppStateInner`,不要把外层改回大结构体 clone。
|
||||||
|
- 验证:用容器内 k6 直连 `api-server:8082/healthz`,500 HTTP req/s、`PREALLOCATED_VUS=100`、30 秒压测后采样 `/proc/$pid/status`、`/proc/$pid/smaps_rollup` 和 cgroup `memory.current/memory.peak`。2026-05-18 修复后结果为 `15001` 请求、`http_req_failed=0`、`dropped_iterations=0`,RSS 约 18 MiB -> 52 MiB,cgroup peak 约 47 MiB。
|
||||||
|
- 关联:`server-rs/crates/api-server/src/state.rs`、`deploy/container/README.md`、`deploy/container/api-server.Dockerfile`。
|
||||||
|
|
||||||
|
## Gallery 压测延迟升高先查入口过量放行和 TTL 边界刷新
|
||||||
|
|
||||||
|
- 现象:公开作品列表在 500-1000 HTTP req/s 附近可能吞吐没有明显提升,但 p95 变高、VU 上升,甚至出现排队和 dropped iterations。
|
||||||
|
- 原因:Nginx、Axum 和缓存刷新边界如果同时允许过多请求进入,压力会先堆在连接、service 和 cache rebuild 周围;这类延迟不等同于数据库连接池不足。
|
||||||
|
- 处理:Nginx 按 endpoint 使用 `limit_req` 快拒绝,api-server 按 `default/gallery/detail/admin` 分组 semaphore 快拒绝;拼图广场缓存 TTL 过期时,若已有缓存则先返回 stale 响应,同时只允许一个后台 refresh 任务重建;仅在冷启动无缓存时才同步构建。
|
||||||
|
- 验证:OTLP 看 `genarrative.http.server.request_permits.available{pool=...}`、`genarrative.puzzle_gallery.cache.stale_hits`、`refreshes_started`、`refreshes_failed`,Nginx access log 看 `request_time` 与 `upstream_response_time` 是否同步收敛;超过容量时应明确 429,而不是长时间排队或新增 502。
|
||||||
|
- 关联:`deploy/nginx/genarrative.conf`、`deploy/container/nginx.conf`、`server-rs/crates/api-server/src/backpressure.rs`、`server-rs/crates/api-server/src/puzzle_gallery_cache.rs`。
|
||||||
|
|
||||||
## 多玩法公开广场列表优先订阅 public view / read model
|
## 多玩法公开广场列表优先订阅 public view / read model
|
||||||
|
|
||||||
- 现象:抓大鹅、方洞挑战、视觉小说、大鱼吃小鱼等公开列表如果沿用 `list_*_works` procedure,即使只读已发布作品,也会在每个 HTTP 请求里回到 SpacetimeDB WASM 侧扫描、反序列化配置并组装列表,50RPS 以上容易变成热点。
|
- 现象:抓大鹅、方洞挑战、视觉小说、大鱼吃小鱼等公开列表如果沿用 `list_*_works` procedure,即使只读已发布作品,也会在每个 HTTP 请求里回到 SpacetimeDB WASM 侧扫描、反序列化配置并组装列表,50RPS 以上容易变成热点。
|
||||||
@@ -824,6 +840,14 @@
|
|||||||
- 验证:执行 `cargo test -p api-server jsapi_order_request_sets_wechat_required_http_headers --manifest-path server-rs/Cargo.toml`。
|
- 验证:执行 `cargo test -p api-server jsapi_order_request_sets_wechat_required_http_headers --manifest-path server-rs/Cargo.toml`。
|
||||||
- 关联:`server-rs/crates/api-server/src/wechat_pay.rs`、`docs/technical/MY_TAB_ACCOUNT_RECHARGE_IMPLEMENTATION_2026-04-25.md`。
|
- 关联:`server-rs/crates/api-server/src/wechat_pay.rs`、`docs/technical/MY_TAB_ACCOUNT_RECHARGE_IMPLEMENTATION_2026-04-25.md`。
|
||||||
|
|
||||||
|
## 容器公开列表压测不要靠继续抬并发吃满 CPU
|
||||||
|
|
||||||
|
- 现象:2C / 2G 容器压测公开 gallery list 时,`api-server` CPU 仍有余量,看起来像可以继续提高 `GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS` 或 Nginx `limit_conn`。
|
||||||
|
- 原因:当前瓶颈不是 Tokio worker 线程数。`/api/runtime/puzzle/gallery` 和 `/api/runtime/custom-world-gallery` 成功响应后会走全局 route tracking,继续向 SpacetimeDB 写 `record_tracking_event_and_return`;入口并发从 320 抬到 336 / 352 时,SpacetimeDB 内存先逼近 `896m` 容器上限,200 请求 p95 变差,429 比例没有改善。
|
||||||
|
- 处理:2C / 2G 容器模拟里公开 gallery list 暂以 `limit_conn=320`、`GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS=320` 作为稳定上限。若要继续提升吞吐,优先减少高频公开 GET 的 tracking 写入、做采样或改成批量/异步聚合;不要单纯放大入口并发。
|
||||||
|
- 验证:宿主机 k6 打 `http://127.0.0.1:18080`,`PEAK_RPS=1000` 等价约 2000 HTTP req/s;320 档无 dropped iterations、无 5xx、无 OOM,200 请求 `request_time p95` 约 0.292s。336 / 352 档 p95 升到约 0.31s / 0.32s,SpacetimeDB 内存尾部可到约 `880MiB`(容器上限 `896MiB`)。
|
||||||
|
- 关联:`deploy/container/nginx.conf`、`deploy/container/api-server.env.example`、`deploy/container/README.md`、`server-rs/crates/api-server/src/tracking.rs`。
|
||||||
|
|
||||||
## 后台表查询展示 SpacetimeDB 枚举时不要套用 Option 解码
|
## 后台表查询展示 SpacetimeDB 枚举时不要套用 Option 解码
|
||||||
|
|
||||||
- 现象:后台“表查询”查看 `profile_recharge_order` 时,`kind` 和 `status` 显示为空数组 `[]`,例如充值订单原始行里 `points_60` 的类型和状态都不可读。
|
- 现象:后台“表查询”查看 `profile_recharge_order` 时,`kind` 和 `status` 显示为空数组 `[]`,例如充值订单原始行里 `points_60` 的类型和状态都不可读。
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ Docker Compose
|
|||||||
└─ k6 profile=loadtest 时临时启动,在 compose 网络内压 nginx
|
└─ k6 profile=loadtest 时临时启动,在 compose 网络内压 nginx
|
||||||
```
|
```
|
||||||
|
|
||||||
当前容器模拟参数按 `genarrative-release` 服务器采样值收口为 2 vCPU / 2 GiB RAM / 4096 soft nofile / 768 worker_connections,并已在 compose 里落实到 `spacetimedb cpus=1.0 mem_limit=768m`、`api-server cpus=2.0 mem_limit=1g`、`nginx cpus=0.25 mem_limit=128m`、`otelcol cpus=0.25 mem_limit=128m`、`k6 cpus=0.5 mem_limit=512m`。
|
当前容器模拟参数按 `genarrative-release` 服务器采样值收口为 2 vCPU / 2 GiB RAM / 4096 soft nofile / 768 worker_connections,并已在 compose 里落实到 `spacetimedb cpus=1.0 mem_limit=896m`、`api-server cpus=2.0 mem_limit=1g`、`nginx cpus=0.5 mem_limit=128m`、`otelcol cpus=0.25 mem_limit=128m`、`k6 cpus=1.0 mem_limit=512m`。SpacetimeDB 同时设置 `--page_pool_max_size=402653184`,给 reducer、订阅与运行时保留更多非 page pool 内存。
|
||||||
|
容器 `api-server` 默认 `GENARRATIVE_API_WORKER_THREADS=4`,用于让 Tokio 在 2 vCPU 配额内有更多 I/O 调度 worker;该值不会突破 compose 里的 `cpus=2.0` CPU 上限。
|
||||||
Collector 镜像使用 `otel/opentelemetry-collector-contrib:0.151.0`。
|
Collector 镜像使用 `otel/opentelemetry-collector-contrib:0.151.0`。
|
||||||
生产服务器若启用 Collector,则由 `deploy/systemd/otelcol-contrib.service` 和 `deploy/otelcol/genarrative-debug.yaml` 托管,不走容器镜像。
|
生产服务器若启用 Collector,则由 `deploy/systemd/otelcol-contrib.service` 和 `deploy/otelcol/genarrative-debug.yaml` 托管,不走容器镜像。
|
||||||
|
|
||||||
@@ -52,6 +53,10 @@ GENARRATIVE_SPACETIME_TOKEN=
|
|||||||
|
|
||||||
Linux Docker Engine 若要从宿主机 CLI 连到容器内服务,直接用 `http://127.0.0.1:13101`;容器内部服务之间统一走 `http://spacetimedb:3101`。
|
Linux Docker Engine 若要从宿主机 CLI 连到容器内服务,直接用 `http://127.0.0.1:13101`;容器内部服务之间统一走 `http://spacetimedb:3101`。
|
||||||
|
|
||||||
|
## 构建工具链
|
||||||
|
|
||||||
|
`api-server` 容器镜像只构建 Linux release API 二进制,不构建 `spacetime-module`。当前 `api-server -> spacetime-client -> spacetimedb-sdk 2.2.0` 依赖链要求 Rust 1.93,因此 `deploy/container/api-server.Dockerfile` 的 Rust builder 固定为 `rust:1.93-bookworm`。如果本机 Docker Hub 拉取失败,可以先在本机准备同名本地 builder 镜像,但不要把临时 bootstrap 容器或私有 registry 凭据写入仓库。
|
||||||
|
|
||||||
## 启动与验证
|
## 启动与验证
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -125,7 +130,19 @@ spacetime publish genarrative-loadtest --server http://127.0.0.1:13101 --module-
|
|||||||
|
|
||||||
发布完成后再执行 `npm run container:up` 和 `npm run container:k6`。如果 `deploy/container/api-server.env` 里的 `GENARRATIVE_SPACETIME_DATABASE` 改成了别的库名,发布命令里的库名也要同步修改。
|
发布完成后再执行 `npm run container:up` 和 `npm run container:k6`。如果 `deploy/container/api-server.env` 里的 `GENARRATIVE_SPACETIME_DATABASE` 改成了别的库名,发布命令里的库名也要同步修改。
|
||||||
|
|
||||||
如果要压 1000 HTTP req/s,把 `PEAK_RPS` 调到 `500`;如果要压 5000 HTTP req/s,把 `PEAK_RPS` 调到 `2500`,并同时提高 `PREALLOCATED_VUS` / `MAX_VUS`,观察是否先被带宽、Nginx `limit_conn` 或 api-server 背压限制。
|
如果要压 1000 HTTP req/s,把 `PEAK_RPS` 调到 `500`;如果要压 5000 HTTP req/s,把 `PEAK_RPS` 调到 `2500`,并同时提高 `PREALLOCATED_VUS` / `MAX_VUS`,观察是否先被带宽、Nginx `limit_conn` / `limit_req` 或 api-server 分组背压限制。当前容器 Nginx 对公开 gallery list 使用 `genarrative_gallery_rps`,公开详情和普通 API 使用 `genarrative_api_rps`,后台 API 使用 `genarrative_admin_rps`;api-server 侧对应 `GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS`、`GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS` 和 `GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS`。
|
||||||
|
|
||||||
|
2026-05-19 的 2C / 2G 容器压测结论:公开 gallery list 的 `limit_conn=320` 与 `GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS=320` 是当前较稳的上限。用宿主机 k6 打 `http://127.0.0.1:18080`,`PEAK_RPS=1000` 等价于约 2000 HTTP req/s 的两接口组合压测;320 档无 dropped iterations、无 5xx、无 OOM,约 `151710` 个 200 与 `34310` 个 429,200 请求 `request_time p95=0.292s`。继续抬到 336 / 352 不会有效吃满 api-server CPU,反而让 200 数量减少、p95 升到约 0.31s / 0.32s,SpacetimeDB 内存尾部约 `880MiB`,逼近 `896MiB` 容器上限,下游内存先到危险区。当前不要为了降低“剩余 CPU”继续抬公开列表并发;下一步应减少成功列表请求后的 SpacetimeDB tracking 写入或优化下游状态,而不是放大入口并发。
|
||||||
|
|
||||||
|
### 内存采样
|
||||||
|
|
||||||
|
排查 API 容器内存时,优先对比压测前后的 `/proc/$pid/smaps_rollup` 和 cgroup 当前/峰值,不把 Windows 任务管理器总占用当成单进程结论:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec genarrative-container-loadtest-api-server-1 sh -c 'pid=$(pidof api-server); grep VmRSS /proc/$pid/status; grep RssAnon /proc/$pid/status; cat /proc/$pid/smaps_rollup | grep Anonymous; echo cgroup_current=$(cat /sys/fs/cgroup/memory.current); echo cgroup_peak=$(cat /sys/fs/cgroup/memory.peak)'
|
||||||
|
```
|
||||||
|
|
||||||
|
`/healthz` 也能复现的内存尖峰应先按连接层、service clone 或 allocator 高水位排查,不要直接归因到 SpacetimeDB procedure、作品列表 cache 或业务 DTO。2026-05-18 验证:`AppState` 改为 `Arc<AppStateInner>` 浅拷贝后,容器内直连 `api-server:8082/healthz` 的 500 HTTP req/s、`PREALLOCATED_VUS=100`、30 秒压测完成 `15001` 次请求,`http_req_failed=0`、`dropped_iterations=0`,API 进程 RSS 从约 18 MiB 升至约 52 MiB,cgroup 峰值约 47 MiB,未再出现 1 GiB 级尖峰。
|
||||||
|
|
||||||
## OTLP
|
## OTLP
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
FROM rust:1.88-bookworm AS rust-builder
|
FROM rust:1.93-bookworm AS rust-builder
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
COPY server-rs ./server-rs
|
COPY server-rs ./server-rs
|
||||||
@@ -36,6 +36,7 @@ COPY apps/admin-web/package.json ./apps/admin-web/package.json
|
|||||||
RUN npm ci
|
RUN npm ci
|
||||||
|
|
||||||
COPY index.html metadata.json tsconfig.json vite.config.ts ./
|
COPY index.html metadata.json tsconfig.json vite.config.ts ./
|
||||||
|
COPY scripts/vite-cli.mjs scripts/admin-web-build.mjs ./scripts/
|
||||||
COPY src ./src
|
COPY src ./src
|
||||||
COPY public ./public
|
COPY public ./public
|
||||||
COPY media ./media
|
COPY media ./media
|
||||||
|
|||||||
@@ -7,8 +7,11 @@ GENARRATIVE_API_HOST=0.0.0.0
|
|||||||
GENARRATIVE_API_PORT=8082
|
GENARRATIVE_API_PORT=8082
|
||||||
GENARRATIVE_API_LOG=info,tower_http=info
|
GENARRATIVE_API_LOG=info,tower_http=info
|
||||||
GENARRATIVE_API_LISTEN_BACKLOG=1024
|
GENARRATIVE_API_LISTEN_BACKLOG=1024
|
||||||
GENARRATIVE_API_WORKER_THREADS=2
|
GENARRATIVE_API_WORKER_THREADS=4
|
||||||
GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512
|
GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512
|
||||||
|
GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS=320
|
||||||
|
GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS=64
|
||||||
|
GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS=16
|
||||||
|
|
||||||
GENARRATIVE_OTEL_ENABLED=false
|
GENARRATIVE_OTEL_ENABLED=false
|
||||||
OTEL_SERVICE_NAME=genarrative-api
|
OTEL_SERVICE_NAME=genarrative-api
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ name: genarrative-container-loadtest
|
|||||||
services:
|
services:
|
||||||
spacetimedb:
|
spacetimedb:
|
||||||
image: clockworklabs/spacetime:v2.2.0
|
image: clockworklabs/spacetime:v2.2.0
|
||||||
|
user: root
|
||||||
command:
|
command:
|
||||||
[
|
[
|
||||||
"start",
|
"start",
|
||||||
@@ -11,11 +12,11 @@ services:
|
|||||||
"--data-dir",
|
"--data-dir",
|
||||||
"/var/lib/spacetimedb",
|
"/var/lib/spacetimedb",
|
||||||
"--page_pool_max_size",
|
"--page_pool_max_size",
|
||||||
"536870912",
|
"402653184",
|
||||||
"--non-interactive",
|
"--non-interactive",
|
||||||
]
|
]
|
||||||
cpus: "1.0"
|
cpus: "1.0"
|
||||||
mem_limit: 768m
|
mem_limit: 896m
|
||||||
ports:
|
ports:
|
||||||
- "${GENARRATIVE_CONTAINER_SPACETIME_PORT:-13101}:3101"
|
- "${GENARRATIVE_CONTAINER_SPACETIME_PORT:-13101}:3101"
|
||||||
volumes:
|
volumes:
|
||||||
@@ -73,7 +74,7 @@ services:
|
|||||||
context: ../..
|
context: ../..
|
||||||
dockerfile: deploy/container/api-server.Dockerfile
|
dockerfile: deploy/container/api-server.Dockerfile
|
||||||
target: nginx-runtime
|
target: nginx-runtime
|
||||||
cpus: "0.25"
|
cpus: "0.5"
|
||||||
mem_limit: 128m
|
mem_limit: 128m
|
||||||
depends_on:
|
depends_on:
|
||||||
api-server:
|
api-server:
|
||||||
@@ -111,7 +112,7 @@ services:
|
|||||||
k6:
|
k6:
|
||||||
image: grafana/k6:0.52.0
|
image: grafana/k6:0.52.0
|
||||||
profiles: ["loadtest"]
|
profiles: ["loadtest"]
|
||||||
cpus: "0.5"
|
cpus: "1.0"
|
||||||
mem_limit: 512m
|
mem_limit: 512m
|
||||||
depends_on:
|
depends_on:
|
||||||
nginx:
|
nginx:
|
||||||
|
|||||||
@@ -21,6 +21,9 @@ http {
|
|||||||
}
|
}
|
||||||
|
|
||||||
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
|
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_gallery_rps:10m rate=2400r/s;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_api_rps:10m rate=300r/s;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_admin_rps:10m rate=30r/s;
|
||||||
|
|
||||||
sendfile on;
|
sendfile on;
|
||||||
keepalive_timeout 65;
|
keepalive_timeout 65;
|
||||||
@@ -48,6 +51,8 @@ http {
|
|||||||
error_log /var/log/nginx/genarrative.error.log warn;
|
error_log /var/log/nginx/genarrative.error.log warn;
|
||||||
limit_conn_status 429;
|
limit_conn_status 429;
|
||||||
limit_conn_log_level warn;
|
limit_conn_log_level warn;
|
||||||
|
limit_req_status 429;
|
||||||
|
limit_req_log_level warn;
|
||||||
|
|
||||||
root /srv/genarrative/web;
|
root /srv/genarrative/web;
|
||||||
index index.html;
|
index index.html;
|
||||||
@@ -55,6 +60,7 @@ http {
|
|||||||
location ^~ /admin/api/ {
|
location ^~ /admin/api/ {
|
||||||
default_type application/json;
|
default_type application/json;
|
||||||
limit_conn genarrative_api_conn 64;
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_admin_rps burst=16 nodelay;
|
||||||
|
|
||||||
proxy_pass http://genarrative_api/admin/api/;
|
proxy_pass http://genarrative_api/admin/api/;
|
||||||
proxy_http_version 1.1;
|
proxy_http_version 1.1;
|
||||||
@@ -82,9 +88,90 @@ http {
|
|||||||
try_files $uri =404;
|
try_files $uri =404;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
location = /api/runtime/puzzle/gallery {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 320;
|
||||||
|
limit_req zone=genarrative_gallery_rps burst=256 nodelay;
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location = /api/runtime/custom-world-gallery {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 320;
|
||||||
|
limit_req zone=genarrative_gallery_rps burst=256 nodelay;
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ~ ^/api/runtime/puzzle/gallery/[^/]+$ {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 32;
|
||||||
|
limit_req zone=genarrative_api_rps burst=32 nodelay;
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ~ ^/api/runtime/custom-world-gallery/[^/]+/[^/]+$ {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 32;
|
||||||
|
limit_req zone=genarrative_api_rps burst=32 nodelay;
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
location ~ ^/api(?:/|$) {
|
location ~ ^/api(?:/|$) {
|
||||||
default_type application/json;
|
default_type application/json;
|
||||||
limit_conn genarrative_api_conn 64;
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_api_rps burst=64 nodelay;
|
||||||
|
|
||||||
proxy_pass http://genarrative_api;
|
proxy_pass http://genarrative_api;
|
||||||
proxy_http_version 1.1;
|
proxy_http_version 1.1;
|
||||||
|
|||||||
3
deploy/env/api-server.env.example
vendored
3
deploy/env/api-server.env.example
vendored
@@ -8,6 +8,9 @@ GENARRATIVE_API_LOG=info,tower_http=info
|
|||||||
GENARRATIVE_API_LISTEN_BACKLOG=1024
|
GENARRATIVE_API_LISTEN_BACKLOG=1024
|
||||||
GENARRATIVE_API_WORKER_THREADS=4
|
GENARRATIVE_API_WORKER_THREADS=4
|
||||||
GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512
|
GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512
|
||||||
|
GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS=64
|
||||||
|
GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS=32
|
||||||
|
GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS=16
|
||||||
GENARRATIVE_OTEL_ENABLED=false
|
GENARRATIVE_OTEL_ENABLED=false
|
||||||
OTEL_SERVICE_NAME=genarrative-api
|
OTEL_SERVICE_NAME=genarrative-api
|
||||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:4318
|
OTEL_EXPORTER_OTLP_ENDPOINT=http://127.0.0.1:4318
|
||||||
|
|||||||
@@ -14,6 +14,9 @@ upstream genarrative_api {
|
|||||||
}
|
}
|
||||||
|
|
||||||
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
|
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_gallery_rps:10m rate=650r/s;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_api_rps:10m rate=300r/s;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_admin_rps:10m rate=30r/s;
|
||||||
|
|
||||||
server {
|
server {
|
||||||
listen 80;
|
listen 80;
|
||||||
@@ -22,6 +25,8 @@ server {
|
|||||||
error_log /var/log/nginx/genarrative.error.log warn;
|
error_log /var/log/nginx/genarrative.error.log warn;
|
||||||
limit_conn_status 429;
|
limit_conn_status 429;
|
||||||
limit_conn_log_level warn;
|
limit_conn_log_level warn;
|
||||||
|
limit_req_status 429;
|
||||||
|
limit_req_log_level warn;
|
||||||
|
|
||||||
gzip on;
|
gzip on;
|
||||||
gzip_vary on;
|
gzip_vary on;
|
||||||
@@ -48,6 +53,7 @@ server {
|
|||||||
location ^~ /admin/api/ {
|
location ^~ /admin/api/ {
|
||||||
default_type application/json;
|
default_type application/json;
|
||||||
limit_conn genarrative_api_conn 64;
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_admin_rps burst=16 nodelay;
|
||||||
|
|
||||||
if ($genarrative_maintenance) {
|
if ($genarrative_maintenance) {
|
||||||
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
@@ -85,10 +91,107 @@ server {
|
|||||||
try_files $uri =404;
|
try_files $uri =404;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
location = /api/runtime/puzzle/gallery {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_gallery_rps burst=64 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location = /api/runtime/custom-world-gallery {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_gallery_rps burst=64 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ~ ^/api/runtime/puzzle/gallery/[^/]+$ {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 32;
|
||||||
|
limit_req zone=genarrative_api_rps burst=32 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ~ ^/api/runtime/custom-world-gallery/[^/]+/[^/]+$ {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 32;
|
||||||
|
limit_req zone=genarrative_api_rps burst=32 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
# 临时兼容主站仍在使用的 /api/* HTTP facade;前端完成 SpacetimeDB SDK 迁移后删除。
|
# 临时兼容主站仍在使用的 /api/* HTTP facade;前端完成 SpacetimeDB SDK 迁移后删除。
|
||||||
location ~ ^/api(?:/|$) {
|
location ~ ^/api(?:/|$) {
|
||||||
default_type application/json;
|
default_type application/json;
|
||||||
limit_conn genarrative_api_conn 64;
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_api_rps burst=64 nodelay;
|
||||||
|
|
||||||
if ($genarrative_maintenance) {
|
if ($genarrative_maintenance) {
|
||||||
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
|||||||
@@ -12,6 +12,9 @@ upstream genarrative_api {
|
|||||||
}
|
}
|
||||||
|
|
||||||
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
|
limit_conn_zone $binary_remote_addr zone=genarrative_api_conn:10m;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_gallery_rps:10m rate=650r/s;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_api_rps:10m rate=300r/s;
|
||||||
|
limit_req_zone $binary_remote_addr zone=genarrative_admin_rps:10m rate=30r/s;
|
||||||
|
|
||||||
server {
|
server {
|
||||||
listen 80;
|
listen 80;
|
||||||
@@ -20,6 +23,8 @@ server {
|
|||||||
error_log /var/log/nginx/genarrative.error.log warn;
|
error_log /var/log/nginx/genarrative.error.log warn;
|
||||||
limit_conn_status 429;
|
limit_conn_status 429;
|
||||||
limit_conn_log_level warn;
|
limit_conn_log_level warn;
|
||||||
|
limit_req_status 429;
|
||||||
|
limit_req_log_level warn;
|
||||||
|
|
||||||
location /.well-known/acme-challenge/ {
|
location /.well-known/acme-challenge/ {
|
||||||
root /var/www/html;
|
root /var/www/html;
|
||||||
@@ -35,6 +40,10 @@ server {
|
|||||||
server_name genarrative.example.com;
|
server_name genarrative.example.com;
|
||||||
access_log /var/log/nginx/genarrative.access.log genarrative_upstream;
|
access_log /var/log/nginx/genarrative.access.log genarrative_upstream;
|
||||||
error_log /var/log/nginx/genarrative.error.log warn;
|
error_log /var/log/nginx/genarrative.error.log warn;
|
||||||
|
limit_conn_status 429;
|
||||||
|
limit_conn_log_level warn;
|
||||||
|
limit_req_status 429;
|
||||||
|
limit_req_log_level warn;
|
||||||
|
|
||||||
gzip on;
|
gzip on;
|
||||||
gzip_vary on;
|
gzip_vary on;
|
||||||
@@ -64,6 +73,7 @@ server {
|
|||||||
location ^~ /admin/api/ {
|
location ^~ /admin/api/ {
|
||||||
default_type application/json;
|
default_type application/json;
|
||||||
limit_conn genarrative_api_conn 64;
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_admin_rps burst=16 nodelay;
|
||||||
|
|
||||||
if ($genarrative_maintenance) {
|
if ($genarrative_maintenance) {
|
||||||
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
@@ -101,10 +111,107 @@ server {
|
|||||||
try_files $uri =404;
|
try_files $uri =404;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
location = /api/runtime/puzzle/gallery {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_gallery_rps burst=64 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location = /api/runtime/custom-world-gallery {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_gallery_rps burst=64 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ~ ^/api/runtime/puzzle/gallery/[^/]+$ {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 32;
|
||||||
|
limit_req zone=genarrative_api_rps burst=32 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ~ ^/api/runtime/custom-world-gallery/[^/]+/[^/]+$ {
|
||||||
|
default_type application/json;
|
||||||
|
limit_conn genarrative_api_conn 32;
|
||||||
|
limit_req zone=genarrative_api_rps burst=32 nodelay;
|
||||||
|
|
||||||
|
if ($genarrative_maintenance) {
|
||||||
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_pass http://genarrative_api;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_buffering off;
|
||||||
|
proxy_read_timeout 3600s;
|
||||||
|
proxy_send_timeout 3600s;
|
||||||
|
add_header X-Accel-Buffering no always;
|
||||||
|
proxy_set_header Connection "";
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
proxy_set_header X-Forwarded-Host $host;
|
||||||
|
proxy_set_header X-Request-Id $request_id;
|
||||||
|
}
|
||||||
|
|
||||||
# 临时兼容主站仍在使用的 /api/* HTTP facade;前端完成 SpacetimeDB SDK 迁移后删除。
|
# 临时兼容主站仍在使用的 /api/* HTTP facade;前端完成 SpacetimeDB SDK 迁移后删除。
|
||||||
location ~ ^/api(?:/|$) {
|
location ~ ^/api(?:/|$) {
|
||||||
default_type application/json;
|
default_type application/json;
|
||||||
limit_conn genarrative_api_conn 64;
|
limit_conn genarrative_api_conn 64;
|
||||||
|
limit_req zone=genarrative_api_rps burst=64 nodelay;
|
||||||
|
|
||||||
if ($genarrative_maintenance) {
|
if ($genarrative_maintenance) {
|
||||||
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
return 503 '{"ok":false,"error":{"code":"MAINTENANCE","message":"服务维护中"}}';
|
||||||
|
|||||||
@@ -154,16 +154,16 @@ Jenkins 按 web / api / Spacetime module / build / deploy / publish 拆分
|
|||||||
50 HTTP req/s 首版压测优化口径:
|
50 HTTP req/s 首版压测优化口径:
|
||||||
|
|
||||||
- `api-server` 生产模板默认 `GENARRATIVE_API_LISTEN_BACKLOG=1024`、`GENARRATIVE_API_WORKER_THREADS=4`;本地未设置 worker threads 时继续使用 Tokio 默认值。
|
- `api-server` 生产模板默认 `GENARRATIVE_API_LISTEN_BACKLOG=1024`、`GENARRATIVE_API_WORKER_THREADS=4`;本地未设置 worker threads 时继续使用 Tokio 默认值。
|
||||||
- `GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512` 开启应用内 HTTP 并发背压,超过并发许可时直接返回 `429 Too Many Requests` 和 `Retry-After: 1`,`/healthz` 不受该限制。该值不是 RPS 限速;如果压测中 429 上升但内存和 p95 收敛,说明背压正在保护进程,需要结合真实容量调阈值或在 Nginx 前置限流。直连 `api-server` 的极高 RPS 压测若出现 `connection refused`,通常已经打到 TCP 监听 / accept 层,应同时检查 backlog、Nginx upstream keepalive 和前置限流。
|
- `GENARRATIVE_API_MAX_CONCURRENT_REQUESTS=512` 开启应用内 HTTP 并发背压;`GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS=64`、`GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS=32`、`GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS=16` 分别限制公开列表、公开详情和后台 API 热路径。超过许可时直接返回 `429 Too Many Requests` 和 `Retry-After: 1`,`/healthz` 不受该限制。这些值不是 RPS 限速;如果压测中 429 上升但内存和 p95 收敛,说明背压正在保护进程。直连 `api-server` 的极高 RPS 压测若出现 `connection refused`,通常已经打到 TCP 监听 / accept 层,应同时检查 backlog、Nginx upstream keepalive 和前置限流。
|
||||||
- `genarrative-api.service` 设置 `LimitNOFILE=65535`、`TasksMax=2048`;上线后用 `systemctl show genarrative-api.service -p LimitNOFILE -p TasksMax` 和 `cat /proc/$(pidof api-server)/limits` 核对。
|
- `genarrative-api.service` 设置 `LimitNOFILE=65535`、`TasksMax=2048`;上线后用 `systemctl show genarrative-api.service -p LimitNOFILE -p TasksMax` 和 `cat /proc/$(pidof api-server)/limits` 核对。
|
||||||
- Server provision 不在目标机下载 SpacetimeDB 或 `otelcol-contrib`。Jenkins 的 `Prepare Provision Tools` 阶段在 `linux && genarrative-build` 构建机执行 `scripts/prepare-server-provision-tools.sh`,通过官方 SpacetimeDB 安装入口 `https://install.spacetimedb.com` 和 OpenTelemetry release 包生成 `provision-tools/`,再通过 `stash/unstash` 上传到 release 部署 agent。目标机上的 `scripts/jenkins-server-provision.sh` 只从该工作区工具包安装 `/stdb/spacetime`、`/stdb/bin/current/*` 和 `/usr/local/bin/otelcol-contrib`。
|
- Server provision 不在目标机下载 SpacetimeDB 或 `otelcol-contrib`。Jenkins 的 `Prepare Provision Tools` 阶段在 `linux && genarrative-build` 构建机执行 `scripts/prepare-server-provision-tools.sh`,通过官方 SpacetimeDB 安装入口 `https://install.spacetimedb.com` 和 OpenTelemetry release 包生成 `provision-tools/`,再通过 `stash/unstash` 上传到 release 部署 agent。目标机上的 `scripts/jenkins-server-provision.sh` 只从该工作区工具包安装 `/stdb/spacetime`、`/stdb/bin/current/*` 和 `/usr/local/bin/otelcol-contrib`。
|
||||||
- `otelcol-contrib.service` 作为可选系统服务加入 provision,默认监听 `127.0.0.1:4317/4318` 并使用 `deploy/otelcol/genarrative-debug.yaml`。api-server 是否发送 OTLP 仍由 `GENARRATIVE_OTEL_ENABLED` 控制,服务 unit 见 `deploy/systemd/otelcol-contrib.service`。
|
- `otelcol-contrib.service` 作为可选系统服务加入 provision,默认监听 `127.0.0.1:4317/4318` 并使用 `deploy/otelcol/genarrative-debug.yaml`。api-server 是否发送 OTLP 仍由 `GENARRATIVE_OTEL_ENABLED` 控制,服务 unit 见 `deploy/systemd/otelcol-contrib.service`。
|
||||||
- Nginx `/api/` 与 `/admin/api/` 通过 `genarrative_api` upstream 代理到 `127.0.0.1:8082`,upstream keepalive 为 64;压测时看 `/var/log/nginx/genarrative.access.log` 中的 `request_time`、`upstream_connect_time`、`upstream_header_time`、`upstream_response_time`、`upstream_status`、`request_id`。
|
- Nginx `/api/` 与 `/admin/api/` 通过 `genarrative_api` upstream 代理到 `127.0.0.1:8082`,upstream keepalive 为 64;`limit_conn` 负责连接 / 并发保护,`limit_req` 负责入口 RPS 快拒绝。当前模板把公开 gallery list 单独放到 `genarrative_gallery_rps`,公开详情和普通 API 放到 `genarrative_api_rps`,后台 API 放到 `genarrative_admin_rps`;压测时看 `/var/log/nginx/genarrative.access.log` 中的 `request_time`、`upstream_connect_time`、`upstream_header_time`、`upstream_response_time`、`upstream_status`、`request_id`。
|
||||||
- 作品列表 K6 脚本一次 iteration 默认请求两个公开接口,因此约 50 HTTP req/s 的目标命令使用 `SCENARIO=spike START_RPS=5 PEAK_RPS=25 HOLD=60s END_RPS=5 DETAIL_RATIO=0 npm run loadtest:k6:works`。
|
- 作品列表 K6 脚本一次 iteration 默认请求两个公开接口,因此约 50 HTTP req/s 的目标命令使用 `SCENARIO=spike START_RPS=5 PEAK_RPS=25 HOLD=60s END_RPS=5 DETAIL_RATIO=0 npm run loadtest:k6:works`。
|
||||||
- 作品列表短期继续由 `api-server` / BFF 订阅 SpacetimeDB 公开 read model 后读本地 cache,不让浏览器前端直接订阅完整列表;未来如新增 `public_work_gallery_entry` 等专用公开作品列表 read model,前端只可订阅稳定、低基数、公开的专用投影,禁止订阅 `puzzle_work_profile`、`custom_world_profile` 等玩法源表后自行 join、聚合或判断权限。前端直订阅落地前必须先补齐权限、字段契约、排序 / 分页、埋点和 BFF 回退策略。
|
- 作品列表短期继续由 `api-server` / BFF 订阅 SpacetimeDB 公开 read model 后读本地 cache,不让浏览器前端直接订阅完整列表;未来如新增 `public_work_gallery_entry` 等专用公开作品列表 read model,前端只可订阅稳定、低基数、公开的专用投影,禁止订阅 `puzzle_work_profile`、`custom_world_profile` 等玩法源表后自行 join、聚合或判断权限。前端直订阅落地前必须先补齐权限、字段契约、排序 / 分页、埋点和 BFF 回退策略。
|
||||||
- 50 HTTP req/s 验收目标为 `http_req_failed < 1%`、`p95 < 2s`、`dropped_iterations = 0`,同时压测窗口内 Nginx 无新增 502。
|
- 50 HTTP req/s 验收目标为 `http_req_failed < 1%`、`p95 < 2s`、`dropped_iterations = 0`,同时压测窗口内 Nginx 无新增 502。
|
||||||
|
|
||||||
容器化压测与隔离部署方案单独放在 `deploy/container/`,用于本机或预发模拟 Linux release + Nginx + OTLP Collector 拓扑,不替换当前生产 `systemd + Nginx + Jenkins` 发布路径。当前容器模拟参数按 `genarrative-release` 采样值收口为 2 vCPU / 2 GiB RAM / `nofile=4096` / `worker_connections=768`,并在 compose 里落实到 `spacetimedb cpus=1.0 mem_limit=768m`、`api-server cpus=2.0 mem_limit=1g`、`nginx cpus=0.25 mem_limit=128m`、`otelcol cpus=0.25 mem_limit=128m`、`k6 cpus=0.5 mem_limit=512m`:
|
容器化压测与隔离部署方案单独放在 `deploy/container/`,用于本机或预发模拟 Linux release + Nginx + OTLP Collector 拓扑,不替换当前生产 `systemd + Nginx + Jenkins` 发布路径。当前容器模拟参数按 `genarrative-release` 采样值收口为 2 vCPU / 2 GiB RAM / `nofile=4096` / `worker_connections=768`,并在 compose 里落实到 `spacetimedb cpus=1.0 mem_limit=768m`、`api-server cpus=2.0 mem_limit=1g`、`nginx cpus=0.25 mem_limit=128m`、`otelcol cpus=0.25 mem_limit=128m`、`k6 cpus=0.5 mem_limit=512m`。容器 `api-server` 默认 `GENARRATIVE_API_WORKER_THREADS=4`,只增加 Tokio worker 调度并发,不突破 `api-server cpus=2.0` 的 CPU 配额:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm run container:init
|
npm run container:init
|
||||||
@@ -185,8 +185,8 @@ OpenTelemetry 现阶段可选 OTLP traces / metrics / logs,但本地日志与
|
|||||||
- api-server 当前发 OTLP HTTP,`OTEL_EXPORTER_OTLP_ENDPOINT` 指向 Collector HTTP base endpoint;不要改到 gRPC `4317` 或 Rider 端口,Rider 由 Collector 通过 `RIDER_OTLP_GRPC_ENDPOINT` 转发。
|
- api-server 当前发 OTLP HTTP,`OTEL_EXPORTER_OTLP_ENDPOINT` 指向 Collector HTTP base endpoint;不要改到 gRPC `4317` 或 Rider 端口,Rider 由 Collector 通过 `RIDER_OTLP_GRPC_ENDPOINT` 转发。
|
||||||
- 应用日志仍通过 `journalctl -u genarrative-api.service` 查看,Nginx 日志仍写文件;日志等级继续用 `GENARRATIVE_API_LOG` / `RUST_LOG` 控制,例如 `info,tower_http=info,spacetime_client=info`。
|
- 应用日志仍通过 `journalctl -u genarrative-api.service` 查看,Nginx 日志仍写文件;日志等级继续用 `GENARRATIVE_API_LOG` / `RUST_LOG` 控制,例如 `info,tower_http=info,spacetime_client=info`。
|
||||||
- debug exporter / Rider 转发都会同时接收 traces、metrics 和 logs。
|
- debug exporter / Rider 转发都会同时接收 traces、metrics 和 logs。
|
||||||
- api-server 会随 metrics 发送进程级指标:`process.memory.usage`、`process.memory.virtual`、`process.thread.count`、`genarrative.process.memory.private`;Windows 额外发送 `process.windows.handle.count`,Linux 额外发送 `process.unix.file_descriptor.count`。这些指标只描述当前进程,不携带请求、用户或作品 label。
|
- api-server 会随 metrics 发送进程级指标:`process.memory.usage`、`process.memory.virtual`、`process.cpu.time`、`genarrative.process.cpu.usage_percent`、`process.thread.count`、`genarrative.process.memory.private`;Windows 额外发送 `process.windows.handle.count`,Linux 额外发送 `process.unix.file_descriptor.count`。这些指标只描述当前进程,不携带请求、用户或作品 label。
|
||||||
- HTTP 运行态补充发送 `genarrative.http.server.response_bodies.in_flight` 与 `genarrative.http.server.request_permits.available`,用于区分业务 handler / 背压 permit 是否仍被占用;拼图广场热点缓存补充发送 `genarrative.puzzle_gallery.cache.*` 指标,记录命中、未命中、重建耗时和预序列化 data JSON 字节数。
|
- HTTP 运行态补充发送 `genarrative.http.server.response_bodies.in_flight` 与 `genarrative.http.server.request_permits.available`,后者带低基数 `pool=default|gallery|detail|admin` label,用于区分业务 handler / 背压 permit 是否仍被占用;拼图广场热点缓存补充发送 `genarrative.puzzle_gallery.cache.*` 指标,记录 fresh hit、stale hit、未命中、后台刷新开始 / 失败、重建耗时和预序列化 data JSON 字节数。
|
||||||
- SpacetimeDB 观测分为两类:procedure / reducer 调用继续用 `genarrative.spacetime.procedure.*`,订阅本地 cache 读使用 `genarrative.spacetime.read.*`。`read=list_puzzle_gallery` 表示拼图广场当前从 `puzzle_gallery_card_view` 本地 cache 读取,不再每个 HTTP 请求调用 `list_puzzle_gallery` procedure。
|
- SpacetimeDB 观测分为两类:procedure / reducer 调用继续用 `genarrative.spacetime.procedure.*`,订阅本地 cache 读使用 `genarrative.spacetime.read.*`。`read=list_puzzle_gallery` 表示拼图广场当前从 `puzzle_gallery_card_view` 本地 cache 读取,不再每个 HTTP 请求调用 `list_puzzle_gallery` procedure。
|
||||||
- 本地 Windows 直连压测的内存高水位要结合 K6 VU / 连接数解释。250 RPS 下过高 `PREALLOCATED_VUS` 可能让 300 个本地 Established 连接把 `api-server` private memory 瞬时推到 GB 级,且 `/healthz` 小响应也能复现;若压测结束后回落、`response_bodies.in_flight` 和背压 permit 未显示业务积压,应优先按连接 / 发送链路高水位处理,而不是判断为 SpacetimeDB 或 JSON 缓存泄漏。
|
- 本地 Windows 直连压测的内存高水位要结合 K6 VU / 连接数解释。250 RPS 下过高 `PREALLOCATED_VUS` 可能让 300 个本地 Established 连接把 `api-server` private memory 瞬时推到 GB 级,且 `/healthz` 小响应也能复现;若压测结束后回落、`response_bodies.in_flight` 和背压 permit 未显示业务积压,应优先按连接 / 发送链路高水位处理,而不是判断为 SpacetimeDB 或 JSON 缓存泄漏。
|
||||||
- Rider 的 Logs 面板只展示 log event 自身字段,不会自动展开父 span 的全部 attributes;请求完成日志会直接带 `request_id`、`http.request.method`、`http.route`、`url.scheme`、`url.path`、`http.response.status_code`、`status_class`、`latency_ms` 和 `slow_request`,完整链路继续到 Traces 面板按 trace/span 查看。
|
- Rider 的 Logs 面板只展示 log event 自身字段,不会自动展开父 span 的全部 attributes;请求完成日志会直接带 `request_id`、`http.request.method`、`http.route`、`url.scheme`、`url.path`、`http.response.status_code`、`status_class`、`latency_ms` 和 `slow_request`,完整链路继续到 Traces 面板按 trace/span 查看。
|
||||||
|
|||||||
@@ -226,7 +226,7 @@ npm run loadtest:k6:works
|
|||||||
## 排障
|
## 排障
|
||||||
|
|
||||||
- 如果公开 gallery 返回 `creation_entry_disabled` 或 503,检查本地 creation entry 配置是否禁用了对应入口。
|
- 如果公开 gallery 返回 `creation_entry_disabled` 或 503,检查本地 creation entry 配置是否禁用了对应入口。
|
||||||
- 如果高压下返回 429,优先确认目标环境是否设置了 `GENARRATIVE_API_MAX_CONCURRENT_REQUESTS`。429 表示 api-server 应用层背压已生效,不等同于业务错误;继续看内存、p95、`http_req_failed` 和 OTLP / Nginx timing 判断阈值是否偏低。
|
- 如果高压下返回 429,优先确认目标环境是否设置了 `GENARRATIVE_API_MAX_CONCURRENT_REQUESTS` 以及 `GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS`、`GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS`、`GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS`。429 表示 Nginx 或 api-server 背压已生效,不等同于业务错误;继续看内存、p95、`http_req_failed` 和 OTLP / Nginx timing 判断阈值是否偏低。
|
||||||
- 如果直连 `api-server` 压测出现 `connection refused` 或 status 0,说明压力已经打到 TCP 监听 / accept 层;此时同时检查 `GENARRATIVE_API_LISTEN_BACKLOG`、Nginx upstream keepalive 和是否需要在 Nginx 前置限流,不能只靠应用层背压解释。
|
- 如果直连 `api-server` 压测出现 `connection refused` 或 status 0,说明压力已经打到 TCP 监听 / accept 层;此时同时检查 `GENARRATIVE_API_LISTEN_BACKLOG`、Nginx upstream keepalive 和是否需要在 Nginx 前置限流,不能只靠应用层背压解释。
|
||||||
- 如果个人作品列表返回 401,确认 `AUTH_TOKEN` 是当前 api-server 可识别的 access token。
|
- 如果个人作品列表返回 401,确认 `AUTH_TOKEN` 是当前 api-server 可识别的 access token。
|
||||||
- 如果详情全部 404,确认是否已向目标环境导入与 `WORKS_DATA` 一致的数据。
|
- 如果详情全部 404,确认是否已向目标环境导入与 `WORKS_DATA` 一致的数据。
|
||||||
@@ -317,12 +317,14 @@ Rider 的 Logs 面板展示的是 OTLP log event 自身字段,不会自动把
|
|||||||
- `process.memory.usage`:进程常驻内存 / RSS。
|
- `process.memory.usage`:进程常驻内存 / RSS。
|
||||||
- `process.memory.virtual`:进程虚拟内存;Windows 当前按 `PrivateUsage` 上报,Linux 取 `VmSize`。
|
- `process.memory.virtual`:进程虚拟内存;Windows 当前按 `PrivateUsage` 上报,Linux 取 `VmSize`。
|
||||||
- `genarrative.process.memory.private`:进程私有内存,Windows 来自 `PrivateUsage`,Linux 近似取 `/proc/self/status` 的 `VmData`。
|
- `genarrative.process.memory.private`:进程私有内存,Windows 来自 `PrivateUsage`,Linux 近似取 `/proc/self/status` 的 `VmData`。
|
||||||
|
- `process.cpu.time`:进程 user + system 累计 CPU 秒数。
|
||||||
|
- `genarrative.process.cpu.usage_percent`:两次指标采集之间的进程 CPU 使用率;100% 约等于占满 1 个 CPU core。
|
||||||
- `process.thread.count`:线程数。
|
- `process.thread.count`:线程数。
|
||||||
- `process.windows.handle.count`:Windows 句柄数。
|
- `process.windows.handle.count`:Windows 句柄数。
|
||||||
- `process.unix.file_descriptor.count`:Linux 文件描述符数。
|
- `process.unix.file_descriptor.count`:Linux 文件描述符数。
|
||||||
- `genarrative.http.server.response_bodies.in_flight`:Axum / Hyper 仍持有的响应 body 数;如果内存高但该值很低,说明热点不在业务 handler 生命周期内。
|
- `genarrative.http.server.response_bodies.in_flight`:Axum / Hyper 仍持有的响应 body 数;如果内存高但该值很低,说明热点不在业务 handler 生命周期内。
|
||||||
- `genarrative.http.server.request_permits.available`:应用层 HTTP 背压剩余 permit 数;如果该值未接近 0,说明没有打满 `GENARRATIVE_API_MAX_CONCURRENT_REQUESTS`。
|
- `genarrative.http.server.request_permits.available`:应用层 HTTP 背压剩余 permit 数,带 `pool=default|gallery|detail|admin`;如果目标 pool 未接近 0,说明没有打满对应 `GENARRATIVE_API_*_MAX_CONCURRENT_REQUESTS`。
|
||||||
- `genarrative.puzzle_gallery.cache.hits` / `genarrative.puzzle_gallery.cache.misses` / `genarrative.puzzle_gallery.cache.rebuilds`:拼图广场响应缓存命中、未命中和重建次数。
|
- `genarrative.puzzle_gallery.cache.hits` / `genarrative.puzzle_gallery.cache.stale_hits` / `genarrative.puzzle_gallery.cache.misses` / `genarrative.puzzle_gallery.cache.refreshes_started` / `genarrative.puzzle_gallery.cache.refreshes_failed` / `genarrative.puzzle_gallery.cache.rebuilds`:拼图广场响应缓存 fresh 命中、stale 命中、未命中、后台刷新和重建次数。
|
||||||
- `genarrative.puzzle_gallery.cache.rebuild.duration`:拼图广场缓存重建耗时。
|
- `genarrative.puzzle_gallery.cache.rebuild.duration`:拼图广场缓存重建耗时。
|
||||||
- `genarrative.puzzle_gallery.cache.data_json_bytes`:拼图广场缓存内预序列化 data JSON 大小。
|
- `genarrative.puzzle_gallery.cache.data_json_bytes`:拼图广场缓存内预序列化 data JSON 大小。
|
||||||
- `genarrative.spacetime.read.calls` / `genarrative.spacetime.read.duration_ms`:SpacetimeDB 订阅本地 cache 读次数和耗时;`read=list_puzzle_gallery` 表示当前路径走 view / local cache,不是 procedure。
|
- `genarrative.spacetime.read.calls` / `genarrative.spacetime.read.duration_ms`:SpacetimeDB 订阅本地 cache 读次数和耗时;`read=list_puzzle_gallery` 表示当前路径走 view / local cache,不是 procedure。
|
||||||
@@ -336,7 +338,7 @@ Rider 的 Logs 面板展示的是 OTLP log event 自身字段,不会自动把
|
|||||||
```bash
|
```bash
|
||||||
systemctl show genarrative-api.service -p LimitNOFILE -p TasksMax
|
systemctl show genarrative-api.service -p LimitNOFILE -p TasksMax
|
||||||
cat /proc/$(pidof api-server)/limits
|
cat /proc/$(pidof api-server)/limits
|
||||||
tr '\0' '\n' < /proc/$(pidof api-server)/environ | grep GENARRATIVE_API_MAX_CONCURRENT_REQUESTS
|
tr '\0' '\n' < /proc/$(pidof api-server)/environ | grep 'GENARRATIVE_API_.*MAX_CONCURRENT_REQUESTS'
|
||||||
ss -ltnp | grep 8082
|
ss -ltnp | grep 8082
|
||||||
curl -sS http://127.0.0.1:8082/healthz
|
curl -sS http://127.0.0.1:8082/healthz
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
{
|
{
|
||||||
"source": "spacetime-migration-7.local.json",
|
"source": "spacetime-migration-1.json",
|
||||||
"generatedAt": "2026-05-11T13:09:51.569Z",
|
"generatedAt": "2026-05-18T11:54:04.280Z",
|
||||||
"counts": {
|
"counts": {
|
||||||
"puzzle_work_profile": 3,
|
"puzzle_work_profile": 3,
|
||||||
"custom_world_profile": 1,
|
"custom_world_profile": 1,
|
||||||
"match3d_work_profile": 0
|
"match3d_work_profile": 0,
|
||||||
|
"square_hole_work_profile": 0,
|
||||||
|
"visual_novel_work_profile": 0
|
||||||
},
|
},
|
||||||
"tables": {
|
"tables": {
|
||||||
"puzzle_work_profile": [
|
"puzzle_work_profile": [
|
||||||
@@ -113,7 +115,9 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"match3d_work_profile": []
|
"match3d_work_profile": [],
|
||||||
|
"square_hole_work_profile": [],
|
||||||
|
"visual_novel_work_profile": []
|
||||||
},
|
},
|
||||||
"profileIds": {
|
"profileIds": {
|
||||||
"puzzle": [
|
"puzzle": [
|
||||||
|
|||||||
@@ -137,12 +137,12 @@ function unwrapPayload(json) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function hasCollection(payload, keys) {
|
function hasCollection(payload, keys) {
|
||||||
return keys.some((key) => Array.isArray(payload?.[key]));
|
return Boolean(payload) && keys.some((key) => Array.isArray(payload[key]));
|
||||||
}
|
}
|
||||||
|
|
||||||
function firstCollection(payload, keys) {
|
function firstCollection(payload, keys) {
|
||||||
for (const key of keys) {
|
for (const key of keys) {
|
||||||
if (Array.isArray(payload?.[key])) return payload[key];
|
if (payload && Array.isArray(payload[key])) return payload[key];
|
||||||
}
|
}
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
@@ -152,10 +152,11 @@ function hasListItemShape(payload, keys) {
|
|||||||
if (collection.length === 0) return true;
|
if (collection.length === 0) return true;
|
||||||
const item = collection[0];
|
const item = collection[0];
|
||||||
const hasId = Boolean(
|
const hasId = Boolean(
|
||||||
item?.profileId || item?.profile_id || item?.workId || item?.work_id || item?.publicWorkCode,
|
item &&
|
||||||
|
(item.profileId || item.profile_id || item.workId || item.work_id || item.publicWorkCode),
|
||||||
);
|
);
|
||||||
const hasTitle = Boolean(
|
const hasTitle = Boolean(
|
||||||
item?.title || item?.workTitle || item?.work_title || item?.levelName || item?.worldName,
|
item && (item.title || item.workTitle || item.work_title || item.levelName || item.worldName),
|
||||||
);
|
);
|
||||||
return hasId && hasTitle;
|
return hasId && hasTitle;
|
||||||
}
|
}
|
||||||
@@ -213,7 +214,8 @@ function performDetailRequest() {
|
|||||||
const payload = unwrapPayload(json);
|
const payload = unwrapPayload(json);
|
||||||
const ok = check(response, {
|
const ok = check(response, {
|
||||||
[`${endpoint.name} status is 200`]: (res) => res.status === 200,
|
[`${endpoint.name} status is 200`]: (res) => res.status === 200,
|
||||||
[`${endpoint.name} has detail payload`]: () => endpoint.expectKeys.some((key) => payload?.[key]),
|
[`${endpoint.name} has detail payload`]: () =>
|
||||||
|
Boolean(payload) && endpoint.expectKeys.some((key) => payload[key]),
|
||||||
});
|
});
|
||||||
worksDetailShapeErrorRate.add(!ok, { endpoint: endpoint.name });
|
worksDetailShapeErrorRate.add(!ok, { endpoint: endpoint.name });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use axum::{
|
use axum::{
|
||||||
Router,
|
Router,
|
||||||
body::Body,
|
body::Body,
|
||||||
extract::Extension,
|
extract::{Extension, FromRef},
|
||||||
http::Request,
|
http::Request,
|
||||||
middleware,
|
middleware,
|
||||||
response::Response,
|
response::Response,
|
||||||
@@ -22,7 +22,7 @@ use crate::{
|
|||||||
request_context::{RequestContext, attach_request_context, resolve_request_id},
|
request_context::{RequestContext, attach_request_context, resolve_request_id},
|
||||||
response_headers::propagate_request_id_header,
|
response_headers::propagate_request_id_header,
|
||||||
runtime_inventory::get_runtime_inventory_state,
|
runtime_inventory::get_runtime_inventory_state,
|
||||||
state::AppState,
|
state::{AppState, BackpressureState},
|
||||||
telemetry::record_http_observability,
|
telemetry::record_http_observability,
|
||||||
tracking::record_route_tracking_event_after_success,
|
tracking::record_route_tracking_event_after_success,
|
||||||
vector_engine_audio_generation::{
|
vector_engine_audio_generation::{
|
||||||
@@ -79,7 +79,7 @@ pub fn build_router(state: AppState) -> Router {
|
|||||||
))
|
))
|
||||||
// HTTP 背压在业务路由外侧快拒绝,避免过载请求继续占用 SpacetimeDB facade 与业务执行资源。
|
// HTTP 背压在业务路由外侧快拒绝,避免过载请求继续占用 SpacetimeDB facade 与业务执行资源。
|
||||||
.layer(middleware::from_fn_with_state(
|
.layer(middleware::from_fn_with_state(
|
||||||
state.clone(),
|
BackpressureState::from_ref(&state),
|
||||||
limit_concurrent_requests,
|
limit_concurrent_requests,
|
||||||
))
|
))
|
||||||
// 错误归一化层放在 tracing 里侧,让 tracing 记录到最终对外返回的状态与错误体形态。
|
// 错误归一化层放在 tracing 里侧,让 tracing 记录到最终对外返回的状态与错误体形态。
|
||||||
|
|||||||
@@ -13,11 +13,11 @@ use tokio::sync::{OwnedSemaphorePermit, TryAcquireError};
|
|||||||
use crate::{
|
use crate::{
|
||||||
http_error::AppError,
|
http_error::AppError,
|
||||||
request_context::RequestContext,
|
request_context::RequestContext,
|
||||||
state::{AppState, HttpRequestPermitPool},
|
state::{BackpressureState, HttpRequestPermitPool, HttpRequestPermitPoolKind},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub async fn limit_concurrent_requests(
|
pub async fn limit_concurrent_requests(
|
||||||
State(state): State<AppState>,
|
State(state): State<BackpressureState>,
|
||||||
request: Request,
|
request: Request,
|
||||||
next: Next,
|
next: Next,
|
||||||
) -> Response {
|
) -> Response {
|
||||||
@@ -25,29 +25,38 @@ pub async fn limit_concurrent_requests(
|
|||||||
return next.run(request).await;
|
return next.run(request).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Some(permit_pool) = state.http_request_permit_pool() else {
|
let requested_pool = classify_request_permit_pool(request.uri().path());
|
||||||
|
let Some((permit_pool_kind, permit_pool)) = state.request_permit_pool(requested_pool) else {
|
||||||
return next.run(request).await;
|
return next.run(request).await;
|
||||||
};
|
};
|
||||||
|
|
||||||
match acquire_http_request_permit(permit_pool) {
|
match acquire_http_request_permit(permit_pool_kind, permit_pool) {
|
||||||
Ok(permit) => hold_permit_until_response_body_dropped(next.run(request).await, permit),
|
Ok(permit) => hold_permit_until_response_body_dropped(next.run(request).await, permit),
|
||||||
Err(_) => reject_overloaded_request(&request),
|
Err(_) => reject_overloaded_request(&request),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn acquire_http_request_permit(
|
fn acquire_http_request_permit(
|
||||||
|
permit_pool_kind: HttpRequestPermitPoolKind,
|
||||||
permit_pool: Arc<HttpRequestPermitPool>,
|
permit_pool: Arc<HttpRequestPermitPool>,
|
||||||
) -> Result<HttpRequestPermitGuard, TryAcquireError> {
|
) -> Result<HttpRequestPermitGuard, TryAcquireError> {
|
||||||
match permit_pool.clone().try_acquire_owned() {
|
match permit_pool.clone().try_acquire_owned() {
|
||||||
Ok(permit) => {
|
Ok(permit) => {
|
||||||
crate::telemetry::update_http_request_permits_available(permit_pool.available_permits());
|
crate::telemetry::update_http_request_permits_available(
|
||||||
|
permit_pool_kind,
|
||||||
|
permit_pool.available_permits(),
|
||||||
|
);
|
||||||
Ok(HttpRequestPermitGuard {
|
Ok(HttpRequestPermitGuard {
|
||||||
|
permit_pool_kind,
|
||||||
permit: Some(permit),
|
permit: Some(permit),
|
||||||
permit_pool,
|
permit_pool,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
crate::telemetry::update_http_request_permits_available(permit_pool.available_permits());
|
crate::telemetry::update_http_request_permits_available(
|
||||||
|
permit_pool_kind,
|
||||||
|
permit_pool.available_permits(),
|
||||||
|
);
|
||||||
Err(error)
|
Err(error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -66,6 +75,7 @@ fn hold_permit_until_response_body_dropped(
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct HttpRequestPermitGuard {
|
struct HttpRequestPermitGuard {
|
||||||
|
permit_pool_kind: HttpRequestPermitPoolKind,
|
||||||
permit: Option<OwnedSemaphorePermit>,
|
permit: Option<OwnedSemaphorePermit>,
|
||||||
permit_pool: Arc<HttpRequestPermitPool>,
|
permit_pool: Arc<HttpRequestPermitPool>,
|
||||||
}
|
}
|
||||||
@@ -73,7 +83,10 @@ struct HttpRequestPermitGuard {
|
|||||||
impl Drop for HttpRequestPermitGuard {
|
impl Drop for HttpRequestPermitGuard {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
drop(self.permit.take());
|
drop(self.permit.take());
|
||||||
crate::telemetry::update_http_request_permits_available(self.permit_pool.available_permits());
|
crate::telemetry::update_http_request_permits_available(
|
||||||
|
self.permit_pool_kind,
|
||||||
|
self.permit_pool.available_permits(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -92,6 +105,44 @@ fn should_bypass_backpressure(request: &Request<Body>) -> bool {
|
|||||||
request.uri().path() == "/healthz"
|
request.uri().path() == "/healthz"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn classify_request_permit_pool(path: &str) -> HttpRequestPermitPoolKind {
|
||||||
|
if is_gallery_list_path(path) {
|
||||||
|
HttpRequestPermitPoolKind::Gallery
|
||||||
|
} else if is_gallery_detail_path(path) {
|
||||||
|
HttpRequestPermitPoolKind::Detail
|
||||||
|
} else if path.starts_with("/admin/api/") {
|
||||||
|
HttpRequestPermitPoolKind::Admin
|
||||||
|
} else {
|
||||||
|
HttpRequestPermitPoolKind::Default
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_gallery_list_path(path: &str) -> bool {
|
||||||
|
matches!(
|
||||||
|
path,
|
||||||
|
"/api/runtime/puzzle/gallery" | "/api/runtime/custom-world-gallery"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_gallery_detail_path(path: &str) -> bool {
|
||||||
|
let puzzle_prefix = "/api/runtime/puzzle/gallery/";
|
||||||
|
if let Some(profile_id) = path.strip_prefix(puzzle_prefix) {
|
||||||
|
return !profile_id.is_empty() && !profile_id.contains('/');
|
||||||
|
}
|
||||||
|
|
||||||
|
let custom_world_prefix = "/api/runtime/custom-world-gallery/";
|
||||||
|
if let Some(remainder) = path.strip_prefix(custom_world_prefix) {
|
||||||
|
let mut segments = remainder.split('/');
|
||||||
|
return matches!(
|
||||||
|
(segments.next(), segments.next(), segments.next()),
|
||||||
|
(Some(owner_user_id), Some(profile_id), None)
|
||||||
|
if !owner_user_id.is_empty() && !profile_id.is_empty()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -107,9 +158,14 @@ mod tests {
|
|||||||
use tokio::sync::Notify;
|
use tokio::sync::Notify;
|
||||||
use tower::ServiceExt;
|
use tower::ServiceExt;
|
||||||
|
|
||||||
use crate::{config::AppConfig, state::AppState};
|
use axum::extract::FromRef;
|
||||||
|
|
||||||
use super::limit_concurrent_requests;
|
use crate::{
|
||||||
|
config::AppConfig,
|
||||||
|
state::{AppState, BackpressureState},
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{classify_request_permit_pool, limit_concurrent_requests};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct HeldRequestGate {
|
struct HeldRequestGate {
|
||||||
@@ -138,13 +194,50 @@ mod tests {
|
|||||||
let mut config = AppConfig::default();
|
let mut config = AppConfig::default();
|
||||||
config.max_concurrent_requests = Some(max_concurrent_requests);
|
config.max_concurrent_requests = Some(max_concurrent_requests);
|
||||||
let state = AppState::new(config).expect("state should build");
|
let state = AppState::new(config).expect("state should build");
|
||||||
|
let backpressure_state = BackpressureState::from_ref(&state);
|
||||||
|
|
||||||
Router::new()
|
Router::new()
|
||||||
.route("/held", get(held_request))
|
.route("/held", get(held_request))
|
||||||
.route("/fast", get(fast_request))
|
.route("/fast", get(fast_request))
|
||||||
.route("/healthz", get(fast_request))
|
.route("/healthz", get(fast_request))
|
||||||
.layer(middleware::from_fn_with_state(
|
.layer(middleware::from_fn_with_state(
|
||||||
state.clone(),
|
backpressure_state,
|
||||||
|
limit_concurrent_requests,
|
||||||
|
))
|
||||||
|
.layer(Extension(gate))
|
||||||
|
.with_state(state)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_grouped_test_app(
|
||||||
|
default_max_concurrent_requests: usize,
|
||||||
|
gallery_max_concurrent_requests: usize,
|
||||||
|
admin_max_concurrent_requests: usize,
|
||||||
|
gate: HeldRequestGate,
|
||||||
|
) -> Router {
|
||||||
|
let mut config = AppConfig::default();
|
||||||
|
config.max_concurrent_requests = Some(default_max_concurrent_requests);
|
||||||
|
config.gallery_max_concurrent_requests = Some(gallery_max_concurrent_requests);
|
||||||
|
config.admin_max_concurrent_requests = Some(admin_max_concurrent_requests);
|
||||||
|
let state = AppState::new(config).expect("state should build");
|
||||||
|
let backpressure_state = BackpressureState::from_ref(&state);
|
||||||
|
|
||||||
|
Router::new()
|
||||||
|
.route("/held", get(held_request))
|
||||||
|
.route("/api/runtime/puzzle/gallery", get(held_request))
|
||||||
|
.route("/api/runtime/custom-world-gallery", get(held_request))
|
||||||
|
.route("/api/runtime/puzzle/gallery/profile-1", get(held_request))
|
||||||
|
.route(
|
||||||
|
"/api/runtime/puzzle/gallery/profile-1/like",
|
||||||
|
get(fast_request),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/api/runtime/custom-world-gallery/user-1/profile-1",
|
||||||
|
get(held_request),
|
||||||
|
)
|
||||||
|
.route("/admin/api/overview", get(held_request))
|
||||||
|
.route("/fast", get(fast_request))
|
||||||
|
.layer(middleware::from_fn_with_state(
|
||||||
|
backpressure_state,
|
||||||
limit_concurrent_requests,
|
limit_concurrent_requests,
|
||||||
))
|
))
|
||||||
.layer(Extension(gate))
|
.layer(Extension(gate))
|
||||||
@@ -242,4 +335,147 @@ mod tests {
|
|||||||
.expect("third request should complete");
|
.expect("third request should complete");
|
||||||
assert_eq!(accepted_response.status(), StatusCode::OK);
|
assert_eq!(accepted_response.status(), StatusCode::OK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn gallery_pool_rejects_gallery_without_blocking_default_routes() {
|
||||||
|
let gate = HeldRequestGate {
|
||||||
|
entered: Arc::new(Notify::new()),
|
||||||
|
release: Arc::new(Notify::new()),
|
||||||
|
};
|
||||||
|
let app = build_grouped_test_app(2, 1, 1, gate.clone());
|
||||||
|
let entered = gate.entered.notified();
|
||||||
|
|
||||||
|
let held_response = tokio::spawn(
|
||||||
|
app.clone()
|
||||||
|
.oneshot(test_request("/api/runtime/puzzle/gallery")),
|
||||||
|
);
|
||||||
|
entered.await;
|
||||||
|
|
||||||
|
let rejected_gallery_response = app
|
||||||
|
.clone()
|
||||||
|
.oneshot(test_request("/api/runtime/custom-world-gallery"))
|
||||||
|
.await
|
||||||
|
.expect("rejected gallery request should complete");
|
||||||
|
assert_eq!(
|
||||||
|
rejected_gallery_response.status(),
|
||||||
|
StatusCode::TOO_MANY_REQUESTS
|
||||||
|
);
|
||||||
|
|
||||||
|
let accepted_default_response = app
|
||||||
|
.clone()
|
||||||
|
.oneshot(test_request("/fast"))
|
||||||
|
.await
|
||||||
|
.expect("default request should complete");
|
||||||
|
assert_eq!(accepted_default_response.status(), StatusCode::OK);
|
||||||
|
|
||||||
|
gate.release.notify_one();
|
||||||
|
let completed_response = held_response
|
||||||
|
.await
|
||||||
|
.expect("held request task should join")
|
||||||
|
.expect("held request should complete");
|
||||||
|
assert_eq!(completed_response.status(), StatusCode::OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn detail_pool_falls_back_to_default_when_unset() {
|
||||||
|
let gate = HeldRequestGate {
|
||||||
|
entered: Arc::new(Notify::new()),
|
||||||
|
release: Arc::new(Notify::new()),
|
||||||
|
};
|
||||||
|
let mut config = AppConfig::default();
|
||||||
|
config.max_concurrent_requests = Some(1);
|
||||||
|
config.detail_max_concurrent_requests = None;
|
||||||
|
let state = AppState::new(config).expect("state should build");
|
||||||
|
let backpressure_state = BackpressureState::from_ref(&state);
|
||||||
|
let app = Router::new()
|
||||||
|
.route("/api/runtime/puzzle/gallery/profile-1", get(held_request))
|
||||||
|
.route("/fast", get(fast_request))
|
||||||
|
.layer(middleware::from_fn_with_state(
|
||||||
|
backpressure_state,
|
||||||
|
limit_concurrent_requests,
|
||||||
|
))
|
||||||
|
.layer(Extension(gate.clone()))
|
||||||
|
.with_state(state);
|
||||||
|
let entered = gate.entered.notified();
|
||||||
|
|
||||||
|
let held_response = tokio::spawn(
|
||||||
|
app.clone()
|
||||||
|
.oneshot(test_request("/api/runtime/puzzle/gallery/profile-1")),
|
||||||
|
);
|
||||||
|
entered.await;
|
||||||
|
|
||||||
|
let rejected_default_response = app
|
||||||
|
.clone()
|
||||||
|
.oneshot(test_request("/fast"))
|
||||||
|
.await
|
||||||
|
.expect("default request should complete");
|
||||||
|
assert_eq!(
|
||||||
|
rejected_default_response.status(),
|
||||||
|
StatusCode::TOO_MANY_REQUESTS
|
||||||
|
);
|
||||||
|
|
||||||
|
gate.release.notify_one();
|
||||||
|
let completed_response = held_response
|
||||||
|
.await
|
||||||
|
.expect("held request task should join")
|
||||||
|
.expect("held request should complete");
|
||||||
|
assert_eq!(completed_response.status(), StatusCode::OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn admin_pool_is_isolated_from_default_routes() {
|
||||||
|
let gate = HeldRequestGate {
|
||||||
|
entered: Arc::new(Notify::new()),
|
||||||
|
release: Arc::new(Notify::new()),
|
||||||
|
};
|
||||||
|
let app = build_grouped_test_app(2, 1, 1, gate.clone());
|
||||||
|
let entered = gate.entered.notified();
|
||||||
|
|
||||||
|
let held_response = tokio::spawn(app.clone().oneshot(test_request("/admin/api/overview")));
|
||||||
|
entered.await;
|
||||||
|
|
||||||
|
let rejected_admin_response = app
|
||||||
|
.clone()
|
||||||
|
.oneshot(test_request("/admin/api/overview"))
|
||||||
|
.await
|
||||||
|
.expect("rejected admin request should complete");
|
||||||
|
assert_eq!(
|
||||||
|
rejected_admin_response.status(),
|
||||||
|
StatusCode::TOO_MANY_REQUESTS
|
||||||
|
);
|
||||||
|
|
||||||
|
let accepted_default_response = app
|
||||||
|
.clone()
|
||||||
|
.oneshot(test_request("/fast"))
|
||||||
|
.await
|
||||||
|
.expect("default request should complete");
|
||||||
|
assert_eq!(accepted_default_response.status(), StatusCode::OK);
|
||||||
|
|
||||||
|
gate.release.notify_one();
|
||||||
|
let completed_response = held_response
|
||||||
|
.await
|
||||||
|
.expect("held request task should join")
|
||||||
|
.expect("held request should complete");
|
||||||
|
assert_eq!(completed_response.status(), StatusCode::OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Only the gallery detail paths themselves map to the detail pool; deeper sub-paths
/// such as `/like` stay on the default pool.
#[test]
fn classifies_only_exact_gallery_detail_paths_as_detail() {
    use crate::state::HttpRequestPermitPoolKind as Kind;

    let expectations = [
        ("/api/runtime/puzzle/gallery/profile-1", Kind::Detail),
        ("/api/runtime/puzzle/gallery/profile-1/like", Kind::Default),
        (
            "/api/runtime/custom-world-gallery/user-1/profile-1",
            Kind::Detail,
        ),
        (
            "/api/runtime/custom-world-gallery/user-1/profile-1/like",
            Kind::Default,
        ),
    ];

    for (path, expected_pool) in expectations {
        assert_eq!(classify_request_permit_pool(path), expected_pool);
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,9 @@ pub struct AppConfig {
|
|||||||
pub listen_backlog: i32,
|
pub listen_backlog: i32,
|
||||||
pub worker_threads: Option<usize>,
|
pub worker_threads: Option<usize>,
|
||||||
pub max_concurrent_requests: Option<usize>,
|
pub max_concurrent_requests: Option<usize>,
|
||||||
|
pub gallery_max_concurrent_requests: Option<usize>,
|
||||||
|
pub detail_max_concurrent_requests: Option<usize>,
|
||||||
|
pub admin_max_concurrent_requests: Option<usize>,
|
||||||
pub log_filter: String,
|
pub log_filter: String,
|
||||||
pub otel_enabled: bool,
|
pub otel_enabled: bool,
|
||||||
pub admin_username: Option<String>,
|
pub admin_username: Option<String>,
|
||||||
@@ -154,6 +157,9 @@ impl Default for AppConfig {
|
|||||||
listen_backlog: 1024,
|
listen_backlog: 1024,
|
||||||
worker_threads: None,
|
worker_threads: None,
|
||||||
max_concurrent_requests: None,
|
max_concurrent_requests: None,
|
||||||
|
gallery_max_concurrent_requests: None,
|
||||||
|
detail_max_concurrent_requests: None,
|
||||||
|
admin_max_concurrent_requests: None,
|
||||||
log_filter: "info,tower_http=info".to_string(),
|
log_filter: "info,tower_http=info".to_string(),
|
||||||
otel_enabled: false,
|
otel_enabled: false,
|
||||||
admin_username: None,
|
admin_username: None,
|
||||||
@@ -322,6 +328,21 @@ impl AppConfig {
|
|||||||
{
|
{
|
||||||
config.max_concurrent_requests = Some(max_concurrent_requests);
|
config.max_concurrent_requests = Some(max_concurrent_requests);
|
||||||
}
|
}
|
||||||
|
if let Some(max_concurrent_requests) =
|
||||||
|
read_first_usize_env(&["GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS"])
|
||||||
|
{
|
||||||
|
config.gallery_max_concurrent_requests = Some(max_concurrent_requests);
|
||||||
|
}
|
||||||
|
if let Some(max_concurrent_requests) =
|
||||||
|
read_first_usize_env(&["GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS"])
|
||||||
|
{
|
||||||
|
config.detail_max_concurrent_requests = Some(max_concurrent_requests);
|
||||||
|
}
|
||||||
|
if let Some(max_concurrent_requests) =
|
||||||
|
read_first_usize_env(&["GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS"])
|
||||||
|
{
|
||||||
|
config.admin_max_concurrent_requests = Some(max_concurrent_requests);
|
||||||
|
}
|
||||||
if let Some(otel_enabled) = read_first_bool_env(&["GENARRATIVE_OTEL_ENABLED"]) {
|
if let Some(otel_enabled) = read_first_bool_env(&["GENARRATIVE_OTEL_ENABLED"]) {
|
||||||
config.otel_enabled = otel_enabled;
|
config.otel_enabled = otel_enabled;
|
||||||
}
|
}
|
||||||
@@ -1206,10 +1227,16 @@ mod tests {
|
|||||||
std::env::remove_var("GENARRATIVE_API_LISTEN_BACKLOG");
|
std::env::remove_var("GENARRATIVE_API_LISTEN_BACKLOG");
|
||||||
std::env::remove_var("GENARRATIVE_API_WORKER_THREADS");
|
std::env::remove_var("GENARRATIVE_API_WORKER_THREADS");
|
||||||
std::env::remove_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS");
|
std::env::remove_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS");
|
||||||
|
std::env::remove_var("GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS");
|
||||||
|
std::env::remove_var("GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS");
|
||||||
|
std::env::remove_var("GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS");
|
||||||
std::env::remove_var("GENARRATIVE_OTEL_ENABLED");
|
std::env::remove_var("GENARRATIVE_OTEL_ENABLED");
|
||||||
std::env::set_var("GENARRATIVE_API_LISTEN_BACKLOG", "2048");
|
std::env::set_var("GENARRATIVE_API_LISTEN_BACKLOG", "2048");
|
||||||
std::env::set_var("GENARRATIVE_API_WORKER_THREADS", "6");
|
std::env::set_var("GENARRATIVE_API_WORKER_THREADS", "6");
|
||||||
std::env::set_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS", "128");
|
std::env::set_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS", "128");
|
||||||
|
std::env::set_var("GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS", "64");
|
||||||
|
std::env::set_var("GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS", "32");
|
||||||
|
std::env::set_var("GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS", "16");
|
||||||
std::env::set_var("GENARRATIVE_OTEL_ENABLED", "true");
|
std::env::set_var("GENARRATIVE_OTEL_ENABLED", "true");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1217,12 +1244,18 @@ mod tests {
|
|||||||
assert_eq!(config.listen_backlog, 2048);
|
assert_eq!(config.listen_backlog, 2048);
|
||||||
assert_eq!(config.worker_threads, Some(6));
|
assert_eq!(config.worker_threads, Some(6));
|
||||||
assert_eq!(config.max_concurrent_requests, Some(128));
|
assert_eq!(config.max_concurrent_requests, Some(128));
|
||||||
|
assert_eq!(config.gallery_max_concurrent_requests, Some(64));
|
||||||
|
assert_eq!(config.detail_max_concurrent_requests, Some(32));
|
||||||
|
assert_eq!(config.admin_max_concurrent_requests, Some(16));
|
||||||
assert!(config.otel_enabled);
|
assert!(config.otel_enabled);
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
std::env::remove_var("GENARRATIVE_API_LISTEN_BACKLOG");
|
std::env::remove_var("GENARRATIVE_API_LISTEN_BACKLOG");
|
||||||
std::env::remove_var("GENARRATIVE_API_WORKER_THREADS");
|
std::env::remove_var("GENARRATIVE_API_WORKER_THREADS");
|
||||||
std::env::remove_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS");
|
std::env::remove_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS");
|
||||||
|
std::env::remove_var("GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS");
|
||||||
|
std::env::remove_var("GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS");
|
||||||
|
std::env::remove_var("GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS");
|
||||||
std::env::remove_var("GENARRATIVE_OTEL_ENABLED");
|
std::env::remove_var("GENARRATIVE_OTEL_ENABLED");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
use std::sync::OnceLock;
|
use std::{
|
||||||
|
sync::{Mutex, OnceLock},
|
||||||
|
time::Instant,
|
||||||
|
};
|
||||||
|
|
||||||
use opentelemetry::global;
|
use opentelemetry::global;
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
@@ -52,6 +55,38 @@ fn register_process_metrics_once() {
|
|||||||
})
|
})
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
meter
|
||||||
|
.f64_observable_counter("process.cpu.time")
|
||||||
|
.with_unit("s")
|
||||||
|
.with_description("api-server total user plus system CPU time")
|
||||||
|
.with_callback(|observer| {
|
||||||
|
let Some(snapshot) = ProcessMetricsSnapshot::collect() else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
if let Some(cpu_time_seconds) = snapshot.cpu_time_seconds {
|
||||||
|
observer.observe(cpu_time_seconds, &[]);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.build();
|
||||||
|
|
||||||
|
meter
|
||||||
|
.f64_observable_gauge("genarrative.process.cpu.usage_percent")
|
||||||
|
.with_unit("%")
|
||||||
|
.with_description("api-server process CPU usage between metric collections")
|
||||||
|
.with_callback(|observer| {
|
||||||
|
let Some(snapshot) = ProcessMetricsSnapshot::collect() else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
if let Some(cpu_time_seconds) = snapshot.cpu_time_seconds {
|
||||||
|
if let Some(usage_percent) =
|
||||||
|
process_cpu_usage_percent(cpu_time_seconds, Instant::now())
|
||||||
|
{
|
||||||
|
observer.observe(usage_percent, &[]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.build();
|
||||||
|
|
||||||
meter
|
meter
|
||||||
.i64_observable_up_down_counter("process.thread.count")
|
.i64_observable_up_down_counter("process.thread.count")
|
||||||
.with_unit("{thread}")
|
.with_unit("{thread}")
|
||||||
@@ -97,11 +132,12 @@ fn to_i64(value: u64) -> i64 {
|
|||||||
value.min(i64::MAX as u64) as i64
|
value.min(i64::MAX as u64) as i64
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||||
struct ProcessMetricsSnapshot {
|
struct ProcessMetricsSnapshot {
|
||||||
rss_bytes: u64,
|
rss_bytes: u64,
|
||||||
private_bytes: Option<u64>,
|
private_bytes: Option<u64>,
|
||||||
virtual_bytes: Option<u64>,
|
virtual_bytes: Option<u64>,
|
||||||
|
cpu_time_seconds: Option<f64>,
|
||||||
thread_count: u64,
|
thread_count: u64,
|
||||||
windows_handle_count: Option<u64>,
|
windows_handle_count: Option<u64>,
|
||||||
unix_fd_count: Option<u64>,
|
unix_fd_count: Option<u64>,
|
||||||
@@ -111,12 +147,56 @@ impl ProcessMetricsSnapshot {
|
|||||||
fn collect() -> Option<Self> {
|
fn collect() -> Option<Self> {
|
||||||
collect_process_metrics()
|
collect_process_metrics()
|
||||||
.inspect_err(|error| {
|
.inspect_err(|error| {
|
||||||
warn!(%error, "采集 api-server 进程内存指标失败");
|
warn!(%error, "采集 api-server 进程指标失败");
|
||||||
})
|
})
|
||||||
.ok()
|
.ok()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// One observation of the process's accumulated CPU time, kept so the next
/// observation can be turned into a usage figure.
#[derive(Debug, Clone, Copy)]
struct CpuUsageSample {
    /// Accumulated CPU time of the process, in seconds, at `observed_at`.
    cpu_time_seconds: f64,
    /// Monotonic timestamp at which `cpu_time_seconds` was read.
    observed_at: Instant,
}
|
||||||
|
|
||||||
|
fn process_cpu_usage_percent(cpu_time_seconds: f64, observed_at: Instant) -> Option<f64> {
|
||||||
|
static LAST_SAMPLE: OnceLock<Mutex<Option<CpuUsageSample>>> = OnceLock::new();
|
||||||
|
|
||||||
|
let mut last_sample = LAST_SAMPLE.get_or_init(|| Mutex::new(None)).lock().ok()?;
|
||||||
|
let previous = *last_sample;
|
||||||
|
*last_sample = Some(CpuUsageSample {
|
||||||
|
cpu_time_seconds,
|
||||||
|
observed_at,
|
||||||
|
});
|
||||||
|
|
||||||
|
let previous = previous?;
|
||||||
|
let wall_delta_seconds = observed_at
|
||||||
|
.checked_duration_since(previous.observed_at)?
|
||||||
|
.as_secs_f64();
|
||||||
|
cpu_usage_ratio_between_samples(
|
||||||
|
previous.cpu_time_seconds,
|
||||||
|
cpu_time_seconds,
|
||||||
|
0.0,
|
||||||
|
wall_delta_seconds,
|
||||||
|
)
|
||||||
|
.map(|ratio| ratio * 100.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes CPU usage as `cpu_delta / wall_delta` between two samples.
///
/// All arguments are in seconds. The result is a ratio, not a percentage; it can be
/// greater than 1.0 when CPU time advances faster than wall time.
///
/// Returns `None` when:
/// - either delta is not finite (NaN or infinite inputs),
/// - CPU time went backwards,
/// - wall time did not advance, or
/// - the division itself overflows to a non-finite value.
fn cpu_usage_ratio_between_samples(
    previous_cpu_seconds: f64,
    current_cpu_seconds: f64,
    previous_wall_seconds: f64,
    current_wall_seconds: f64,
) -> Option<f64> {
    let cpu_delta_seconds = current_cpu_seconds - previous_cpu_seconds;
    let wall_delta_seconds = current_wall_seconds - previous_wall_seconds;

    // NaN compares false against everything, so it would slip through the range
    // checks below; reject non-finite deltas explicitly.
    if !cpu_delta_seconds.is_finite() || !wall_delta_seconds.is_finite() {
        return None;
    }
    if cpu_delta_seconds < 0.0 || wall_delta_seconds <= 0.0 {
        return None;
    }

    let ratio = cpu_delta_seconds / wall_delta_seconds;
    ratio.is_finite().then_some(ratio)
}
|
||||||
|
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
||||||
use windows_sys::Win32::{
|
use windows_sys::Win32::{
|
||||||
@@ -149,16 +229,52 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
|||||||
Some(u64::from(handle_count))
|
Some(u64::from(handle_count))
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let cpu_time_seconds = windows_process_cpu_time_seconds(handle);
|
||||||
|
|
||||||
Ok(ProcessMetricsSnapshot {
|
Ok(ProcessMetricsSnapshot {
|
||||||
rss_bytes: counters.WorkingSetSize as u64,
|
rss_bytes: counters.WorkingSetSize as u64,
|
||||||
private_bytes: Some(counters.PrivateUsage as u64),
|
private_bytes: Some(counters.PrivateUsage as u64),
|
||||||
virtual_bytes: Some(counters.PrivateUsage as u64),
|
virtual_bytes: Some(counters.PrivateUsage as u64),
|
||||||
|
cpu_time_seconds,
|
||||||
thread_count: u64::from(unsafe { GetCurrentProcessId() }.thread_count()?),
|
thread_count: u64::from(unsafe { GetCurrentProcessId() }.thread_count()?),
|
||||||
windows_handle_count: handle_count,
|
windows_handle_count: handle_count,
|
||||||
unix_fd_count: None,
|
unix_fd_count: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads the kernel plus user CPU time of the process behind `handle`, in seconds.
///
/// Returns `None` when `GetProcessTimes` reports failure.
#[cfg(windows)]
fn windows_process_cpu_time_seconds(handle: windows_sys::Win32::Foundation::HANDLE) -> Option<f64> {
    use windows_sys::Win32::{
        Foundation::FILETIME,
        System::Threading::GetProcessTimes,
    };

    // Creation and exit times are required out-parameters but are not used here.
    let (mut created_at, mut exited_at) = (FILETIME::default(), FILETIME::default());
    let (mut kernel_time, mut user_time) = (FILETIME::default(), FILETIME::default());

    // SAFETY: all four out-pointers refer to live local `FILETIME` values for the
    // duration of the call; the validity of `handle` is the caller's responsibility.
    let succeeded = unsafe {
        GetProcessTimes(
            handle,
            &mut created_at,
            &mut exited_at,
            &mut kernel_time,
            &mut user_time,
        )
    } != 0;

    succeeded.then(|| {
        // Both values are counts of 100 ns units; 10_000_000 of them make one second.
        let total_100ns = filetime_100ns(kernel_time) + filetime_100ns(user_time);
        total_100ns as f64 / 10_000_000.0
    })
}
|
||||||
|
|
||||||
|
/// Joins the two 32-bit halves of a `FILETIME` into a single `u64`.
#[cfg(windows)]
fn filetime_100ns(filetime: windows_sys::Win32::Foundation::FILETIME) -> u64 {
    let high_bits = u64::from(filetime.dwHighDateTime) << 32;
    let low_bits = u64::from(filetime.dwLowDateTime);
    high_bits | low_bits
}
|
||||||
|
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
trait WindowsProcessThreadCount {
|
trait WindowsProcessThreadCount {
|
||||||
fn thread_count(self) -> Result<u32, String>;
|
fn thread_count(self) -> Result<u32, String>;
|
||||||
@@ -207,6 +323,8 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
|||||||
.map_err(|error| format!("read /proc/self/status failed: {error}"))?;
|
.map_err(|error| format!("read /proc/self/status failed: {error}"))?;
|
||||||
let statm = std::fs::read_to_string("/proc/self/statm")
|
let statm = std::fs::read_to_string("/proc/self/statm")
|
||||||
.map_err(|error| format!("read /proc/self/statm failed: {error}"))?;
|
.map_err(|error| format!("read /proc/self/statm failed: {error}"))?;
|
||||||
|
let stat = std::fs::read_to_string("/proc/self/stat")
|
||||||
|
.map_err(|error| format!("read /proc/self/stat failed: {error}"))?;
|
||||||
let page_size = linux_page_size_bytes()?;
|
let page_size = linux_page_size_bytes()?;
|
||||||
|
|
||||||
let rss_bytes = parse_status_kb(&status, "VmRSS:")
|
let rss_bytes = parse_status_kb(&status, "VmRSS:")
|
||||||
@@ -218,6 +336,7 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
|||||||
.or_else(|| parse_statm_pages(&statm, 0).map(|value| value * page_size))
|
.or_else(|| parse_statm_pages(&statm, 0).map(|value| value * page_size))
|
||||||
.ok_or_else(|| "missing VmSize/statm size field".to_string())?;
|
.ok_or_else(|| "missing VmSize/statm size field".to_string())?;
|
||||||
let private_bytes = parse_status_kb(&status, "VmData:").map(|value| value * 1024);
|
let private_bytes = parse_status_kb(&status, "VmData:").map(|value| value * 1024);
|
||||||
|
let cpu_time_seconds = linux_cpu_time_seconds(&stat)?;
|
||||||
let thread_count = parse_status_u64(&status, "Threads:")
|
let thread_count = parse_status_u64(&status, "Threads:")
|
||||||
.ok_or_else(|| "missing Threads field".to_string())?;
|
.ok_or_else(|| "missing Threads field".to_string())?;
|
||||||
|
|
||||||
@@ -225,12 +344,52 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
|||||||
rss_bytes,
|
rss_bytes,
|
||||||
private_bytes,
|
private_bytes,
|
||||||
virtual_bytes: Some(virtual_bytes),
|
virtual_bytes: Some(virtual_bytes),
|
||||||
|
cpu_time_seconds: Some(cpu_time_seconds),
|
||||||
thread_count,
|
thread_count,
|
||||||
windows_handle_count: None,
|
windows_handle_count: None,
|
||||||
unix_fd_count: linux_fd_count(),
|
unix_fd_count: linux_fd_count(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
fn linux_cpu_time_seconds(stat: &str) -> Result<f64, String> {
|
||||||
|
let cpu_ticks = parse_linux_proc_stat_cpu_ticks(stat)
|
||||||
|
.ok_or_else(|| "missing /proc/self/stat utime/stime fields".to_string())?;
|
||||||
|
let ticks_per_second = linux_clock_ticks_per_second()?;
|
||||||
|
Ok(cpu_ticks as f64 / ticks_per_second as f64)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of clock ticks per second as reported by `getconf CLK_TCK`.
///
/// The lookup spawns `getconf` only once; the outcome, success or error, is cached
/// for the lifetime of the process and cloned on every call.
///
/// # Errors
/// Fails when `getconf` cannot be run, exits unsuccessfully, prints non-UTF-8 or
/// non-numeric output, or reports `0`. Zero is rejected because callers divide by
/// this value.
#[cfg(target_os = "linux")]
fn linux_clock_ticks_per_second() -> Result<u64, String> {
    static CLOCK_TICKS_PER_SECOND: OnceLock<Result<u64, String>> = OnceLock::new();

    CLOCK_TICKS_PER_SECOND
        .get_or_init(|| {
            let output = std::process::Command::new("getconf")
                .arg("CLK_TCK")
                .output()
                .map_err(|error| format!("getconf CLK_TCK failed: {error}"))?;
            if !output.status.success() {
                return Err(format!("getconf CLK_TCK exited with {}", output.status));
            }

            let text = String::from_utf8(output.stdout)
                .map_err(|error| format!("getconf CLK_TCK output is not utf8: {error}"))?;
            let ticks = text
                .trim()
                .parse::<u64>()
                .map_err(|error| format!("parse CLK_TCK failed: {error}"))?;

            // A zero rate would make every CPU-time conversion divide by zero.
            if ticks == 0 {
                return Err("getconf CLK_TCK reported 0 ticks per second".to_string());
            }
            Ok(ticks)
        })
        .clone()
}
|
||||||
|
|
||||||
|
/// Extracts `utime + stime` (in clock ticks) from a `/proc/<pid>/stat` line.
///
/// The process name field is wrapped in parentheses and may itself contain spaces
/// or `)`, so parsing starts after the last `") "` in the line. Counting from the
/// state field that follows, utime is the 12th field and stime the 13th.
///
/// Returns `None` when the line has no `") "`, has too few fields, either field is
/// not a valid `u64`, or their sum overflows `u64`.
#[cfg(target_os = "linux")]
fn parse_linux_proc_stat_cpu_ticks(stat: &str) -> Option<u64> {
    let (_, fields_after_comm) = stat.rsplit_once(") ")?;
    let mut fields = fields_after_comm.split_whitespace();

    // `nth(11)` skips state..cmajflt and yields utime; the next field is stime.
    let utime = fields.nth(11)?.parse::<u64>().ok()?;
    let stime = fields.next()?.parse::<u64>().ok()?;

    // Malformed input must not panic (debug) or wrap (release).
    utime.checked_add(stime)
}
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
fn linux_page_size_bytes() -> Result<u64, String> {
|
fn linux_page_size_bytes() -> Result<u64, String> {
|
||||||
let output = std::process::Command::new("getconf")
|
let output = std::process::Command::new("getconf")
|
||||||
@@ -282,8 +441,12 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use super::cpu_usage_ratio_between_samples;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
use super::{parse_statm_pages, parse_status_kb, parse_status_u64};
|
use super::{
|
||||||
|
parse_linux_proc_stat_cpu_ticks, parse_statm_pages, parse_status_kb, parse_status_u64,
|
||||||
|
};
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
#[test]
|
#[test]
|
||||||
@@ -303,4 +466,28 @@ mod tests {
|
|||||||
assert_eq!(parse_statm_pages("100 20 0 0 0 0 0", 1), Some(20));
|
assert_eq!(parse_statm_pages("100 20 0 0 0 0 0", 1), Some(20));
|
||||||
assert_eq!(parse_statm_pages("100 20", 7), None);
|
assert_eq!(parse_statm_pages("100 20", 7), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A process name containing a space must not shift the utime/stime field positions.
#[cfg(target_os = "linux")]
#[test]
fn parses_linux_proc_stat_cpu_ticks_with_space_in_process_name() {
    // utime = 120 and stime = 30 in this line.
    let stat_line = "123 (api server) S 1 2 3 4 5 6 7 8 9 10 120 30 0 0 20 0 18 0 12345";
    let total_ticks = parse_linux_proc_stat_cpu_ticks(stat_line);

    assert_eq!(total_ticks, Some(150));
}
|
||||||
|
|
||||||
|
/// The ratio is CPU delta over wall delta; a backwards CPU clock or a wall clock
/// that did not advance yields `None`.
#[test]
fn cpu_usage_ratio_uses_cpu_time_delta_over_wall_time() {
    // ((previous_cpu, current_cpu, previous_wall, current_wall), expected)
    let cases = [
        ((10.0, 12.5, 100.0, 101.0), Some(2.5)),
        ((10.0, 9.0, 100.0, 101.0), None),
        ((10.0, 11.0, 100.0, 100.0), None),
    ];

    for ((previous_cpu, current_cpu, previous_wall, current_wall), expected) in cases {
        assert_eq!(
            cpu_usage_ratio_between_samples(previous_cpu, current_cpu, previous_wall, current_wall),
            expected
        );
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1534,6 +1534,13 @@ pub async fn list_puzzle_gallery(
|
|||||||
crate::telemetry::record_puzzle_gallery_cache_hit();
|
crate::telemetry::record_puzzle_gallery_cache_hit();
|
||||||
return Ok(puzzle_gallery_cached_json(&request_context, response));
|
return Ok(puzzle_gallery_cached_json(&request_context, response));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(response) = state.puzzle_gallery_cache().read_stale_response().await {
|
||||||
|
crate::telemetry::record_puzzle_gallery_cache_stale_hit();
|
||||||
|
spawn_puzzle_gallery_cache_refresh(state.clone());
|
||||||
|
return Ok(puzzle_gallery_cached_json(&request_context, response));
|
||||||
|
}
|
||||||
|
|
||||||
crate::telemetry::record_puzzle_gallery_cache_miss();
|
crate::telemetry::record_puzzle_gallery_cache_miss();
|
||||||
let _rebuild_guard = state.puzzle_gallery_cache().acquire_rebuild_guard().await;
|
let _rebuild_guard = state.puzzle_gallery_cache().acquire_rebuild_guard().await;
|
||||||
if let Some(response) = state.puzzle_gallery_cache().read_fresh_response().await {
|
if let Some(response) = state.puzzle_gallery_cache().read_fresh_response().await {
|
||||||
@@ -1579,7 +1586,57 @@ pub async fn list_puzzle_gallery(
|
|||||||
cached_response.data_json_len(),
|
cached_response.data_json_len(),
|
||||||
);
|
);
|
||||||
|
|
||||||
Ok(puzzle_gallery_cached_json(&request_context, cached_response))
|
Ok(puzzle_gallery_cached_json(
|
||||||
|
&request_context,
|
||||||
|
cached_response,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn spawn_puzzle_gallery_cache_refresh(state: AppState) {
|
||||||
|
let Some(rebuild_guard) = state
|
||||||
|
.puzzle_gallery_cache()
|
||||||
|
.try_acquire_owned_rebuild_guard()
|
||||||
|
else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
crate::telemetry::record_puzzle_gallery_cache_refresh_started();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let _rebuild_guard = rebuild_guard;
|
||||||
|
let rebuild_started_at = std::time::Instant::now();
|
||||||
|
let refresh_result = async {
|
||||||
|
let items = state.spacetime_client().list_puzzle_gallery().await?;
|
||||||
|
let response = build_puzzle_gallery_window_response(
|
||||||
|
items
|
||||||
|
.into_iter()
|
||||||
|
.map(|item| map_puzzle_gallery_card_response(&state, item))
|
||||||
|
.collect(),
|
||||||
|
);
|
||||||
|
state
|
||||||
|
.puzzle_gallery_cache()
|
||||||
|
.store_response(response)
|
||||||
|
.await
|
||||||
|
.map_err(|error| SpacetimeClientError::Runtime(error.to_string()))
|
||||||
|
}
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match refresh_result {
|
||||||
|
Ok(cached_response) => {
|
||||||
|
crate::telemetry::record_puzzle_gallery_cache_rebuild(
|
||||||
|
rebuild_started_at.elapsed(),
|
||||||
|
cached_response.data_json_len(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
crate::telemetry::record_puzzle_gallery_cache_refresh_failed();
|
||||||
|
tracing::warn!(
|
||||||
|
provider = PUZZLE_GALLERY_PROVIDER,
|
||||||
|
error = %error,
|
||||||
|
"puzzle gallery cache background refresh failed"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_puzzle_gallery_detail(
|
pub async fn get_puzzle_gallery_detail(
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ use shared_contracts::{
|
|||||||
puzzle_works::PuzzleWorkSummaryResponse,
|
puzzle_works::PuzzleWorkSummaryResponse,
|
||||||
};
|
};
|
||||||
use tokio::{
|
use tokio::{
|
||||||
sync::{Mutex, MutexGuard, RwLock},
|
sync::{Mutex, MutexGuard, OwnedMutexGuard, RwLock},
|
||||||
time,
|
time,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -69,6 +69,18 @@ impl PuzzleGalleryCache {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn read_stale_response(&self) -> Option<PuzzleGalleryCachedResponse> {
|
||||||
|
let guard = self.inner.read().await;
|
||||||
|
let entry = guard.as_ref()?;
|
||||||
|
Some(PuzzleGalleryCachedResponse {
|
||||||
|
data_json: entry.data_json.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tries to take the rebuild lock without waiting.
///
/// Returns an owned guard that can be moved into a spawned task, or `None` when
/// the lock is currently held.
pub fn try_acquire_owned_rebuild_guard(&self) -> Option<OwnedMutexGuard<()>> {
    let shared_lock = self.rebuild_lock.clone();
    match shared_lock.try_lock_owned() {
        Ok(guard) => Some(guard),
        Err(_) => None,
    }
}
|
||||||
|
|
||||||
pub async fn store_response(
|
pub async fn store_response(
|
||||||
&self,
|
&self,
|
||||||
response: PuzzleGalleryResponse,
|
response: PuzzleGalleryResponse,
|
||||||
@@ -205,4 +217,36 @@ mod tests {
|
|||||||
assert!(!response.has_more);
|
assert!(!response.has_more);
|
||||||
assert_eq!(response.next_cursor, None);
|
assert_eq!(response.next_cursor, None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn stale_response_remains_readable_after_fresh_ttl() {
|
||||||
|
let cache = PuzzleGalleryCache::new();
|
||||||
|
let response =
|
||||||
|
build_puzzle_gallery_window_response((0..8).map(build_summary).collect::<Vec<_>>());
|
||||||
|
cache
|
||||||
|
.store_response(response)
|
||||||
|
.await
|
||||||
|
.expect("cache response should serialize");
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut guard = cache.inner.write().await;
|
||||||
|
let entry = guard.as_mut().expect("cache entry should exist");
|
||||||
|
entry.built_at = Instant::now() - PUZZLE_GALLERY_CACHE_TTL - Duration::from_secs(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(cache.read_fresh_response().await.is_none());
|
||||||
|
assert!(cache.read_stale_response().await.is_some());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn try_owned_rebuild_guard_allows_only_one_refresher() {
|
||||||
|
let cache = PuzzleGalleryCache::new();
|
||||||
|
let first_guard = cache.try_acquire_owned_rebuild_guard();
|
||||||
|
|
||||||
|
assert!(first_guard.is_some());
|
||||||
|
assert!(cache.try_acquire_owned_rebuild_guard().is_none());
|
||||||
|
|
||||||
|
drop(first_guard);
|
||||||
|
assert!(cache.try_acquire_owned_rebuild_guard().is_some());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ use std::{
|
|||||||
time::{SystemTime, UNIX_EPOCH},
|
time::{SystemTime, UNIX_EPOCH},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use axum::extract::FromRef;
|
||||||
use module_ai::{AiTaskService, InMemoryAiTaskStore};
|
use module_ai::{AiTaskService, InMemoryAiTaskStore};
|
||||||
use module_auth::{
|
use module_auth::{
|
||||||
AuthUserService, InMemoryAuthStore, PasswordEntryService, PhoneAuthService,
|
AuthUserService, InMemoryAuthStore, PasswordEntryService, PhoneAuthService,
|
||||||
@@ -39,13 +40,113 @@ const ADMIN_ROLE: &str = "admin";
|
|||||||
|
|
||||||
pub type HttpRequestPermitPool = Semaphore;
|
pub type HttpRequestPermitPool = Semaphore;
|
||||||
|
|
||||||
// 当前阶段先保留最小共享状态壳,后续逐步接入配置、客户端与平台适配。
|
/// Selects which HTTP request permit pool a request is admitted through.
///
/// `Default` also acts as the fallback pool when a dedicated pool is not
/// configured (see `HttpRequestPermitPools::pool`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HttpRequestPermitPoolKind {
    Default,
    Gallery,
    Detail,
    Admin,
}

impl HttpRequestPermitPoolKind {
    /// Returns the stable lowercase label of this pool kind.
    ///
    /// Declared `const` so the label can also be used in constant contexts.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Default => "default",
            Self::Gallery => "gallery",
            Self::Detail => "detail",
            Self::Admin => "admin",
        }
    }
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct AppState {
|
pub struct HttpRequestPermitPools {
|
||||||
|
default: Option<Arc<HttpRequestPermitPool>>,
|
||||||
|
gallery: Option<Arc<HttpRequestPermitPool>>,
|
||||||
|
detail: Option<Arc<HttpRequestPermitPool>>,
|
||||||
|
admin: Option<Arc<HttpRequestPermitPool>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HttpRequestPermitPools {
|
||||||
|
fn from_config(config: &AppConfig) -> Self {
|
||||||
|
Self {
|
||||||
|
default: config
|
||||||
|
.max_concurrent_requests
|
||||||
|
.map(HttpRequestPermitPool::new)
|
||||||
|
.map(Arc::new),
|
||||||
|
gallery: config
|
||||||
|
.gallery_max_concurrent_requests
|
||||||
|
.map(HttpRequestPermitPool::new)
|
||||||
|
.map(Arc::new),
|
||||||
|
detail: config
|
||||||
|
.detail_max_concurrent_requests
|
||||||
|
.map(HttpRequestPermitPool::new)
|
||||||
|
.map(Arc::new),
|
||||||
|
admin: config
|
||||||
|
.admin_max_concurrent_requests
|
||||||
|
.map(HttpRequestPermitPool::new)
|
||||||
|
.map(Arc::new),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn pool(
|
||||||
|
&self,
|
||||||
|
kind: HttpRequestPermitPoolKind,
|
||||||
|
) -> Option<(HttpRequestPermitPoolKind, Arc<HttpRequestPermitPool>)> {
|
||||||
|
let selected = match kind {
|
||||||
|
HttpRequestPermitPoolKind::Default => self.default.clone(),
|
||||||
|
HttpRequestPermitPoolKind::Gallery => self.gallery.clone(),
|
||||||
|
HttpRequestPermitPoolKind::Detail => self.detail.clone(),
|
||||||
|
HttpRequestPermitPoolKind::Admin => self.admin.clone(),
|
||||||
|
};
|
||||||
|
selected.map(|pool| (kind, pool)).or_else(|| {
|
||||||
|
self.default
|
||||||
|
.clone()
|
||||||
|
.map(|pool| (HttpRequestPermitPoolKind::Default, pool))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct BackpressureState {
|
||||||
|
permit_pools: HttpRequestPermitPools,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BackpressureState {
|
||||||
|
pub fn request_permit_pool(
|
||||||
|
&self,
|
||||||
|
kind: HttpRequestPermitPoolKind,
|
||||||
|
) -> Option<(HttpRequestPermitPoolKind, Arc<HttpRequestPermitPool>)> {
|
||||||
|
self.permit_pools.pool(kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct AppState(Arc<AppStateInner>);
|
||||||
|
|
||||||
|
impl std::ops::Deref for AppState {
|
||||||
|
type Target = AppStateInner;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Derives the middleware's `BackpressureState` from the full `AppState` by taking
/// a clone of its permit pools.
impl FromRef<AppState> for BackpressureState {
    fn from_ref(state: &AppState) -> Self {
        let permit_pools = state.http_request_permit_pools();
        Self { permit_pools }
    }
}
|
||||||
|
|
||||||
|
// Axum/Hyper 会在路由树和连接 service 上频繁 clone state;AppState 外层必须保持浅拷贝。
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct AppStateInner {
|
||||||
// 配置会在后续中间件、路由和平台适配接入时逐步消费。
|
// 配置会在后续中间件、路由和平台适配接入时逐步消费。
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub config: AppConfig,
|
pub config: AppConfig,
|
||||||
http_request_permit_pool: Option<Arc<HttpRequestPermitPool>>,
|
http_request_permit_pools: HttpRequestPermitPools,
|
||||||
auth_jwt_config: JwtConfig,
|
auth_jwt_config: JwtConfig,
|
||||||
admin_runtime: Option<AdminRuntime>,
|
admin_runtime: Option<AdminRuntime>,
|
||||||
refresh_cookie_config: RefreshCookieConfig,
|
refresh_cookie_config: RefreshCookieConfig,
|
||||||
@@ -198,14 +299,11 @@ impl AppState {
|
|||||||
});
|
});
|
||||||
let llm_client = build_llm_client(&config)?;
|
let llm_client = build_llm_client(&config)?;
|
||||||
let creative_agent_gpt5_client = build_creative_agent_gpt5_client(&config)?;
|
let creative_agent_gpt5_client = build_creative_agent_gpt5_client(&config)?;
|
||||||
let http_request_permit_pool = config
|
let http_request_permit_pools = HttpRequestPermitPools::from_config(&config);
|
||||||
.max_concurrent_requests
|
|
||||||
.map(HttpRequestPermitPool::new)
|
|
||||||
.map(Arc::new);
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self(Arc::new(AppStateInner {
|
||||||
config,
|
config,
|
||||||
http_request_permit_pool,
|
http_request_permit_pools,
|
||||||
auth_jwt_config,
|
auth_jwt_config,
|
||||||
admin_runtime,
|
admin_runtime,
|
||||||
refresh_cookie_config,
|
refresh_cookie_config,
|
||||||
@@ -232,7 +330,7 @@ impl AppState {
|
|||||||
creative_agent_sessions: Arc::new(Mutex::new(HashMap::new())),
|
creative_agent_sessions: Arc::new(Mutex::new(HashMap::new())),
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
test_runtime_snapshot_store: Arc::new(Mutex::new(HashMap::new())),
|
test_runtime_snapshot_store: Arc::new(Mutex::new(HashMap::new())),
|
||||||
})
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn auth_jwt_config(&self) -> &JwtConfig {
|
pub fn auth_jwt_config(&self) -> &JwtConfig {
|
||||||
@@ -247,8 +345,8 @@ impl AppState {
|
|||||||
&self.refresh_cookie_config
|
&self.refresh_cookie_config
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn http_request_permit_pool(&self) -> Option<Arc<HttpRequestPermitPool>> {
|
pub fn http_request_permit_pools(&self) -> HttpRequestPermitPools {
|
||||||
self.http_request_permit_pool.clone()
|
self.http_request_permit_pools.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn upsert_creation_entry_type_config(
|
pub async fn upsert_creation_entry_type_config(
|
||||||
|
|||||||
@@ -12,10 +12,14 @@ use std::sync::{
|
|||||||
};
|
};
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::{request_context::resolve_request_id, state::AppState};
|
use crate::{
|
||||||
|
request_context::resolve_request_id,
|
||||||
|
state::{AppState, HttpRequestPermitPoolKind},
|
||||||
|
};
|
||||||
|
|
||||||
static HTTP_RESPONSE_BODY_IN_FLIGHT: AtomicI64 = AtomicI64::new(0);
|
static HTTP_RESPONSE_BODY_IN_FLIGHT: AtomicI64 = AtomicI64::new(0);
|
||||||
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<Arc<AtomicI64>> = OnceLock::new();
|
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<HttpRequestPermitsAvailableGauges> =
|
||||||
|
OnceLock::new();
|
||||||
|
|
||||||
// 集中维护 api-server HTTP 观测,避免在 handler 中散落高基数字段或重复创建 instrument。
|
// 集中维护 api-server HTTP 观测,避免在 handler 中散落高基数字段或重复创建 instrument。
|
||||||
pub async fn record_http_observability(
|
pub async fn record_http_observability(
|
||||||
@@ -78,29 +82,42 @@ pub async fn record_http_observability(
|
|||||||
track_response_body_in_flight(response)
|
track_response_body_in_flight(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn update_http_request_permits_available(available: usize) {
|
pub(crate) fn update_http_request_permits_available(
|
||||||
let gauge = HTTP_REQUEST_PERMITS_AVAILABLE.get_or_init(|| {
|
pool: HttpRequestPermitPoolKind,
|
||||||
let gauge = Arc::new(AtomicI64::new(0));
|
available: usize,
|
||||||
register_http_request_permits_available_metric(gauge.clone());
|
) {
|
||||||
gauge
|
HTTP_REQUEST_PERMITS_AVAILABLE
|
||||||
});
|
.get_or_init(register_http_request_permits_available_metric)
|
||||||
gauge.store(available.min(i64::MAX as usize) as i64, Ordering::Relaxed);
|
.store(pool, available);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn record_puzzle_gallery_cache_hit() {
|
pub(crate) fn record_puzzle_gallery_cache_hit() {
|
||||||
puzzle_gallery_cache_metrics().hits.add(1, &[]);
|
puzzle_gallery_cache_metrics().hits.add(1, &[]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Adds 1 to the `stale_hits` counter of the puzzle gallery cache metrics, with no attributes.
pub(crate) fn record_puzzle_gallery_cache_stale_hit() {
|
||||||
|
puzzle_gallery_cache_metrics().stale_hits.add(1, &[]);
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn record_puzzle_gallery_cache_miss() {
|
pub(crate) fn record_puzzle_gallery_cache_miss() {
|
||||||
puzzle_gallery_cache_metrics().misses.add(1, &[]);
|
puzzle_gallery_cache_metrics().misses.add(1, &[]);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn record_puzzle_gallery_cache_rebuild(duration: std::time::Duration, data_bytes: usize) {
|
/// Adds 1 to the `refreshes_started` counter of the puzzle gallery cache metrics, with no
/// attributes.
pub(crate) fn record_puzzle_gallery_cache_refresh_started() {
|
||||||
|
puzzle_gallery_cache_metrics().refreshes_started.add(1, &[]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds 1 to the `refreshes_failed` counter of the puzzle gallery cache metrics, with no
/// attributes.
pub(crate) fn record_puzzle_gallery_cache_refresh_failed() {
|
||||||
|
puzzle_gallery_cache_metrics().refreshes_failed.add(1, &[]);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn record_puzzle_gallery_cache_rebuild(
|
||||||
|
duration: std::time::Duration,
|
||||||
|
data_bytes: usize,
|
||||||
|
) {
|
||||||
let metrics = puzzle_gallery_cache_metrics();
|
let metrics = puzzle_gallery_cache_metrics();
|
||||||
metrics.rebuilds.add(1, &[]);
|
metrics.rebuilds.add(1, &[]);
|
||||||
metrics
|
metrics.rebuild_duration.record(duration.as_secs_f64(), &[]);
|
||||||
.rebuild_duration
|
|
||||||
.record(duration.as_secs_f64(), &[]);
|
|
||||||
metrics
|
metrics
|
||||||
.data_json_bytes
|
.data_json_bytes
|
||||||
.record(data_bytes.min(u64::MAX as usize) as u64, &[]);
|
.record(data_bytes.min(u64::MAX as usize) as u64, &[]);
|
||||||
@@ -125,12 +142,44 @@ struct HttpMetrics {
|
|||||||
|
|
||||||
struct PuzzleGalleryCacheMetrics {
|
struct PuzzleGalleryCacheMetrics {
|
||||||
hits: Counter<u64>,
|
hits: Counter<u64>,
|
||||||
|
stale_hits: Counter<u64>,
|
||||||
misses: Counter<u64>,
|
misses: Counter<u64>,
|
||||||
|
refreshes_started: Counter<u64>,
|
||||||
|
refreshes_failed: Counter<u64>,
|
||||||
rebuilds: Counter<u64>,
|
rebuilds: Counter<u64>,
|
||||||
rebuild_duration: opentelemetry::metrics::Histogram<f64>,
|
rebuild_duration: opentelemetry::metrics::Histogram<f64>,
|
||||||
data_json_bytes: opentelemetry::metrics::Histogram<u64>,
|
data_json_bytes: opentelemetry::metrics::Histogram<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// One shared atomic per HTTP request permit pool kind, holding the value last written by
/// `store` for that kind.
struct HttpRequestPermitsAvailableGauges {
|
||||||
|
// Value for `HttpRequestPermitPoolKind::Default`.
default: Arc<AtomicI64>,
|
||||||
|
// Value for `HttpRequestPermitPoolKind::Gallery`.
gallery: Arc<AtomicI64>,
|
||||||
|
// Value for `HttpRequestPermitPoolKind::Detail`.
detail: Arc<AtomicI64>,
|
||||||
|
// Value for `HttpRequestPermitPoolKind::Admin`.
admin: Arc<AtomicI64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HttpRequestPermitsAvailableGauges {
|
||||||
|
/// Creates the four gauges, each starting at 0.
fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
default: Arc::new(AtomicI64::new(0)),
|
||||||
|
gallery: Arc::new(AtomicI64::new(0)),
|
||||||
|
detail: Arc::new(AtomicI64::new(0)),
|
||||||
|
admin: Arc::new(AtomicI64::new(0)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes `available` into the atomic that belongs to `pool`, using relaxed ordering.
fn store(&self, pool: HttpRequestPermitPoolKind, available: usize) {
|
||||||
|
// Cap the count at `i64::MAX as usize` before converting it to `i64`.
let value = available.min(i64::MAX as usize) as i64;
|
||||||
|
// The match selects the target atomic; the `.store` call after it performs the write.
match pool {
|
||||||
|
HttpRequestPermitPoolKind::Default => &self.default,
|
||||||
|
HttpRequestPermitPoolKind::Gallery => &self.gallery,
|
||||||
|
HttpRequestPermitPoolKind::Detail => &self.detail,
|
||||||
|
HttpRequestPermitPoolKind::Admin => &self.admin,
|
||||||
|
}
|
||||||
|
.store(value, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct ResponseBodyInFlightGuard;
|
struct ResponseBodyInFlightGuard;
|
||||||
|
|
||||||
impl Drop for ResponseBodyInFlightGuard {
|
impl Drop for ResponseBodyInFlightGuard {
|
||||||
@@ -171,10 +220,22 @@ fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
|
|||||||
.u64_counter("genarrative.puzzle_gallery.cache.hits")
|
.u64_counter("genarrative.puzzle_gallery.cache.hits")
|
||||||
.with_description("Puzzle gallery response cache hits")
|
.with_description("Puzzle gallery response cache hits")
|
||||||
.build(),
|
.build(),
|
||||||
|
stale_hits: meter
|
||||||
|
.u64_counter("genarrative.puzzle_gallery.cache.stale_hits")
|
||||||
|
.with_description("Puzzle gallery stale response cache hits")
|
||||||
|
.build(),
|
||||||
misses: meter
|
misses: meter
|
||||||
.u64_counter("genarrative.puzzle_gallery.cache.misses")
|
.u64_counter("genarrative.puzzle_gallery.cache.misses")
|
||||||
.with_description("Puzzle gallery response cache misses")
|
.with_description("Puzzle gallery response cache misses")
|
||||||
.build(),
|
.build(),
|
||||||
|
refreshes_started: meter
|
||||||
|
.u64_counter("genarrative.puzzle_gallery.cache.refreshes_started")
|
||||||
|
.with_description("Puzzle gallery background refresh start count")
|
||||||
|
.build(),
|
||||||
|
refreshes_failed: meter
|
||||||
|
.u64_counter("genarrative.puzzle_gallery.cache.refreshes_failed")
|
||||||
|
.with_description("Puzzle gallery background refresh failure count")
|
||||||
|
.build(),
|
||||||
rebuilds: meter
|
rebuilds: meter
|
||||||
.u64_counter("genarrative.puzzle_gallery.cache.rebuilds")
|
.u64_counter("genarrative.puzzle_gallery.cache.rebuilds")
|
||||||
.with_description("Puzzle gallery response cache rebuild count")
|
.with_description("Puzzle gallery response cache rebuild count")
|
||||||
@@ -193,16 +254,49 @@ fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn register_http_request_permits_available_metric(gauge: Arc<AtomicI64>) {
|
fn register_http_request_permits_available_metric() -> HttpRequestPermitsAvailableGauges {
|
||||||
|
let gauges = HttpRequestPermitsAvailableGauges::new();
|
||||||
let meter = global::meter("genarrative-api");
|
let meter = global::meter("genarrative-api");
|
||||||
|
let default_gauge = gauges.default.clone();
|
||||||
|
let gallery_gauge = gauges.gallery.clone();
|
||||||
|
let detail_gauge = gauges.detail.clone();
|
||||||
|
let admin_gauge = gauges.admin.clone();
|
||||||
meter
|
meter
|
||||||
.i64_observable_up_down_counter("genarrative.http.server.request_permits.available")
|
.i64_observable_up_down_counter("genarrative.http.server.request_permits.available")
|
||||||
.with_unit("{permit}")
|
.with_unit("{permit}")
|
||||||
.with_description("Available api-server HTTP backpressure permits")
|
.with_description("Available api-server HTTP backpressure permits")
|
||||||
.with_callback(move |observer| {
|
.with_callback(move |observer| {
|
||||||
observer.observe(gauge.load(Ordering::Relaxed), &[]);
|
observer.observe(
|
||||||
|
default_gauge.load(Ordering::Relaxed),
|
||||||
|
&[KeyValue::new(
|
||||||
|
"pool",
|
||||||
|
HttpRequestPermitPoolKind::Default.as_str(),
|
||||||
|
)],
|
||||||
|
);
|
||||||
|
observer.observe(
|
||||||
|
gallery_gauge.load(Ordering::Relaxed),
|
||||||
|
&[KeyValue::new(
|
||||||
|
"pool",
|
||||||
|
HttpRequestPermitPoolKind::Gallery.as_str(),
|
||||||
|
)],
|
||||||
|
);
|
||||||
|
observer.observe(
|
||||||
|
detail_gauge.load(Ordering::Relaxed),
|
||||||
|
&[KeyValue::new(
|
||||||
|
"pool",
|
||||||
|
HttpRequestPermitPoolKind::Detail.as_str(),
|
||||||
|
)],
|
||||||
|
);
|
||||||
|
observer.observe(
|
||||||
|
admin_gauge.load(Ordering::Relaxed),
|
||||||
|
&[KeyValue::new(
|
||||||
|
"pool",
|
||||||
|
HttpRequestPermitPoolKind::Admin.as_str(),
|
||||||
|
)],
|
||||||
|
);
|
||||||
})
|
})
|
||||||
.build();
|
.build();
|
||||||
|
gauges
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn register_http_runtime_metrics() {
|
pub(crate) fn register_http_runtime_metrics() {
|
||||||
@@ -284,19 +378,13 @@ mod tests {
|
|||||||
observability_route("/api/runtime/puzzle/runs/run-123/history"),
|
observability_route("/api/runtime/puzzle/runs/run-123/history"),
|
||||||
"/api/*"
|
"/api/*"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(observability_route("/admin/api/debug/http"), "/admin/api/*");
|
||||||
observability_route("/admin/api/debug/http"),
|
|
||||||
"/admin/api/*"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn resolve_request_scheme_uses_forwarded_proto_first_value() {
|
fn resolve_request_scheme_uses_forwarded_proto_first_value() {
|
||||||
let mut headers = HeaderMap::new();
|
let mut headers = HeaderMap::new();
|
||||||
headers.insert(
|
headers.insert("x-forwarded-proto", HeaderValue::from_static("https, http"));
|
||||||
"x-forwarded-proto",
|
|
||||||
HeaderValue::from_static("https, http"),
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(resolve_request_scheme(&headers), "https");
|
assert_eq!(resolve_request_scheme(&headers), "https");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2060,6 +2060,7 @@ fn map_sms_provider_error_to_phone_error(error: SmsProviderError) -> PhoneAuthEr
|
|||||||
SmsProviderError::InvalidConfig(message) => {
|
SmsProviderError::InvalidConfig(message) => {
|
||||||
PhoneAuthError::SmsProviderInvalidConfig(message)
|
PhoneAuthError::SmsProviderInvalidConfig(message)
|
||||||
}
|
}
|
||||||
|
SmsProviderError::InvalidVerifyCode => PhoneAuthError::InvalidVerifyCode,
|
||||||
SmsProviderError::Upstream(message) => PhoneAuthError::SmsProviderUpstream(message),
|
SmsProviderError::Upstream(message) => PhoneAuthError::SmsProviderUpstream(message),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user