Merge branch 'codex/container-simulate'
# Conflicts: # .hermes/shared-memory/decision-log.md # server-rs/crates/api-server/src/puzzle.rs # server-rs/crates/spacetime-client/src/mapper.rs
This commit is contained in:
@@ -46,7 +46,7 @@ shared-kernel = { workspace = true }
|
||||
shared-logging = { workspace = true }
|
||||
socket2 = { workspace = true }
|
||||
spacetime-client = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "net", "time", "sync"] }
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "net", "time", "sync", "fs", "io-util"] }
|
||||
tokio-stream = { workspace = true }
|
||||
futures-util = { workspace = true }
|
||||
time = { workspace = true, features = ["formatting"] }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use axum::{
|
||||
Router,
|
||||
body::Body,
|
||||
extract::Extension,
|
||||
extract::{Extension, FromRef},
|
||||
http::Request,
|
||||
middleware,
|
||||
response::Response,
|
||||
@@ -22,7 +22,7 @@ use crate::{
|
||||
request_context::{RequestContext, attach_request_context, resolve_request_id},
|
||||
response_headers::propagate_request_id_header,
|
||||
runtime_inventory::get_runtime_inventory_state,
|
||||
state::AppState,
|
||||
state::{AppState, BackpressureState},
|
||||
telemetry::record_http_observability,
|
||||
tracking::record_route_tracking_event_after_success,
|
||||
vector_engine_audio_generation::{
|
||||
@@ -79,7 +79,7 @@ pub fn build_router(state: AppState) -> Router {
|
||||
))
|
||||
// HTTP 背压在业务路由外侧快拒绝,避免过载请求继续占用 SpacetimeDB facade 与业务执行资源。
|
||||
.layer(middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
BackpressureState::from_ref(&state),
|
||||
limit_concurrent_requests,
|
||||
))
|
||||
// 错误归一化层放在 tracing 里侧,让 tracing 记录到最终对外返回的状态与错误体形态。
|
||||
|
||||
@@ -13,11 +13,11 @@ use tokio::sync::{OwnedSemaphorePermit, TryAcquireError};
|
||||
use crate::{
|
||||
http_error::AppError,
|
||||
request_context::RequestContext,
|
||||
state::{AppState, HttpRequestPermitPool},
|
||||
state::{BackpressureState, HttpRequestPermitPool, HttpRequestPermitPoolKind},
|
||||
};
|
||||
|
||||
pub async fn limit_concurrent_requests(
|
||||
State(state): State<AppState>,
|
||||
State(state): State<BackpressureState>,
|
||||
request: Request,
|
||||
next: Next,
|
||||
) -> Response {
|
||||
@@ -25,29 +25,38 @@ pub async fn limit_concurrent_requests(
|
||||
return next.run(request).await;
|
||||
}
|
||||
|
||||
let Some(permit_pool) = state.http_request_permit_pool() else {
|
||||
let requested_pool = classify_request_permit_pool(request.uri().path());
|
||||
let Some((permit_pool_kind, permit_pool)) = state.request_permit_pool(requested_pool) else {
|
||||
return next.run(request).await;
|
||||
};
|
||||
|
||||
match acquire_http_request_permit(permit_pool) {
|
||||
match acquire_http_request_permit(permit_pool_kind, permit_pool) {
|
||||
Ok(permit) => hold_permit_until_response_body_dropped(next.run(request).await, permit),
|
||||
Err(_) => reject_overloaded_request(&request),
|
||||
}
|
||||
}
|
||||
|
||||
fn acquire_http_request_permit(
|
||||
permit_pool_kind: HttpRequestPermitPoolKind,
|
||||
permit_pool: Arc<HttpRequestPermitPool>,
|
||||
) -> Result<HttpRequestPermitGuard, TryAcquireError> {
|
||||
match permit_pool.clone().try_acquire_owned() {
|
||||
Ok(permit) => {
|
||||
crate::telemetry::update_http_request_permits_available(permit_pool.available_permits());
|
||||
crate::telemetry::update_http_request_permits_available(
|
||||
permit_pool_kind,
|
||||
permit_pool.available_permits(),
|
||||
);
|
||||
Ok(HttpRequestPermitGuard {
|
||||
permit_pool_kind,
|
||||
permit: Some(permit),
|
||||
permit_pool,
|
||||
})
|
||||
}
|
||||
Err(error) => {
|
||||
crate::telemetry::update_http_request_permits_available(permit_pool.available_permits());
|
||||
crate::telemetry::update_http_request_permits_available(
|
||||
permit_pool_kind,
|
||||
permit_pool.available_permits(),
|
||||
);
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
@@ -66,6 +75,7 @@ fn hold_permit_until_response_body_dropped(
|
||||
}
|
||||
|
||||
struct HttpRequestPermitGuard {
|
||||
permit_pool_kind: HttpRequestPermitPoolKind,
|
||||
permit: Option<OwnedSemaphorePermit>,
|
||||
permit_pool: Arc<HttpRequestPermitPool>,
|
||||
}
|
||||
@@ -73,7 +83,10 @@ struct HttpRequestPermitGuard {
|
||||
impl Drop for HttpRequestPermitGuard {
|
||||
fn drop(&mut self) {
|
||||
drop(self.permit.take());
|
||||
crate::telemetry::update_http_request_permits_available(self.permit_pool.available_permits());
|
||||
crate::telemetry::update_http_request_permits_available(
|
||||
self.permit_pool_kind,
|
||||
self.permit_pool.available_permits(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,6 +105,44 @@ fn should_bypass_backpressure(request: &Request<Body>) -> bool {
|
||||
request.uri().path() == "/healthz"
|
||||
}
|
||||
|
||||
fn classify_request_permit_pool(path: &str) -> HttpRequestPermitPoolKind {
|
||||
if is_gallery_list_path(path) {
|
||||
HttpRequestPermitPoolKind::Gallery
|
||||
} else if is_gallery_detail_path(path) {
|
||||
HttpRequestPermitPoolKind::Detail
|
||||
} else if path.starts_with("/admin/api/") {
|
||||
HttpRequestPermitPoolKind::Admin
|
||||
} else {
|
||||
HttpRequestPermitPoolKind::Default
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `path` is exactly one of the two gallery list endpoints.
///
/// The comparison is exact: a trailing slash or any extra segment does not match.
fn is_gallery_list_path(path: &str) -> bool {
    const GALLERY_LIST_PATHS: [&str; 2] = [
        "/api/runtime/puzzle/gallery",
        "/api/runtime/custom-world-gallery",
    ];

    GALLERY_LIST_PATHS.contains(&path)
}
|
||||
|
||||
/// Returns `true` when `path` addresses a single gallery entry.
///
/// Two shapes are accepted:
/// - `/api/runtime/puzzle/gallery/<profile_id>` with exactly one non-empty trailing segment;
/// - `/api/runtime/custom-world-gallery/<owner_user_id>/<profile_id>` with exactly two
///   non-empty trailing segments.
///
/// Deeper paths (for example a trailing `/like`) are not detail paths.
fn is_gallery_detail_path(path: &str) -> bool {
    if let Some(rest) = path.strip_prefix("/api/runtime/puzzle/gallery/") {
        // A puzzle path never falls through to the custom-world check.
        return !(rest.is_empty() || rest.contains('/'));
    }

    let Some(rest) = path.strip_prefix("/api/runtime/custom-world-gallery/") else {
        return false;
    };

    // Splitting at the first '/' yields the owner and "everything else"; the
    // remainder must be a single non-empty segment to count as a profile id.
    match rest.split_once('/') {
        Some((owner_user_id, profile_id)) => {
            !owner_user_id.is_empty() && !profile_id.is_empty() && !profile_id.contains('/')
        }
        None => false,
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
@@ -107,9 +158,14 @@ mod tests {
|
||||
use tokio::sync::Notify;
|
||||
use tower::ServiceExt;
|
||||
|
||||
use crate::{config::AppConfig, state::AppState};
|
||||
use axum::extract::FromRef;
|
||||
|
||||
use super::limit_concurrent_requests;
|
||||
use crate::{
|
||||
config::AppConfig,
|
||||
state::{AppState, BackpressureState},
|
||||
};
|
||||
|
||||
use super::{classify_request_permit_pool, limit_concurrent_requests};
|
||||
|
||||
#[derive(Clone)]
|
||||
struct HeldRequestGate {
|
||||
@@ -138,13 +194,50 @@ mod tests {
|
||||
let mut config = AppConfig::default();
|
||||
config.max_concurrent_requests = Some(max_concurrent_requests);
|
||||
let state = AppState::new(config).expect("state should build");
|
||||
let backpressure_state = BackpressureState::from_ref(&state);
|
||||
|
||||
Router::new()
|
||||
.route("/held", get(held_request))
|
||||
.route("/fast", get(fast_request))
|
||||
.route("/healthz", get(fast_request))
|
||||
.layer(middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
backpressure_state,
|
||||
limit_concurrent_requests,
|
||||
))
|
||||
.layer(Extension(gate))
|
||||
.with_state(state)
|
||||
}
|
||||
|
||||
fn build_grouped_test_app(
|
||||
default_max_concurrent_requests: usize,
|
||||
gallery_max_concurrent_requests: usize,
|
||||
admin_max_concurrent_requests: usize,
|
||||
gate: HeldRequestGate,
|
||||
) -> Router {
|
||||
let mut config = AppConfig::default();
|
||||
config.max_concurrent_requests = Some(default_max_concurrent_requests);
|
||||
config.gallery_max_concurrent_requests = Some(gallery_max_concurrent_requests);
|
||||
config.admin_max_concurrent_requests = Some(admin_max_concurrent_requests);
|
||||
let state = AppState::new(config).expect("state should build");
|
||||
let backpressure_state = BackpressureState::from_ref(&state);
|
||||
|
||||
Router::new()
|
||||
.route("/held", get(held_request))
|
||||
.route("/api/runtime/puzzle/gallery", get(held_request))
|
||||
.route("/api/runtime/custom-world-gallery", get(held_request))
|
||||
.route("/api/runtime/puzzle/gallery/profile-1", get(held_request))
|
||||
.route(
|
||||
"/api/runtime/puzzle/gallery/profile-1/like",
|
||||
get(fast_request),
|
||||
)
|
||||
.route(
|
||||
"/api/runtime/custom-world-gallery/user-1/profile-1",
|
||||
get(held_request),
|
||||
)
|
||||
.route("/admin/api/overview", get(held_request))
|
||||
.route("/fast", get(fast_request))
|
||||
.layer(middleware::from_fn_with_state(
|
||||
backpressure_state,
|
||||
limit_concurrent_requests,
|
||||
))
|
||||
.layer(Extension(gate))
|
||||
@@ -242,4 +335,147 @@ mod tests {
|
||||
.expect("third request should complete");
|
||||
assert_eq!(accepted_response.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn gallery_pool_rejects_gallery_without_blocking_default_routes() {
|
||||
let gate = HeldRequestGate {
|
||||
entered: Arc::new(Notify::new()),
|
||||
release: Arc::new(Notify::new()),
|
||||
};
|
||||
let app = build_grouped_test_app(2, 1, 1, gate.clone());
|
||||
let entered = gate.entered.notified();
|
||||
|
||||
let held_response = tokio::spawn(
|
||||
app.clone()
|
||||
.oneshot(test_request("/api/runtime/puzzle/gallery")),
|
||||
);
|
||||
entered.await;
|
||||
|
||||
let rejected_gallery_response = app
|
||||
.clone()
|
||||
.oneshot(test_request("/api/runtime/custom-world-gallery"))
|
||||
.await
|
||||
.expect("rejected gallery request should complete");
|
||||
assert_eq!(
|
||||
rejected_gallery_response.status(),
|
||||
StatusCode::TOO_MANY_REQUESTS
|
||||
);
|
||||
|
||||
let accepted_default_response = app
|
||||
.clone()
|
||||
.oneshot(test_request("/fast"))
|
||||
.await
|
||||
.expect("default request should complete");
|
||||
assert_eq!(accepted_default_response.status(), StatusCode::OK);
|
||||
|
||||
gate.release.notify_one();
|
||||
let completed_response = held_response
|
||||
.await
|
||||
.expect("held request task should join")
|
||||
.expect("held request should complete");
|
||||
assert_eq!(completed_response.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn detail_pool_falls_back_to_default_when_unset() {
|
||||
let gate = HeldRequestGate {
|
||||
entered: Arc::new(Notify::new()),
|
||||
release: Arc::new(Notify::new()),
|
||||
};
|
||||
let mut config = AppConfig::default();
|
||||
config.max_concurrent_requests = Some(1);
|
||||
config.detail_max_concurrent_requests = None;
|
||||
let state = AppState::new(config).expect("state should build");
|
||||
let backpressure_state = BackpressureState::from_ref(&state);
|
||||
let app = Router::new()
|
||||
.route("/api/runtime/puzzle/gallery/profile-1", get(held_request))
|
||||
.route("/fast", get(fast_request))
|
||||
.layer(middleware::from_fn_with_state(
|
||||
backpressure_state,
|
||||
limit_concurrent_requests,
|
||||
))
|
||||
.layer(Extension(gate.clone()))
|
||||
.with_state(state);
|
||||
let entered = gate.entered.notified();
|
||||
|
||||
let held_response = tokio::spawn(
|
||||
app.clone()
|
||||
.oneshot(test_request("/api/runtime/puzzle/gallery/profile-1")),
|
||||
);
|
||||
entered.await;
|
||||
|
||||
let rejected_default_response = app
|
||||
.clone()
|
||||
.oneshot(test_request("/fast"))
|
||||
.await
|
||||
.expect("default request should complete");
|
||||
assert_eq!(
|
||||
rejected_default_response.status(),
|
||||
StatusCode::TOO_MANY_REQUESTS
|
||||
);
|
||||
|
||||
gate.release.notify_one();
|
||||
let completed_response = held_response
|
||||
.await
|
||||
.expect("held request task should join")
|
||||
.expect("held request should complete");
|
||||
assert_eq!(completed_response.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn admin_pool_is_isolated_from_default_routes() {
|
||||
let gate = HeldRequestGate {
|
||||
entered: Arc::new(Notify::new()),
|
||||
release: Arc::new(Notify::new()),
|
||||
};
|
||||
let app = build_grouped_test_app(2, 1, 1, gate.clone());
|
||||
let entered = gate.entered.notified();
|
||||
|
||||
let held_response = tokio::spawn(app.clone().oneshot(test_request("/admin/api/overview")));
|
||||
entered.await;
|
||||
|
||||
let rejected_admin_response = app
|
||||
.clone()
|
||||
.oneshot(test_request("/admin/api/overview"))
|
||||
.await
|
||||
.expect("rejected admin request should complete");
|
||||
assert_eq!(
|
||||
rejected_admin_response.status(),
|
||||
StatusCode::TOO_MANY_REQUESTS
|
||||
);
|
||||
|
||||
let accepted_default_response = app
|
||||
.clone()
|
||||
.oneshot(test_request("/fast"))
|
||||
.await
|
||||
.expect("default request should complete");
|
||||
assert_eq!(accepted_default_response.status(), StatusCode::OK);
|
||||
|
||||
gate.release.notify_one();
|
||||
let completed_response = held_response
|
||||
.await
|
||||
.expect("held request task should join")
|
||||
.expect("held request should complete");
|
||||
assert_eq!(completed_response.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[test]
fn classifies_only_exact_gallery_detail_paths_as_detail() {
    // Each row is (request path, expected pool kind). Paths one segment deeper
    // than a detail path (e.g. `/like`) must land in the default pool.
    let expectations = [
        (
            "/api/runtime/puzzle/gallery/profile-1",
            crate::state::HttpRequestPermitPoolKind::Detail,
        ),
        (
            "/api/runtime/puzzle/gallery/profile-1/like",
            crate::state::HttpRequestPermitPoolKind::Default,
        ),
        (
            "/api/runtime/custom-world-gallery/user-1/profile-1",
            crate::state::HttpRequestPermitPoolKind::Detail,
        ),
        (
            "/api/runtime/custom-world-gallery/user-1/profile-1/like",
            crate::state::HttpRequestPermitPoolKind::Default,
        ),
    ];

    for (path, expected_kind) in expectations {
        assert_eq!(classify_request_permit_pool(path), expected_kind);
    }
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,14 @@ pub struct AppConfig {
|
||||
pub listen_backlog: i32,
|
||||
pub worker_threads: Option<usize>,
|
||||
pub max_concurrent_requests: Option<usize>,
|
||||
pub gallery_max_concurrent_requests: Option<usize>,
|
||||
pub detail_max_concurrent_requests: Option<usize>,
|
||||
pub admin_max_concurrent_requests: Option<usize>,
|
||||
pub tracking_outbox_enabled: bool,
|
||||
pub tracking_outbox_dir: PathBuf,
|
||||
pub tracking_outbox_batch_size: usize,
|
||||
pub tracking_outbox_flush_interval: Duration,
|
||||
pub tracking_outbox_max_bytes: u64,
|
||||
pub log_filter: String,
|
||||
pub otel_enabled: bool,
|
||||
pub admin_username: Option<String>,
|
||||
@@ -154,6 +162,14 @@ impl Default for AppConfig {
|
||||
listen_backlog: 1024,
|
||||
worker_threads: None,
|
||||
max_concurrent_requests: None,
|
||||
gallery_max_concurrent_requests: None,
|
||||
detail_max_concurrent_requests: None,
|
||||
admin_max_concurrent_requests: None,
|
||||
tracking_outbox_enabled: true,
|
||||
tracking_outbox_dir: PathBuf::from("server-rs/.data/tracking-outbox"),
|
||||
tracking_outbox_batch_size: 500,
|
||||
tracking_outbox_flush_interval: Duration::from_millis(1_000),
|
||||
tracking_outbox_max_bytes: 256 * 1024 * 1024,
|
||||
log_filter: "info,tower_http=info".to_string(),
|
||||
otel_enabled: false,
|
||||
admin_username: None,
|
||||
@@ -322,6 +338,41 @@ impl AppConfig {
|
||||
{
|
||||
config.max_concurrent_requests = Some(max_concurrent_requests);
|
||||
}
|
||||
if let Some(max_concurrent_requests) =
|
||||
read_first_usize_env(&["GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS"])
|
||||
{
|
||||
config.gallery_max_concurrent_requests = Some(max_concurrent_requests);
|
||||
}
|
||||
if let Some(max_concurrent_requests) =
|
||||
read_first_usize_env(&["GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS"])
|
||||
{
|
||||
config.detail_max_concurrent_requests = Some(max_concurrent_requests);
|
||||
}
|
||||
if let Some(max_concurrent_requests) =
|
||||
read_first_usize_env(&["GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS"])
|
||||
{
|
||||
config.admin_max_concurrent_requests = Some(max_concurrent_requests);
|
||||
}
|
||||
if let Some(enabled) = read_first_bool_env(&["GENARRATIVE_TRACKING_OUTBOX_ENABLED"]) {
|
||||
config.tracking_outbox_enabled = enabled;
|
||||
}
|
||||
if let Some(dir) = read_first_non_empty_env(&["GENARRATIVE_TRACKING_OUTBOX_DIR"]) {
|
||||
config.tracking_outbox_dir = PathBuf::from(dir);
|
||||
}
|
||||
if let Some(batch_size) = read_first_usize_env(&["GENARRATIVE_TRACKING_OUTBOX_BATCH_SIZE"])
|
||||
{
|
||||
config.tracking_outbox_batch_size = batch_size;
|
||||
}
|
||||
if let Some(flush_interval_ms) =
|
||||
read_first_positive_u64_env(&["GENARRATIVE_TRACKING_OUTBOX_FLUSH_INTERVAL_MS"])
|
||||
{
|
||||
config.tracking_outbox_flush_interval = Duration::from_millis(flush_interval_ms);
|
||||
}
|
||||
if let Some(max_bytes) =
|
||||
read_first_positive_u64_env(&["GENARRATIVE_TRACKING_OUTBOX_MAX_BYTES"])
|
||||
{
|
||||
config.tracking_outbox_max_bytes = max_bytes;
|
||||
}
|
||||
if let Some(otel_enabled) = read_first_bool_env(&["GENARRATIVE_OTEL_ENABLED"]) {
|
||||
config.otel_enabled = otel_enabled;
|
||||
}
|
||||
@@ -1246,10 +1297,29 @@ mod tests {
|
||||
std::env::remove_var("GENARRATIVE_API_LISTEN_BACKLOG");
|
||||
std::env::remove_var("GENARRATIVE_API_WORKER_THREADS");
|
||||
std::env::remove_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_ENABLED");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_DIR");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_BATCH_SIZE");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_FLUSH_INTERVAL_MS");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_MAX_BYTES");
|
||||
std::env::remove_var("GENARRATIVE_OTEL_ENABLED");
|
||||
std::env::set_var("GENARRATIVE_API_LISTEN_BACKLOG", "2048");
|
||||
std::env::set_var("GENARRATIVE_API_WORKER_THREADS", "6");
|
||||
std::env::set_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS", "128");
|
||||
std::env::set_var("GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS", "64");
|
||||
std::env::set_var("GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS", "32");
|
||||
std::env::set_var("GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS", "16");
|
||||
std::env::set_var("GENARRATIVE_TRACKING_OUTBOX_ENABLED", "false");
|
||||
std::env::set_var(
|
||||
"GENARRATIVE_TRACKING_OUTBOX_DIR",
|
||||
"/tmp/genarrative-tracking-outbox",
|
||||
);
|
||||
std::env::set_var("GENARRATIVE_TRACKING_OUTBOX_BATCH_SIZE", "250");
|
||||
std::env::set_var("GENARRATIVE_TRACKING_OUTBOX_FLUSH_INTERVAL_MS", "2000");
|
||||
std::env::set_var("GENARRATIVE_TRACKING_OUTBOX_MAX_BYTES", "1048576");
|
||||
std::env::set_var("GENARRATIVE_OTEL_ENABLED", "true");
|
||||
}
|
||||
|
||||
@@ -1257,12 +1327,34 @@ mod tests {
|
||||
assert_eq!(config.listen_backlog, 2048);
|
||||
assert_eq!(config.worker_threads, Some(6));
|
||||
assert_eq!(config.max_concurrent_requests, Some(128));
|
||||
assert_eq!(config.gallery_max_concurrent_requests, Some(64));
|
||||
assert_eq!(config.detail_max_concurrent_requests, Some(32));
|
||||
assert_eq!(config.admin_max_concurrent_requests, Some(16));
|
||||
assert!(!config.tracking_outbox_enabled);
|
||||
assert_eq!(
|
||||
config.tracking_outbox_dir,
|
||||
std::path::PathBuf::from("/tmp/genarrative-tracking-outbox")
|
||||
);
|
||||
assert_eq!(config.tracking_outbox_batch_size, 250);
|
||||
assert_eq!(
|
||||
config.tracking_outbox_flush_interval,
|
||||
std::time::Duration::from_millis(2_000)
|
||||
);
|
||||
assert_eq!(config.tracking_outbox_max_bytes, 1_048_576);
|
||||
assert!(config.otel_enabled);
|
||||
|
||||
unsafe {
|
||||
std::env::remove_var("GENARRATIVE_API_LISTEN_BACKLOG");
|
||||
std::env::remove_var("GENARRATIVE_API_WORKER_THREADS");
|
||||
std::env::remove_var("GENARRATIVE_API_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_API_GALLERY_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_API_DETAIL_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_API_ADMIN_MAX_CONCURRENT_REQUESTS");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_ENABLED");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_DIR");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_BATCH_SIZE");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_FLUSH_INTERVAL_MS");
|
||||
std::env::remove_var("GENARRATIVE_TRACKING_OUTBOX_MAX_BYTES");
|
||||
std::env::remove_var("GENARRATIVE_OTEL_ENABLED");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,8 +55,8 @@ mod password_entry;
|
||||
mod password_management;
|
||||
mod phone_auth;
|
||||
mod platform_errors;
|
||||
mod profile_identity;
|
||||
mod process_metrics;
|
||||
mod profile_identity;
|
||||
mod prompt;
|
||||
mod puzzle;
|
||||
mod puzzle_agent_turn;
|
||||
@@ -80,6 +80,7 @@ mod story_battles;
|
||||
mod story_sessions;
|
||||
mod telemetry;
|
||||
mod tracking;
|
||||
mod tracking_outbox;
|
||||
mod vector_engine_audio_generation;
|
||||
mod visual_novel;
|
||||
mod volcengine_speech;
|
||||
@@ -154,6 +155,9 @@ async fn run_server(config: AppConfig) -> Result<(), io::Error> {
|
||||
.await
|
||||
.map_err(|error| std::io::Error::other(format!("初始化应用状态失败:{error}")))?;
|
||||
state.puzzle_gallery_cache().spawn_cleanup_task();
|
||||
if let Some(outbox) = state.tracking_outbox() {
|
||||
outbox.spawn_worker();
|
||||
}
|
||||
let router = build_router(state);
|
||||
|
||||
info!(
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
use std::sync::OnceLock;
|
||||
use std::{
|
||||
sync::{Mutex, OnceLock},
|
||||
time::Instant,
|
||||
};
|
||||
|
||||
use opentelemetry::global;
|
||||
use tracing::warn;
|
||||
@@ -52,6 +55,38 @@ fn register_process_metrics_once() {
|
||||
})
|
||||
.build();
|
||||
|
||||
meter
|
||||
.f64_observable_counter("process.cpu.time")
|
||||
.with_unit("s")
|
||||
.with_description("api-server total user plus system CPU time")
|
||||
.with_callback(|observer| {
|
||||
let Some(snapshot) = ProcessMetricsSnapshot::collect() else {
|
||||
return;
|
||||
};
|
||||
if let Some(cpu_time_seconds) = snapshot.cpu_time_seconds {
|
||||
observer.observe(cpu_time_seconds, &[]);
|
||||
}
|
||||
})
|
||||
.build();
|
||||
|
||||
meter
|
||||
.f64_observable_gauge("genarrative.process.cpu.usage_percent")
|
||||
.with_unit("%")
|
||||
.with_description("api-server process CPU usage between metric collections")
|
||||
.with_callback(|observer| {
|
||||
let Some(snapshot) = ProcessMetricsSnapshot::collect() else {
|
||||
return;
|
||||
};
|
||||
if let Some(cpu_time_seconds) = snapshot.cpu_time_seconds {
|
||||
if let Some(usage_percent) =
|
||||
process_cpu_usage_percent(cpu_time_seconds, Instant::now())
|
||||
{
|
||||
observer.observe(usage_percent, &[]);
|
||||
}
|
||||
}
|
||||
})
|
||||
.build();
|
||||
|
||||
meter
|
||||
.i64_observable_up_down_counter("process.thread.count")
|
||||
.with_unit("{thread}")
|
||||
@@ -97,11 +132,12 @@ fn to_i64(value: u64) -> i64 {
|
||||
value.min(i64::MAX as u64) as i64
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
struct ProcessMetricsSnapshot {
|
||||
rss_bytes: u64,
|
||||
private_bytes: Option<u64>,
|
||||
virtual_bytes: Option<u64>,
|
||||
cpu_time_seconds: Option<f64>,
|
||||
thread_count: u64,
|
||||
windows_handle_count: Option<u64>,
|
||||
unix_fd_count: Option<u64>,
|
||||
@@ -111,12 +147,56 @@ impl ProcessMetricsSnapshot {
|
||||
fn collect() -> Option<Self> {
|
||||
collect_process_metrics()
|
||||
.inspect_err(|error| {
|
||||
warn!(%error, "采集 api-server 进程内存指标失败");
|
||||
warn!(%error, "采集 api-server 进程指标失败");
|
||||
})
|
||||
.ok()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct CpuUsageSample {
|
||||
cpu_time_seconds: f64,
|
||||
observed_at: Instant,
|
||||
}
|
||||
|
||||
fn process_cpu_usage_percent(cpu_time_seconds: f64, observed_at: Instant) -> Option<f64> {
|
||||
static LAST_SAMPLE: OnceLock<Mutex<Option<CpuUsageSample>>> = OnceLock::new();
|
||||
|
||||
let mut last_sample = LAST_SAMPLE.get_or_init(|| Mutex::new(None)).lock().ok()?;
|
||||
let previous = *last_sample;
|
||||
*last_sample = Some(CpuUsageSample {
|
||||
cpu_time_seconds,
|
||||
observed_at,
|
||||
});
|
||||
|
||||
let previous = previous?;
|
||||
let wall_delta_seconds = observed_at
|
||||
.checked_duration_since(previous.observed_at)?
|
||||
.as_secs_f64();
|
||||
cpu_usage_ratio_between_samples(
|
||||
previous.cpu_time_seconds,
|
||||
cpu_time_seconds,
|
||||
0.0,
|
||||
wall_delta_seconds,
|
||||
)
|
||||
.map(|ratio| ratio * 100.0)
|
||||
}
|
||||
|
||||
/// Computes CPU seconds consumed per wall-clock second between two samples.
///
/// Returns `None` when the result would be meaningless:
/// - the CPU-time delta is negative,
/// - the wall-clock delta is zero or negative,
/// - any delta, or the resulting ratio, is NaN or infinite.
///
/// The value is a plain ratio (not a percentage) and may exceed `1.0`.
fn cpu_usage_ratio_between_samples(
    previous_cpu_seconds: f64,
    current_cpu_seconds: f64,
    previous_wall_seconds: f64,
    current_wall_seconds: f64,
) -> Option<f64> {
    let cpu_delta_seconds = current_cpu_seconds - previous_cpu_seconds;
    let wall_delta_seconds = current_wall_seconds - previous_wall_seconds;

    // NaN compares false against everything, so it would slip past the range
    // checks below; reject non-finite deltas explicitly.
    if !cpu_delta_seconds.is_finite() || !wall_delta_seconds.is_finite() {
        return None;
    }
    if cpu_delta_seconds < 0.0 || wall_delta_seconds <= 0.0 {
        return None;
    }

    // A tiny wall delta can still overflow the division to infinity.
    let ratio = cpu_delta_seconds / wall_delta_seconds;
    ratio.is_finite().then_some(ratio)
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
||||
use windows_sys::Win32::{
|
||||
@@ -149,16 +229,52 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
||||
Some(u64::from(handle_count))
|
||||
};
|
||||
|
||||
let cpu_time_seconds = windows_process_cpu_time_seconds(handle);
|
||||
|
||||
Ok(ProcessMetricsSnapshot {
|
||||
rss_bytes: counters.WorkingSetSize as u64,
|
||||
private_bytes: Some(counters.PrivateUsage as u64),
|
||||
virtual_bytes: Some(counters.PrivateUsage as u64),
|
||||
cpu_time_seconds,
|
||||
thread_count: u64::from(unsafe { GetCurrentProcessId() }.thread_count()?),
|
||||
windows_handle_count: handle_count,
|
||||
unix_fd_count: None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Reads the kernel plus user CPU time of the process behind `handle`, in seconds.
///
/// Returns `None` when `GetProcessTimes` reports failure (a zero return value).
#[cfg(windows)]
fn windows_process_cpu_time_seconds(handle: windows_sys::Win32::Foundation::HANDLE) -> Option<f64> {
    use windows_sys::Win32::{Foundation::FILETIME, System::Threading::GetProcessTimes};

    let mut created_at = FILETIME::default();
    let mut exited_at = FILETIME::default();
    let mut kernel_elapsed = FILETIME::default();
    let mut user_elapsed = FILETIME::default();

    // SAFETY: the four out-pointers refer to live local `FILETIME` values.
    // NOTE(review): validity of `handle` is the caller's responsibility — confirm at call sites.
    let succeeded = unsafe {
        GetProcessTimes(
            handle,
            &mut created_at,
            &mut exited_at,
            &mut kernel_elapsed,
            &mut user_elapsed,
        )
    } != 0;
    if !succeeded {
        return None;
    }

    // Both values are counts of 100 ns units; 10_000_000 of them make one second.
    let busy_100ns = filetime_100ns(kernel_elapsed) + filetime_100ns(user_elapsed);
    Some(busy_100ns as f64 / 10_000_000.0)
}
|
||||
|
||||
/// Combines the high and low 32-bit halves of a `FILETIME` into one `u64`.
#[cfg(windows)]
fn filetime_100ns(filetime: windows_sys::Win32::Foundation::FILETIME) -> u64 {
    let high_half = filetime.dwHighDateTime as u64;
    let low_half = u64::from(filetime.dwLowDateTime);
    (high_half << 32) | low_half
}
|
||||
|
||||
#[cfg(windows)]
|
||||
trait WindowsProcessThreadCount {
|
||||
fn thread_count(self) -> Result<u32, String>;
|
||||
@@ -207,6 +323,8 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
||||
.map_err(|error| format!("read /proc/self/status failed: {error}"))?;
|
||||
let statm = std::fs::read_to_string("/proc/self/statm")
|
||||
.map_err(|error| format!("read /proc/self/statm failed: {error}"))?;
|
||||
let stat = std::fs::read_to_string("/proc/self/stat")
|
||||
.map_err(|error| format!("read /proc/self/stat failed: {error}"))?;
|
||||
let page_size = linux_page_size_bytes()?;
|
||||
|
||||
let rss_bytes = parse_status_kb(&status, "VmRSS:")
|
||||
@@ -218,6 +336,7 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
||||
.or_else(|| parse_statm_pages(&statm, 0).map(|value| value * page_size))
|
||||
.ok_or_else(|| "missing VmSize/statm size field".to_string())?;
|
||||
let private_bytes = parse_status_kb(&status, "VmData:").map(|value| value * 1024);
|
||||
let cpu_time_seconds = linux_cpu_time_seconds(&stat)?;
|
||||
let thread_count = parse_status_u64(&status, "Threads:")
|
||||
.ok_or_else(|| "missing Threads field".to_string())?;
|
||||
|
||||
@@ -225,12 +344,52 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
||||
rss_bytes,
|
||||
private_bytes,
|
||||
virtual_bytes: Some(virtual_bytes),
|
||||
cpu_time_seconds: Some(cpu_time_seconds),
|
||||
thread_count,
|
||||
windows_handle_count: None,
|
||||
unix_fd_count: linux_fd_count(),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn linux_cpu_time_seconds(stat: &str) -> Result<f64, String> {
|
||||
let cpu_ticks = parse_linux_proc_stat_cpu_ticks(stat)
|
||||
.ok_or_else(|| "missing /proc/self/stat utime/stime fields".to_string())?;
|
||||
let ticks_per_second = linux_clock_ticks_per_second()?;
|
||||
Ok(cpu_ticks as f64 / ticks_per_second as f64)
|
||||
}
|
||||
|
||||
/// Returns the system clock-tick rate by running `getconf CLK_TCK`.
///
/// The first outcome, success or failure, is cached for the rest of the process,
/// so the external command runs at most once.
///
/// # Errors
/// Returns an error string when `getconf` cannot be run, exits unsuccessfully,
/// prints non-UTF-8 output, or prints something that is not an unsigned integer.
#[cfg(target_os = "linux")]
fn linux_clock_ticks_per_second() -> Result<u64, String> {
    /// Runs `getconf CLK_TCK` once and parses its trimmed standard output.
    fn query_clk_tck() -> Result<u64, String> {
        let finished = match std::process::Command::new("getconf")
            .arg("CLK_TCK")
            .output()
        {
            Ok(finished) => finished,
            Err(error) => return Err(format!("getconf CLK_TCK failed: {error}")),
        };
        if !finished.status.success() {
            return Err(format!("getconf CLK_TCK exited with {}", finished.status));
        }

        let stdout_text = match String::from_utf8(finished.stdout) {
            Ok(stdout_text) => stdout_text,
            Err(error) => return Err(format!("getconf CLK_TCK output is not utf8: {error}")),
        };
        match stdout_text.trim().parse::<u64>() {
            Ok(ticks) => Ok(ticks),
            Err(error) => Err(format!("parse CLK_TCK failed: {error}")),
        }
    }

    static CLOCK_TICKS_PER_SECOND: OnceLock<Result<u64, String>> = OnceLock::new();

    CLOCK_TICKS_PER_SECOND.get_or_init(query_clk_tck).clone()
}
|
||||
|
||||
/// Extracts `utime + stime` (in clock ticks) from the text of `/proc/<pid>/stat`.
///
/// The process name field is wrapped in parentheses and may itself contain
/// spaces or `)`, so parsing starts after the last `") "` in the line. After
/// that point the process state is field 0, which puts `utime` at index 11 and
/// `stime` right after it.
///
/// Returns `None` when the line has no `") "` separator, has too few fields,
/// either field is not an unsigned integer, or their sum does not fit in `u64`.
#[cfg(target_os = "linux")]
fn parse_linux_proc_stat_cpu_ticks(stat: &str) -> Option<u64> {
    let (_, fields_after_comm) = stat.rsplit_once(") ")?;
    let mut fields = fields_after_comm.split_whitespace();

    let utime = fields.nth(11)?.parse::<u64>().ok()?;
    let stime = fields.next()?.parse::<u64>().ok()?;

    // The values come from parsed text; refuse to panic or wrap on absurd input.
    utime.checked_add(stime)
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn linux_page_size_bytes() -> Result<u64, String> {
|
||||
let output = std::process::Command::new("getconf")
|
||||
@@ -282,8 +441,12 @@ fn collect_process_metrics() -> Result<ProcessMetricsSnapshot, String> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::cpu_usage_ratio_between_samples;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use super::{parse_statm_pages, parse_status_kb, parse_status_u64};
|
||||
use super::{
|
||||
parse_linux_proc_stat_cpu_ticks, parse_statm_pages, parse_status_kb, parse_status_u64,
|
||||
};
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[test]
|
||||
@@ -303,4 +466,28 @@ mod tests {
|
||||
assert_eq!(parse_statm_pages("100 20 0 0 0 0 0", 1), Some(20));
|
||||
assert_eq!(parse_statm_pages("100 20", 7), None);
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
#[test]
fn parses_linux_proc_stat_cpu_ticks_with_space_in_process_name() {
    // The process name "(api server)" contains a space; utime is 120 and stime is 30.
    let parsed_ticks = parse_linux_proc_stat_cpu_ticks(
        "123 (api server) S 1 2 3 4 5 6 7 8 9 10 120 30 0 0 20 0 18 0 12345",
    );

    assert_eq!(parsed_ticks, Some(150));
}
|
||||
|
||||
#[test]
fn cpu_usage_ratio_uses_cpu_time_delta_over_wall_time() {
    // Each row is ((previous cpu, current cpu, previous wall, current wall), expected).
    let cases = [
        // 2.5 CPU seconds over 1 wall second.
        ((10.0, 12.5, 100.0, 101.0), Some(2.5)),
        // CPU time going backwards is rejected.
        ((10.0, 9.0, 100.0, 101.0), None),
        // No wall time elapsed is rejected.
        ((10.0, 11.0, 100.0, 100.0), None),
    ];

    for ((previous_cpu, current_cpu, previous_wall, current_wall), expected) in cases {
        assert_eq!(
            cpu_usage_ratio_between_samples(previous_cpu, current_cpu, previous_wall, current_wall),
            expected
        );
    }
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ use shared_contracts::{
|
||||
puzzle_works::PuzzleWorkSummaryResponse,
|
||||
};
|
||||
use tokio::{
|
||||
sync::{Mutex, MutexGuard, RwLock},
|
||||
sync::{Mutex, MutexGuard, OwnedMutexGuard, RwLock},
|
||||
time,
|
||||
};
|
||||
|
||||
@@ -69,6 +69,18 @@ impl PuzzleGalleryCache {
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn read_stale_response(&self) -> Option<PuzzleGalleryCachedResponse> {
|
||||
let guard = self.inner.read().await;
|
||||
let entry = guard.as_ref()?;
|
||||
Some(PuzzleGalleryCachedResponse {
|
||||
data_json: entry.data_json.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Tries to take the rebuild lock without waiting.
///
/// Returns the owned guard on success and `None` when the lock is already held.
pub fn try_acquire_owned_rebuild_guard(&self) -> Option<OwnedMutexGuard<()>> {
    let rebuild_lock = self.rebuild_lock.clone();
    rebuild_lock.try_lock_owned().ok()
}
|
||||
|
||||
pub async fn store_response(
|
||||
&self,
|
||||
response: PuzzleGalleryResponse,
|
||||
@@ -205,4 +217,36 @@ mod tests {
|
||||
assert!(!response.has_more);
|
||||
assert_eq!(response.next_cursor, None);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn stale_response_remains_readable_after_fresh_ttl() {
|
||||
let cache = PuzzleGalleryCache::new();
|
||||
let response =
|
||||
build_puzzle_gallery_window_response((0..8).map(build_summary).collect::<Vec<_>>());
|
||||
cache
|
||||
.store_response(response)
|
||||
.await
|
||||
.expect("cache response should serialize");
|
||||
|
||||
{
|
||||
let mut guard = cache.inner.write().await;
|
||||
let entry = guard.as_mut().expect("cache entry should exist");
|
||||
entry.built_at = Instant::now() - PUZZLE_GALLERY_CACHE_TTL - Duration::from_secs(1);
|
||||
}
|
||||
|
||||
assert!(cache.read_fresh_response().await.is_none());
|
||||
assert!(cache.read_stale_response().await.is_some());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn try_owned_rebuild_guard_allows_only_one_refresher() {
|
||||
let cache = PuzzleGalleryCache::new();
|
||||
let first_guard = cache.try_acquire_owned_rebuild_guard();
|
||||
|
||||
assert!(first_guard.is_some());
|
||||
assert!(cache.try_acquire_owned_rebuild_guard().is_none());
|
||||
|
||||
drop(first_guard);
|
||||
assert!(cache.try_acquire_owned_rebuild_guard().is_some());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::{
|
||||
time::{SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
|
||||
use axum::extract::FromRef;
|
||||
use module_ai::{AiTaskService, InMemoryAiTaskStore};
|
||||
use module_auth::{
|
||||
AuthUserService, InMemoryAuthStore, PasswordEntryService, PhoneAuthService,
|
||||
@@ -32,6 +33,7 @@ use tracing::{info, warn};
|
||||
|
||||
use crate::config::AppConfig;
|
||||
use crate::puzzle_gallery_cache::PuzzleGalleryCache;
|
||||
use crate::tracking_outbox::TrackingOutbox;
|
||||
use crate::wechat_pay::{WechatPayClient, map_wechat_pay_init_error};
|
||||
use crate::wechat_provider::build_wechat_provider;
|
||||
|
||||
@@ -39,13 +41,113 @@ const ADMIN_ROLE: &str = "admin";
|
||||
|
||||
pub type HttpRequestPermitPool = Semaphore;
|
||||
|
||||
// 当前阶段先保留最小共享状态壳,后续逐步接入配置、客户端与平台适配。
|
||||
/// Names the HTTP request permit pools that requests can be admitted through.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HttpRequestPermitPoolKind {
    Default,
    Gallery,
    Detail,
    Admin,
}

impl HttpRequestPermitPoolKind {
    /// Returns the lowercase label of this pool kind.
    pub fn as_str(self) -> &'static str {
        let label = match self {
            HttpRequestPermitPoolKind::Default => "default",
            HttpRequestPermitPoolKind::Gallery => "gallery",
            HttpRequestPermitPoolKind::Detail => "detail",
            HttpRequestPermitPoolKind::Admin => "admin",
        };
        label
    }
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AppState {
|
||||
pub struct HttpRequestPermitPools {
|
||||
default: Option<Arc<HttpRequestPermitPool>>,
|
||||
gallery: Option<Arc<HttpRequestPermitPool>>,
|
||||
detail: Option<Arc<HttpRequestPermitPool>>,
|
||||
admin: Option<Arc<HttpRequestPermitPool>>,
|
||||
}
|
||||
|
||||
impl HttpRequestPermitPools {
|
||||
fn from_config(config: &AppConfig) -> Self {
|
||||
Self {
|
||||
default: config
|
||||
.max_concurrent_requests
|
||||
.map(HttpRequestPermitPool::new)
|
||||
.map(Arc::new),
|
||||
gallery: config
|
||||
.gallery_max_concurrent_requests
|
||||
.map(HttpRequestPermitPool::new)
|
||||
.map(Arc::new),
|
||||
detail: config
|
||||
.detail_max_concurrent_requests
|
||||
.map(HttpRequestPermitPool::new)
|
||||
.map(Arc::new),
|
||||
admin: config
|
||||
.admin_max_concurrent_requests
|
||||
.map(HttpRequestPermitPool::new)
|
||||
.map(Arc::new),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pool(
|
||||
&self,
|
||||
kind: HttpRequestPermitPoolKind,
|
||||
) -> Option<(HttpRequestPermitPoolKind, Arc<HttpRequestPermitPool>)> {
|
||||
let selected = match kind {
|
||||
HttpRequestPermitPoolKind::Default => self.default.clone(),
|
||||
HttpRequestPermitPoolKind::Gallery => self.gallery.clone(),
|
||||
HttpRequestPermitPoolKind::Detail => self.detail.clone(),
|
||||
HttpRequestPermitPoolKind::Admin => self.admin.clone(),
|
||||
};
|
||||
selected.map(|pool| (kind, pool)).or_else(|| {
|
||||
self.default
|
||||
.clone()
|
||||
.map(|pool| (HttpRequestPermitPoolKind::Default, pool))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct BackpressureState {
|
||||
permit_pools: HttpRequestPermitPools,
|
||||
}
|
||||
|
||||
impl BackpressureState {
|
||||
pub fn request_permit_pool(
|
||||
&self,
|
||||
kind: HttpRequestPermitPoolKind,
|
||||
) -> Option<(HttpRequestPermitPoolKind, Arc<HttpRequestPermitPool>)> {
|
||||
self.permit_pools.pool(kind)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AppState(Arc<AppStateInner>);
|
||||
|
||||
impl std::ops::Deref for AppState {
|
||||
type Target = AppStateInner;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl FromRef<AppState> for BackpressureState {
|
||||
fn from_ref(state: &AppState) -> Self {
|
||||
Self {
|
||||
permit_pools: state.http_request_permit_pools(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Axum/Hyper 会在路由树和连接 service 上频繁 clone state;AppState 外层必须保持浅拷贝。
|
||||
#[derive(Debug)]
|
||||
pub struct AppStateInner {
|
||||
// 配置会在后续中间件、路由和平台适配接入时逐步消费。
|
||||
#[allow(dead_code)]
|
||||
pub config: AppConfig,
|
||||
http_request_permit_pool: Option<Arc<HttpRequestPermitPool>>,
|
||||
http_request_permit_pools: HttpRequestPermitPools,
|
||||
auth_jwt_config: JwtConfig,
|
||||
admin_runtime: Option<AdminRuntime>,
|
||||
refresh_cookie_config: RefreshCookieConfig,
|
||||
@@ -66,6 +168,7 @@ pub struct AppState {
|
||||
ai_task_service: AiTaskService,
|
||||
spacetime_client: SpacetimeClient,
|
||||
puzzle_gallery_cache: PuzzleGalleryCache,
|
||||
tracking_outbox: Option<Arc<TrackingOutbox>>,
|
||||
llm_client: Option<LlmClient>,
|
||||
creative_agent_gpt5_client: Option<LlmClient>,
|
||||
creative_agent_executor: Arc<MockLangChainRustAgentExecutor>,
|
||||
@@ -196,16 +299,14 @@ impl AppState {
|
||||
pool_size: config.spacetime_pool_size,
|
||||
procedure_timeout: config.spacetime_procedure_timeout,
|
||||
});
|
||||
let tracking_outbox = TrackingOutbox::from_config(&config, spacetime_client.clone());
|
||||
let llm_client = build_llm_client(&config)?;
|
||||
let creative_agent_gpt5_client = build_creative_agent_gpt5_client(&config)?;
|
||||
let http_request_permit_pool = config
|
||||
.max_concurrent_requests
|
||||
.map(HttpRequestPermitPool::new)
|
||||
.map(Arc::new);
|
||||
let http_request_permit_pools = HttpRequestPermitPools::from_config(&config);
|
||||
|
||||
Ok(Self {
|
||||
Ok(Self(Arc::new(AppStateInner {
|
||||
config,
|
||||
http_request_permit_pool,
|
||||
http_request_permit_pools,
|
||||
auth_jwt_config,
|
||||
admin_runtime,
|
||||
refresh_cookie_config,
|
||||
@@ -226,13 +327,14 @@ impl AppState {
|
||||
ai_task_service,
|
||||
spacetime_client,
|
||||
puzzle_gallery_cache: PuzzleGalleryCache::new(),
|
||||
tracking_outbox,
|
||||
llm_client,
|
||||
creative_agent_gpt5_client,
|
||||
creative_agent_executor: Arc::new(MockLangChainRustAgentExecutor),
|
||||
creative_agent_sessions: Arc::new(Mutex::new(HashMap::new())),
|
||||
#[cfg(test)]
|
||||
test_runtime_snapshot_store: Arc::new(Mutex::new(HashMap::new())),
|
||||
})
|
||||
})))
|
||||
}
|
||||
|
||||
pub fn auth_jwt_config(&self) -> &JwtConfig {
|
||||
@@ -247,8 +349,8 @@ impl AppState {
|
||||
&self.refresh_cookie_config
|
||||
}
|
||||
|
||||
pub fn http_request_permit_pool(&self) -> Option<Arc<HttpRequestPermitPool>> {
|
||||
self.http_request_permit_pool.clone()
|
||||
pub fn http_request_permit_pools(&self) -> HttpRequestPermitPools {
|
||||
self.http_request_permit_pools.clone()
|
||||
}
|
||||
|
||||
pub async fn upsert_creation_entry_type_config(
|
||||
@@ -484,6 +586,10 @@ impl AppState {
|
||||
&self.puzzle_gallery_cache
|
||||
}
|
||||
|
||||
pub fn tracking_outbox(&self) -> Option<Arc<TrackingOutbox>> {
|
||||
self.tracking_outbox.clone()
|
||||
}
|
||||
|
||||
pub fn llm_client(&self) -> Option<&LlmClient> {
|
||||
self.llm_client.as_ref()
|
||||
}
|
||||
|
||||
@@ -12,10 +12,16 @@ use std::sync::{
|
||||
};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::{request_context::resolve_request_id, state::AppState};
|
||||
use crate::{
|
||||
request_context::resolve_request_id,
|
||||
state::{AppState, HttpRequestPermitPoolKind},
|
||||
};
|
||||
|
||||
static HTTP_RESPONSE_BODY_IN_FLIGHT: AtomicI64 = AtomicI64::new(0);
|
||||
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<Arc<AtomicI64>> = OnceLock::new();
|
||||
static TRACKING_OUTBOX_PENDING_BYTES: AtomicI64 = AtomicI64::new(0);
|
||||
static TRACKING_OUTBOX_PENDING_FILES: AtomicI64 = AtomicI64::new(0);
|
||||
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<HttpRequestPermitsAvailableGauges> =
|
||||
OnceLock::new();
|
||||
|
||||
// 集中维护 api-server HTTP 观测,避免在 handler 中散落高基数字段或重复创建 instrument。
|
||||
pub async fn record_http_observability(
|
||||
@@ -78,34 +84,94 @@ pub async fn record_http_observability(
|
||||
track_response_body_in_flight(response)
|
||||
}
|
||||
|
||||
pub(crate) fn update_http_request_permits_available(available: usize) {
|
||||
let gauge = HTTP_REQUEST_PERMITS_AVAILABLE.get_or_init(|| {
|
||||
let gauge = Arc::new(AtomicI64::new(0));
|
||||
register_http_request_permits_available_metric(gauge.clone());
|
||||
gauge
|
||||
});
|
||||
gauge.store(available.min(i64::MAX as usize) as i64, Ordering::Relaxed);
|
||||
pub(crate) fn update_http_request_permits_available(
|
||||
pool: HttpRequestPermitPoolKind,
|
||||
available: usize,
|
||||
) {
|
||||
HTTP_REQUEST_PERMITS_AVAILABLE
|
||||
.get_or_init(register_http_request_permits_available_metric)
|
||||
.store(pool, available);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_hit() {
|
||||
puzzle_gallery_cache_metrics().hits.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_stale_hit() {
|
||||
puzzle_gallery_cache_metrics().stale_hits.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_miss() {
|
||||
puzzle_gallery_cache_metrics().misses.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_rebuild(duration: std::time::Duration, data_bytes: usize) {
|
||||
pub(crate) fn record_puzzle_gallery_cache_refresh_started() {
|
||||
puzzle_gallery_cache_metrics().refreshes_started.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_refresh_failed() {
|
||||
puzzle_gallery_cache_metrics().refreshes_failed.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_rebuild(
|
||||
duration: std::time::Duration,
|
||||
data_bytes: usize,
|
||||
) {
|
||||
let metrics = puzzle_gallery_cache_metrics();
|
||||
metrics.rebuilds.add(1, &[]);
|
||||
metrics
|
||||
.rebuild_duration
|
||||
.record(duration.as_secs_f64(), &[]);
|
||||
metrics.rebuild_duration.record(duration.as_secs_f64(), &[]);
|
||||
metrics
|
||||
.data_json_bytes
|
||||
.record(data_bytes.min(u64::MAX as usize) as u64, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_tracking_outbox_enqueued() {
|
||||
tracking_outbox_metrics().enqueued.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_tracking_outbox_dropped(reason: &'static str) {
|
||||
tracking_outbox_metrics()
|
||||
.dropped
|
||||
.add(1, &[KeyValue::new("reason", reason)]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_tracking_outbox_sealed(reason: &'static str) {
|
||||
tracking_outbox_metrics()
|
||||
.sealed_files
|
||||
.add(1, &[KeyValue::new("reason", reason)]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_tracking_outbox_corrupt_file() {
|
||||
tracking_outbox_metrics().corrupt_files.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_tracking_outbox_flush(
|
||||
duration: std::time::Duration,
|
||||
accepted_count: u32,
|
||||
file_bytes: u64,
|
||||
failed: bool,
|
||||
) {
|
||||
let status_class = if failed { "error" } else { "ok" };
|
||||
let labels = [KeyValue::new("status_class", status_class)];
|
||||
let metrics = tracking_outbox_metrics();
|
||||
metrics.flushes.add(1, &labels);
|
||||
metrics
|
||||
.flush_duration
|
||||
.record(duration.as_secs_f64(), &labels);
|
||||
metrics
|
||||
.flushed_events
|
||||
.add(u64::from(accepted_count), &labels);
|
||||
metrics.flushed_bytes.add(file_bytes, &labels);
|
||||
}
|
||||
|
||||
pub(crate) fn update_tracking_outbox_pending_bytes(bytes: u64) {
|
||||
TRACKING_OUTBOX_PENDING_BYTES.store(bytes.min(i64::MAX as u64) as i64, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub(crate) fn update_tracking_outbox_pending_files(files: usize) {
|
||||
TRACKING_OUTBOX_PENDING_FILES.store(files.min(i64::MAX as usize) as i64, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
fn track_response_body_in_flight(response: Response<Body>) -> Response<Body> {
|
||||
response.map(|body| {
|
||||
HTTP_RESPONSE_BODY_IN_FLIGHT.fetch_add(1, Ordering::Relaxed);
|
||||
@@ -125,12 +191,55 @@ struct HttpMetrics {
|
||||
|
||||
struct PuzzleGalleryCacheMetrics {
|
||||
hits: Counter<u64>,
|
||||
stale_hits: Counter<u64>,
|
||||
misses: Counter<u64>,
|
||||
refreshes_started: Counter<u64>,
|
||||
refreshes_failed: Counter<u64>,
|
||||
rebuilds: Counter<u64>,
|
||||
rebuild_duration: opentelemetry::metrics::Histogram<f64>,
|
||||
data_json_bytes: opentelemetry::metrics::Histogram<u64>,
|
||||
}
|
||||
|
||||
struct TrackingOutboxMetrics {
|
||||
enqueued: Counter<u64>,
|
||||
dropped: Counter<u64>,
|
||||
sealed_files: Counter<u64>,
|
||||
corrupt_files: Counter<u64>,
|
||||
flushes: Counter<u64>,
|
||||
flush_duration: opentelemetry::metrics::Histogram<f64>,
|
||||
flushed_events: Counter<u64>,
|
||||
flushed_bytes: Counter<u64>,
|
||||
}
|
||||
|
||||
struct HttpRequestPermitsAvailableGauges {
|
||||
default: Arc<AtomicI64>,
|
||||
gallery: Arc<AtomicI64>,
|
||||
detail: Arc<AtomicI64>,
|
||||
admin: Arc<AtomicI64>,
|
||||
}
|
||||
|
||||
impl HttpRequestPermitsAvailableGauges {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
default: Arc::new(AtomicI64::new(0)),
|
||||
gallery: Arc::new(AtomicI64::new(0)),
|
||||
detail: Arc::new(AtomicI64::new(0)),
|
||||
admin: Arc::new(AtomicI64::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
fn store(&self, pool: HttpRequestPermitPoolKind, available: usize) {
|
||||
let value = available.min(i64::MAX as usize) as i64;
|
||||
match pool {
|
||||
HttpRequestPermitPoolKind::Default => &self.default,
|
||||
HttpRequestPermitPoolKind::Gallery => &self.gallery,
|
||||
HttpRequestPermitPoolKind::Detail => &self.detail,
|
||||
HttpRequestPermitPoolKind::Admin => &self.admin,
|
||||
}
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
struct ResponseBodyInFlightGuard;
|
||||
|
||||
impl Drop for ResponseBodyInFlightGuard {
|
||||
@@ -171,10 +280,22 @@ fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.hits")
|
||||
.with_description("Puzzle gallery response cache hits")
|
||||
.build(),
|
||||
stale_hits: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.stale_hits")
|
||||
.with_description("Puzzle gallery stale response cache hits")
|
||||
.build(),
|
||||
misses: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.misses")
|
||||
.with_description("Puzzle gallery response cache misses")
|
||||
.build(),
|
||||
refreshes_started: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.refreshes_started")
|
||||
.with_description("Puzzle gallery background refresh start count")
|
||||
.build(),
|
||||
refreshes_failed: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.refreshes_failed")
|
||||
.with_description("Puzzle gallery background refresh failure count")
|
||||
.build(),
|
||||
rebuilds: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.rebuilds")
|
||||
.with_description("Puzzle gallery response cache rebuild count")
|
||||
@@ -193,16 +314,94 @@ fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
|
||||
})
|
||||
}
|
||||
|
||||
fn register_http_request_permits_available_metric(gauge: Arc<AtomicI64>) {
|
||||
fn tracking_outbox_metrics() -> &'static TrackingOutboxMetrics {
|
||||
static METRICS: std::sync::OnceLock<TrackingOutboxMetrics> = std::sync::OnceLock::new();
|
||||
METRICS.get_or_init(|| {
|
||||
let meter = global::meter("genarrative-api");
|
||||
TrackingOutboxMetrics {
|
||||
enqueued: meter
|
||||
.u64_counter("genarrative.tracking_outbox.events.enqueued")
|
||||
.with_description("Tracking events appended to the local outbox")
|
||||
.build(),
|
||||
dropped: meter
|
||||
.u64_counter("genarrative.tracking_outbox.events.dropped")
|
||||
.with_description("Tracking events dropped by local outbox protection")
|
||||
.build(),
|
||||
sealed_files: meter
|
||||
.u64_counter("genarrative.tracking_outbox.files.sealed")
|
||||
.with_description("Tracking outbox active files sealed for flushing")
|
||||
.build(),
|
||||
corrupt_files: meter
|
||||
.u64_counter("genarrative.tracking_outbox.files.corrupt")
|
||||
.with_description(
|
||||
"Tracking outbox sealed files quarantined because they could not be parsed",
|
||||
)
|
||||
.build(),
|
||||
flushes: meter
|
||||
.u64_counter("genarrative.tracking_outbox.flushes")
|
||||
.with_description("Tracking outbox sealed file flush attempts")
|
||||
.build(),
|
||||
flush_duration: meter
|
||||
.f64_histogram("genarrative.tracking_outbox.flush.duration")
|
||||
.with_unit("s")
|
||||
.with_description("Tracking outbox sealed file flush duration")
|
||||
.build(),
|
||||
flushed_events: meter
|
||||
.u64_counter("genarrative.tracking_outbox.events.flushed")
|
||||
.with_description("Tracking events accepted by SpacetimeDB batch procedure")
|
||||
.build(),
|
||||
flushed_bytes: meter
|
||||
.u64_counter("genarrative.tracking_outbox.bytes.flushed")
|
||||
.with_unit("By")
|
||||
.with_description("Tracking outbox bytes removed after successful flush")
|
||||
.build(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn register_http_request_permits_available_metric() -> HttpRequestPermitsAvailableGauges {
|
||||
let gauges = HttpRequestPermitsAvailableGauges::new();
|
||||
let meter = global::meter("genarrative-api");
|
||||
let default_gauge = gauges.default.clone();
|
||||
let gallery_gauge = gauges.gallery.clone();
|
||||
let detail_gauge = gauges.detail.clone();
|
||||
let admin_gauge = gauges.admin.clone();
|
||||
meter
|
||||
.i64_observable_up_down_counter("genarrative.http.server.request_permits.available")
|
||||
.with_unit("{permit}")
|
||||
.with_description("Available api-server HTTP backpressure permits")
|
||||
.with_callback(move |observer| {
|
||||
observer.observe(gauge.load(Ordering::Relaxed), &[]);
|
||||
observer.observe(
|
||||
default_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Default.as_str(),
|
||||
)],
|
||||
);
|
||||
observer.observe(
|
||||
gallery_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Gallery.as_str(),
|
||||
)],
|
||||
);
|
||||
observer.observe(
|
||||
detail_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Detail.as_str(),
|
||||
)],
|
||||
);
|
||||
observer.observe(
|
||||
admin_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Admin.as_str(),
|
||||
)],
|
||||
);
|
||||
})
|
||||
.build();
|
||||
gauges
|
||||
}
|
||||
|
||||
pub(crate) fn register_http_runtime_metrics() {
|
||||
@@ -217,6 +416,22 @@ pub(crate) fn register_http_runtime_metrics() {
|
||||
observer.observe(HTTP_RESPONSE_BODY_IN_FLIGHT.load(Ordering::Relaxed), &[]);
|
||||
})
|
||||
.build();
|
||||
meter
|
||||
.i64_observable_up_down_counter("genarrative.tracking_outbox.pending.bytes")
|
||||
.with_unit("By")
|
||||
.with_description("Tracking outbox bytes waiting on local disk")
|
||||
.with_callback(|observer| {
|
||||
observer.observe(TRACKING_OUTBOX_PENDING_BYTES.load(Ordering::Relaxed), &[]);
|
||||
})
|
||||
.build();
|
||||
meter
|
||||
.i64_observable_up_down_counter("genarrative.tracking_outbox.pending.files")
|
||||
.with_unit("{file}")
|
||||
.with_description("Tracking outbox sealed files waiting for flush")
|
||||
.with_callback(|observer| {
|
||||
observer.observe(TRACKING_OUTBOX_PENDING_FILES.load(Ordering::Relaxed), &[]);
|
||||
})
|
||||
.build();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -284,19 +499,13 @@ mod tests {
|
||||
observability_route("/api/runtime/puzzle/runs/run-123/history"),
|
||||
"/api/*"
|
||||
);
|
||||
assert_eq!(
|
||||
observability_route("/admin/api/debug/http"),
|
||||
"/admin/api/*"
|
||||
);
|
||||
assert_eq!(observability_route("/admin/api/debug/http"), "/admin/api/*");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_request_scheme_uses_forwarded_proto_first_value() {
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
"x-forwarded-proto",
|
||||
HeaderValue::from_static("https, http"),
|
||||
);
|
||||
headers.insert("x-forwarded-proto", HeaderValue::from_static("https, http"));
|
||||
|
||||
assert_eq!(resolve_request_scheme(&headers), "https");
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ pub async fn record_route_tracking_event_after_success(
|
||||
draft.owner_user_id = draft.user_id.clone();
|
||||
}
|
||||
|
||||
record_tracking_event_after_success(state, request_context, draft).await;
|
||||
record_route_tracking_event_via_outbox_after_success(state, request_context, draft).await;
|
||||
}
|
||||
|
||||
fn resolve_route_tracking_spec(method: &Method, path: &str) -> Option<RouteTrackingSpec> {
|
||||
@@ -524,26 +524,101 @@ pub async fn record_tracking_event_after_success(
|
||||
request_context: &RequestContext,
|
||||
draft: TrackingEventDraft,
|
||||
) {
|
||||
let occurred_at_micros = OffsetDateTime::now_utc().unix_timestamp_nanos() / 1_000;
|
||||
let event_id = build_tracking_event_id(&draft, occurred_at_micros);
|
||||
let event_key = draft.event_key.to_string();
|
||||
let scope_kind = draft.scope_kind;
|
||||
let scope_id = draft.scope_id;
|
||||
let metadata_json = draft.metadata.to_string();
|
||||
record_tracking_event_input_after_success(
|
||||
state,
|
||||
request_context,
|
||||
build_tracking_event_input(draft),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn record_route_tracking_event_via_outbox_after_success(
|
||||
state: &AppState,
|
||||
request_context: &RequestContext,
|
||||
draft: TrackingEventDraft,
|
||||
) {
|
||||
let event = build_tracking_event_input(draft);
|
||||
let event_key = event.event_key.clone();
|
||||
let scope_kind = event.scope_kind;
|
||||
let scope_id = event.scope_id.clone();
|
||||
|
||||
if let Some(outbox) = state.tracking_outbox() {
|
||||
match outbox.enqueue(event.clone()).await {
|
||||
Ok(crate::tracking_outbox::TrackingOutboxEnqueueOutcome::Enqueued) => {
|
||||
tracing::debug!(
|
||||
request_id = request_context.request_id(),
|
||||
operation = request_context.operation(),
|
||||
event_key = %event_key,
|
||||
scope_kind = %scope_kind.as_str(),
|
||||
scope_id = %scope_id,
|
||||
"后端 route 埋点已写入本机 outbox"
|
||||
);
|
||||
return;
|
||||
}
|
||||
Ok(crate::tracking_outbox::TrackingOutboxEnqueueOutcome::Dropped { reason }) => {
|
||||
tracing::warn!(
|
||||
request_id = request_context.request_id(),
|
||||
operation = request_context.operation(),
|
||||
event_key = %event_key,
|
||||
scope_kind = %scope_kind.as_str(),
|
||||
scope_id = %scope_id,
|
||||
reason,
|
||||
"后端 route 埋点因 outbox 保护阈值被丢弃,主业务流程继续"
|
||||
);
|
||||
return;
|
||||
}
|
||||
Err(error) => {
|
||||
tracing::warn!(
|
||||
request_id = request_context.request_id(),
|
||||
operation = request_context.operation(),
|
||||
event_key = %event_key,
|
||||
scope_kind = %scope_kind.as_str(),
|
||||
scope_id = %scope_id,
|
||||
error = %error,
|
||||
"后端 route 埋点写入 outbox 失败,回退同步直写 SpacetimeDB"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
record_tracking_event_input_after_success(state, request_context, event).await;
|
||||
}
|
||||
|
||||
async fn record_tracking_event_input_after_success(
|
||||
state: &AppState,
|
||||
request_context: &RequestContext,
|
||||
event: module_runtime::RuntimeTrackingEventInput,
|
||||
) {
|
||||
let event_key = event.event_key.clone();
|
||||
let log_scope_kind = event.scope_kind;
|
||||
let scope_id = event.scope_id.clone();
|
||||
|
||||
let module_runtime::RuntimeTrackingEventInput {
|
||||
event_id,
|
||||
event_key: procedure_event_key,
|
||||
scope_kind: procedure_scope_kind,
|
||||
scope_id: procedure_scope_id,
|
||||
user_id,
|
||||
owner_user_id,
|
||||
profile_id,
|
||||
module_key,
|
||||
metadata_json,
|
||||
occurred_at_micros,
|
||||
} = event;
|
||||
|
||||
match state
|
||||
.spacetime_client()
|
||||
.record_tracking_event(
|
||||
event_id,
|
||||
event_key.clone(),
|
||||
scope_kind,
|
||||
scope_id.clone(),
|
||||
draft.user_id,
|
||||
draft.owner_user_id,
|
||||
draft.profile_id,
|
||||
draft.module_key.map(str::to_string),
|
||||
procedure_event_key,
|
||||
procedure_scope_kind,
|
||||
procedure_scope_id,
|
||||
user_id,
|
||||
owner_user_id,
|
||||
profile_id,
|
||||
module_key,
|
||||
metadata_json,
|
||||
occurred_at_micros as i64,
|
||||
occurred_at_micros,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -551,7 +626,7 @@ pub async fn record_tracking_event_after_success(
|
||||
request_id = request_context.request_id(),
|
||||
operation = request_context.operation(),
|
||||
event_key = %event_key,
|
||||
scope_kind = %scope_kind.as_str(),
|
||||
scope_kind = %log_scope_kind.as_str(),
|
||||
scope_id = %scope_id,
|
||||
"后端埋点已记录"
|
||||
),
|
||||
@@ -559,7 +634,7 @@ pub async fn record_tracking_event_after_success(
|
||||
request_id = request_context.request_id(),
|
||||
operation = request_context.operation(),
|
||||
event_key = %event_key,
|
||||
scope_kind = %scope_kind.as_str(),
|
||||
scope_kind = %log_scope_kind.as_str(),
|
||||
scope_id = %scope_id,
|
||||
error = %error,
|
||||
"后端埋点记录失败,主业务流程继续"
|
||||
@@ -567,6 +642,26 @@ pub async fn record_tracking_event_after_success(
|
||||
}
|
||||
}
|
||||
|
||||
fn build_tracking_event_input(
|
||||
draft: TrackingEventDraft,
|
||||
) -> module_runtime::RuntimeTrackingEventInput {
|
||||
let occurred_at_micros = OffsetDateTime::now_utc().unix_timestamp_nanos() / 1_000;
|
||||
let event_id = build_tracking_event_id(&draft, occurred_at_micros);
|
||||
|
||||
module_runtime::RuntimeTrackingEventInput {
|
||||
event_id,
|
||||
event_key: draft.event_key.to_string(),
|
||||
scope_kind: draft.scope_kind,
|
||||
scope_id: draft.scope_id,
|
||||
user_id: draft.user_id,
|
||||
owner_user_id: draft.owner_user_id,
|
||||
profile_id: draft.profile_id,
|
||||
module_key: draft.module_key.map(str::to_string),
|
||||
metadata_json: draft.metadata.to_string(),
|
||||
occurred_at_micros: occurred_at_micros as i64,
|
||||
}
|
||||
}
|
||||
|
||||
fn build_tracking_event_id(draft: &TrackingEventDraft, occurred_at_micros: i128) -> String {
|
||||
if draft.event_key == "daily_login"
|
||||
&& draft.scope_kind == RuntimeTrackingScopeKind::User
|
||||
|
||||
621
server-rs/crates/api-server/src/tracking_outbox.rs
Normal file
621
server-rs/crates/api-server/src/tracking_outbox.rs
Normal file
@@ -0,0 +1,621 @@
|
||||
use std::{
|
||||
fmt,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
time::{Duration, Instant, SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
|
||||
use module_runtime::RuntimeTrackingEventInput;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use spacetime_client::{SpacetimeClient, SpacetimeClientError};
|
||||
use tokio::{
|
||||
fs::{self, File, OpenOptions},
|
||||
io::{AsyncBufReadExt, AsyncWriteExt, BufReader},
|
||||
sync::{Mutex, Notify},
|
||||
time::sleep,
|
||||
};
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::config::AppConfig;
|
||||
|
||||
const ACTIVE_FILE_NAME: &str = "active.ndjson";
|
||||
const SEALED_FILE_PREFIX: &str = "sealed-";
|
||||
const CORRUPT_FILE_PREFIX: &str = "corrupt-";
|
||||
const SEALED_FILE_EXTENSION: &str = ".ndjson";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TrackingOutbox {
|
||||
dir: PathBuf,
|
||||
batch_size: usize,
|
||||
flush_interval: Duration,
|
||||
max_bytes: u64,
|
||||
spacetime_client: SpacetimeClient,
|
||||
inner: Arc<Mutex<TrackingOutboxInner>>,
|
||||
flush_notify: Arc<Notify>,
|
||||
}
|
||||
|
||||
struct TrackingOutboxInner {
|
||||
initialized: bool,
|
||||
active_file: Option<File>,
|
||||
active_count: usize,
|
||||
active_bytes: u64,
|
||||
total_bytes: u64,
|
||||
last_sealed_at: Instant,
|
||||
}
|
||||
|
||||
/// Result of a successful `enqueue` call.
#[derive(Debug)]
pub enum TrackingOutboxEnqueueOutcome {
    /// The event was appended to the active file.
    Enqueued,
    /// The event was deliberately not stored; `reason` names the protection that triggered.
    Dropped { reason: &'static str },
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TrackingOutboxError {
|
||||
Io(std::io::Error),
|
||||
Json(serde_json::Error),
|
||||
Spacetime(SpacetimeClientError),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
struct TrackingOutboxRecord {
|
||||
event: RuntimeTrackingEventInput,
|
||||
}
|
||||
|
||||
impl TrackingOutbox {
|
||||
pub fn from_config(config: &AppConfig, spacetime_client: SpacetimeClient) -> Option<Arc<Self>> {
|
||||
if !config.tracking_outbox_enabled {
|
||||
return None;
|
||||
}
|
||||
|
||||
let total_bytes = directory_size_if_exists(&config.tracking_outbox_dir).unwrap_or(0);
|
||||
let outbox = Self {
|
||||
dir: config.tracking_outbox_dir.clone(),
|
||||
batch_size: config.tracking_outbox_batch_size.max(1),
|
||||
flush_interval: config.tracking_outbox_flush_interval,
|
||||
max_bytes: config.tracking_outbox_max_bytes,
|
||||
spacetime_client,
|
||||
inner: Arc::new(Mutex::new(TrackingOutboxInner {
|
||||
initialized: false,
|
||||
active_file: None,
|
||||
active_count: 0,
|
||||
active_bytes: 0,
|
||||
total_bytes,
|
||||
last_sealed_at: Instant::now(),
|
||||
})),
|
||||
flush_notify: Arc::new(Notify::new()),
|
||||
};
|
||||
crate::telemetry::update_tracking_outbox_pending_bytes(total_bytes);
|
||||
Some(Arc::new(outbox))
|
||||
}
|
||||
|
||||
pub async fn enqueue(
|
||||
&self,
|
||||
event: RuntimeTrackingEventInput,
|
||||
) -> Result<TrackingOutboxEnqueueOutcome, TrackingOutboxError> {
|
||||
let record = TrackingOutboxRecord { event };
|
||||
let mut line = serde_json::to_vec(&record)?;
|
||||
line.push(b'\n');
|
||||
let line_bytes = line.len().min(u64::MAX as usize) as u64;
|
||||
|
||||
let mut inner = self.inner.lock().await;
|
||||
self.ensure_initialized_locked(&mut inner).await?;
|
||||
|
||||
if inner.total_bytes.saturating_add(line_bytes) > self.max_bytes {
|
||||
crate::telemetry::record_tracking_outbox_dropped("max_bytes");
|
||||
return Ok(TrackingOutboxEnqueueOutcome::Dropped {
|
||||
reason: "max_bytes",
|
||||
});
|
||||
}
|
||||
|
||||
let active_path = self.active_path();
|
||||
if inner.active_file.is_none() {
|
||||
inner.active_file = Some(
|
||||
OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&active_path)
|
||||
.await?,
|
||||
);
|
||||
}
|
||||
|
||||
let file = inner
|
||||
.active_file
|
||||
.as_mut()
|
||||
.expect("active file should be open before append");
|
||||
file.write_all(&line).await?;
|
||||
inner.active_count = inner.active_count.saturating_add(1);
|
||||
inner.active_bytes = inner.active_bytes.saturating_add(line_bytes);
|
||||
inner.total_bytes = inner.total_bytes.saturating_add(line_bytes);
|
||||
crate::telemetry::record_tracking_outbox_enqueued();
|
||||
crate::telemetry::update_tracking_outbox_pending_bytes(inner.total_bytes);
|
||||
|
||||
if inner.active_count >= self.batch_size {
|
||||
self.seal_active_locked(&mut inner, "batch_size").await?;
|
||||
self.flush_notify.notify_one();
|
||||
}
|
||||
|
||||
Ok(TrackingOutboxEnqueueOutcome::Enqueued)
|
||||
}
|
||||
|
||||
pub fn spawn_worker(self: Arc<Self>) {
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = sleep(self.flush_interval) => {
|
||||
if let Err(error) = self.seal_active_if_due().await {
|
||||
warn!(error = %error, "tracking outbox 定时封存 active 文件失败");
|
||||
}
|
||||
if let Err(error) = self.flush_sealed_files_once().await {
|
||||
warn!(error = %error, "tracking outbox 批量写入 SpacetimeDB 失败,将保留 sealed 文件等待重试");
|
||||
}
|
||||
}
|
||||
_ = self.flush_notify.notified() => {
|
||||
if let Err(error) = self.flush_sealed_files_once().await {
|
||||
warn!(error = %error, "tracking outbox 批量写入 SpacetimeDB 失败,将保留 sealed 文件等待重试");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Seals the active file when it holds events and at least
/// `flush_interval` has passed since the last seal; otherwise does nothing.
async fn seal_active_if_due(&self) -> Result<(), TrackingOutboxError> {
    let mut inner = self.inner.lock().await;
    self.ensure_initialized_locked(&mut inner).await?;

    let has_pending_events = inner.active_count != 0;
    if has_pending_events && inner.last_sealed_at.elapsed() >= self.flush_interval {
        self.seal_active_locked(&mut inner, "flush_interval").await
    } else {
        Ok(())
    }
}
|
||||
|
||||
async fn flush_sealed_files_once(&self) -> Result<(), TrackingOutboxError> {
|
||||
self.ensure_initialized().await?;
|
||||
|
||||
let sealed_files = self.list_sealed_files().await?;
|
||||
crate::telemetry::update_tracking_outbox_pending_files(sealed_files.len());
|
||||
for path in sealed_files {
|
||||
let started_at = Instant::now();
|
||||
let metadata = fs::metadata(&path).await?;
|
||||
let file_bytes = metadata.len();
|
||||
let events = match read_outbox_events(&path).await {
|
||||
Ok(events) => events,
|
||||
Err(error) if error.is_data_corruption() => {
|
||||
let corrupt_path = self.corrupt_path_for(&path);
|
||||
fs::rename(&path, &corrupt_path).await?;
|
||||
self.subtract_total_bytes(file_bytes).await;
|
||||
crate::telemetry::record_tracking_outbox_corrupt_file();
|
||||
warn!(
|
||||
error = %error,
|
||||
source = %path.display(),
|
||||
target = %corrupt_path.display(),
|
||||
"tracking outbox sealed 文件含无法解析的记录,已隔离并继续处理后续文件"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
if events.is_empty() {
|
||||
fs::remove_file(&path).await?;
|
||||
self.subtract_total_bytes(file_bytes).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
match self.spacetime_client.record_tracking_events(events).await {
|
||||
Ok(accepted_count) => {
|
||||
fs::remove_file(&path).await?;
|
||||
self.subtract_total_bytes(file_bytes).await;
|
||||
crate::telemetry::record_tracking_outbox_flush(
|
||||
started_at.elapsed(),
|
||||
accepted_count,
|
||||
file_bytes,
|
||||
false,
|
||||
);
|
||||
debug!(
|
||||
accepted_count,
|
||||
file_bytes,
|
||||
path = %path.display(),
|
||||
"tracking outbox sealed 文件已批量入库并删除"
|
||||
);
|
||||
}
|
||||
Err(error) => {
|
||||
crate::telemetry::record_tracking_outbox_flush(
|
||||
started_at.elapsed(),
|
||||
0,
|
||||
file_bytes,
|
||||
true,
|
||||
);
|
||||
return Err(TrackingOutboxError::Spacetime(error));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Takes the state lock and runs the lazy initialisation if it has not
/// run yet.
async fn ensure_initialized(&self) -> Result<(), TrackingOutboxError> {
    self.ensure_initialized_locked(&mut *self.inner.lock().await)
        .await
}
|
||||
|
||||
async fn ensure_initialized_locked(
|
||||
&self,
|
||||
inner: &mut TrackingOutboxInner,
|
||||
) -> Result<(), TrackingOutboxError> {
|
||||
if inner.initialized {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
fs::create_dir_all(&self.dir).await?;
|
||||
self.seal_existing_active_file().await?;
|
||||
inner.total_bytes = directory_size(&self.dir).await?;
|
||||
inner.initialized = true;
|
||||
inner.last_sealed_at = Instant::now();
|
||||
crate::telemetry::update_tracking_outbox_pending_bytes(inner.total_bytes);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn seal_active_locked(
|
||||
&self,
|
||||
inner: &mut TrackingOutboxInner,
|
||||
reason: &'static str,
|
||||
) -> Result<(), TrackingOutboxError> {
|
||||
if inner.active_count == 0 && inner.active_bytes == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if let Some(mut file) = inner.active_file.take() {
|
||||
file.flush().await?;
|
||||
file.sync_data().await?;
|
||||
drop(file);
|
||||
}
|
||||
|
||||
let active_path = self.active_path();
|
||||
match fs::metadata(&active_path).await {
|
||||
Ok(metadata) if metadata.len() > 0 => {
|
||||
let sealed_path = self.next_sealed_path();
|
||||
fs::rename(&active_path, &sealed_path).await?;
|
||||
crate::telemetry::record_tracking_outbox_sealed(reason);
|
||||
debug!(
|
||||
reason,
|
||||
event_count = inner.active_count,
|
||||
file_bytes = metadata.len(),
|
||||
path = %sealed_path.display(),
|
||||
"tracking outbox active 文件已封存"
|
||||
);
|
||||
}
|
||||
Ok(_) => {
|
||||
let _ = fs::remove_file(&active_path).await;
|
||||
}
|
||||
Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
|
||||
Err(error) => return Err(error.into()),
|
||||
}
|
||||
|
||||
inner.active_count = 0;
|
||||
inner.active_bytes = 0;
|
||||
inner.last_sealed_at = Instant::now();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn seal_existing_active_file(&self) -> Result<(), TrackingOutboxError> {
|
||||
let active_path = self.active_path();
|
||||
match fs::metadata(&active_path).await {
|
||||
Ok(metadata) if metadata.len() > 0 => {
|
||||
fs::rename(&active_path, self.next_sealed_path()).await?;
|
||||
crate::telemetry::record_tracking_outbox_sealed("startup");
|
||||
}
|
||||
Ok(_) => {
|
||||
let _ = fs::remove_file(&active_path).await;
|
||||
}
|
||||
Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
|
||||
Err(error) => return Err(error.into()),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_sealed_files(&self) -> Result<Vec<PathBuf>, TrackingOutboxError> {
|
||||
let mut entries = fs::read_dir(&self.dir).await?;
|
||||
let mut files = Vec::new();
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let path = entry.path();
|
||||
let Some(name) = path.file_name().and_then(|value| value.to_str()) else {
|
||||
continue;
|
||||
};
|
||||
if name.starts_with(SEALED_FILE_PREFIX) && name.ends_with(SEALED_FILE_EXTENSION) {
|
||||
files.push(path);
|
||||
}
|
||||
}
|
||||
files.sort();
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
async fn subtract_total_bytes(&self, bytes: u64) {
|
||||
let mut inner = self.inner.lock().await;
|
||||
inner.total_bytes = inner.total_bytes.saturating_sub(bytes);
|
||||
crate::telemetry::update_tracking_outbox_pending_bytes(inner.total_bytes);
|
||||
}
|
||||
|
||||
fn active_path(&self) -> PathBuf {
|
||||
self.dir.join(ACTIVE_FILE_NAME)
|
||||
}
|
||||
|
||||
fn next_sealed_path(&self) -> PathBuf {
|
||||
self.dir.join(format!(
|
||||
"{SEALED_FILE_PREFIX}{}-{uuid}{SEALED_FILE_EXTENSION}",
|
||||
current_unix_micros(),
|
||||
uuid = uuid::Uuid::new_v4()
|
||||
))
|
||||
}
|
||||
|
||||
fn corrupt_path_for(&self, path: &Path) -> PathBuf {
|
||||
let name = path
|
||||
.file_name()
|
||||
.and_then(|value| value.to_str())
|
||||
.unwrap_or("unknown.ndjson");
|
||||
self.dir.join(format!(
|
||||
"{CORRUPT_FILE_PREFIX}{}-{uuid}-{name}",
|
||||
current_unix_micros(),
|
||||
uuid = uuid::Uuid::new_v4()
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for TrackingOutbox {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("TrackingOutbox")
|
||||
.field("dir", &self.dir)
|
||||
.field("batch_size", &self.batch_size)
|
||||
.field("flush_interval", &self.flush_interval)
|
||||
.field("max_bytes", &self.max_bytes)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for TrackingOutboxError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Io(error) => write!(f, "{error}"),
|
||||
Self::Json(error) => write!(f, "{error}"),
|
||||
Self::Spacetime(error) => write!(f, "{error}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for TrackingOutboxError {
|
||||
fn from(value: std::io::Error) -> Self {
|
||||
Self::Io(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for TrackingOutboxError {
|
||||
fn from(value: serde_json::Error) -> Self {
|
||||
Self::Json(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl TrackingOutboxError {
|
||||
fn is_data_corruption(&self) -> bool {
|
||||
matches!(self, Self::Json(_))
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_outbox_events(
|
||||
path: &Path,
|
||||
) -> Result<Vec<RuntimeTrackingEventInput>, TrackingOutboxError> {
|
||||
let file = File::open(path).await?;
|
||||
let mut lines = BufReader::new(file).lines();
|
||||
let mut events = Vec::new();
|
||||
while let Some(line) = lines.next_line().await? {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let record = serde_json::from_str::<TrackingOutboxRecord>(&line)?;
|
||||
events.push(record.event);
|
||||
}
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
async fn directory_size(path: &Path) -> Result<u64, TrackingOutboxError> {
|
||||
let mut total = 0u64;
|
||||
let mut entries = fs::read_dir(path).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
if !is_pending_outbox_file_name(&entry.file_name()) {
|
||||
continue;
|
||||
}
|
||||
let metadata = entry.metadata().await?;
|
||||
if metadata.is_file() {
|
||||
total = total.saturating_add(metadata.len());
|
||||
}
|
||||
}
|
||||
Ok(total)
|
||||
}
|
||||
|
||||
fn directory_size_if_exists(path: &Path) -> Result<u64, std::io::Error> {
|
||||
if !path.is_dir() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut total = 0u64;
|
||||
for entry in std::fs::read_dir(path)? {
|
||||
let entry = entry?;
|
||||
if !is_pending_outbox_file_name(&entry.file_name()) {
|
||||
continue;
|
||||
}
|
||||
let metadata = entry.metadata()?;
|
||||
if metadata.is_file() {
|
||||
total = total.saturating_add(metadata.len());
|
||||
}
|
||||
}
|
||||
Ok(total)
|
||||
}
|
||||
|
||||
/// Current wall-clock time as microseconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch.
fn current_unix_micros() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_micros(),
        Err(_) => 0,
    }
}
|
||||
|
||||
fn is_pending_outbox_file_name(name: &std::ffi::OsStr) -> bool {
|
||||
name.to_str().is_some_and(|value| {
|
||||
value == ACTIVE_FILE_NAME
|
||||
|| (value.starts_with(SEALED_FILE_PREFIX) && value.ends_with(SEALED_FILE_EXTENSION))
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a site-scoped sample event that differs only by `event_id`.
    fn sample_event(event_id: &str) -> RuntimeTrackingEventInput {
        RuntimeTrackingEventInput {
            event_id: event_id.to_owned(),
            event_key: String::from("puzzle_route_success"),
            scope_kind: module_runtime::RuntimeTrackingScopeKind::Site,
            scope_id: String::from("site"),
            user_id: None,
            owner_user_id: None,
            profile_id: None,
            module_key: Some(String::from("puzzle")),
            metadata_json: String::from("{}"),
            occurred_at_micros: 1_713_680_000_000_000,
        }
    }

    /// Returns a per-test temp directory path, removing any leftover first.
    /// The directory itself is not created here.
    fn test_dir(name: &str) -> PathBuf {
        let unique = current_unix_micros();
        let dir = std::env::temp_dir().join(format!("genarrative-tracking-outbox-{name}-{unique}"));
        let _ = std::fs::remove_dir_all(&dir);
        dir
    }

    /// Builds an outbox over `dir` with a 60-second flush interval and a
    /// SpacetimeDB client pointed at `http://127.0.0.1:1`.
    fn test_outbox(dir: PathBuf, batch_size: usize, max_bytes: u64) -> Arc<TrackingOutbox> {
        let client = SpacetimeClient::new(spacetime_client::SpacetimeClientConfig {
            server_url: String::from("http://127.0.0.1:1"),
            database: String::from("missing"),
            token: None,
            pool_size: 1,
            procedure_timeout: Duration::from_millis(10),
        });
        let config = AppConfig {
            tracking_outbox_dir: dir,
            tracking_outbox_batch_size: batch_size,
            tracking_outbox_max_bytes: max_bytes,
            tracking_outbox_flush_interval: Duration::from_secs(60),
            ..AppConfig::default()
        };

        TrackingOutbox::from_config(&config, client).expect("outbox should be enabled")
    }

    /// Counts the readable entries in `dir` whose UTF-8 name starts with `prefix`.
    fn count_files_with_prefix(dir: &Path, prefix: &str) -> usize {
        std::fs::read_dir(dir)
            .unwrap()
            .filter_map(Result::ok)
            .filter(|entry| {
                entry
                    .file_name()
                    .to_str()
                    .is_some_and(|name| name.starts_with(prefix))
            })
            .count()
    }

    #[tokio::test]
    async fn enqueue_seals_active_file_when_batch_size_reached_and_rotates_active() {
        let dir = test_dir("batch");
        let outbox = test_outbox(dir.clone(), 2, 1024 * 1024);

        // Two events reach the batch size, so the active file must be sealed.
        outbox.enqueue(sample_event("event-1")).await.unwrap();
        outbox.enqueue(sample_event("event-2")).await.unwrap();

        assert!(!dir.join(ACTIVE_FILE_NAME).exists());
        assert_eq!(count_files_with_prefix(&dir, SEALED_FILE_PREFIX), 1);

        // The third event starts a fresh active file and adds no sealed file.
        outbox.enqueue(sample_event("event-3")).await.unwrap();

        let active_contents = std::fs::read_to_string(dir.join(ACTIVE_FILE_NAME)).unwrap();
        assert!(active_contents.contains("event-3"));
        assert_eq!(count_files_with_prefix(&dir, SEALED_FILE_PREFIX), 1);

        let _ = std::fs::remove_dir_all(dir);
    }

    #[tokio::test]
    async fn enqueue_drops_when_outbox_exceeds_max_bytes() {
        let dir = test_dir("max-bytes");
        // A one-byte budget cannot hold any serialised event.
        let outbox = test_outbox(dir.clone(), 500, 1);

        let outcome = outbox.enqueue(sample_event("event-1")).await.unwrap();

        assert!(matches!(
            outcome,
            TrackingOutboxEnqueueOutcome::Dropped {
                reason: "max_bytes"
            }
        ));
        assert!(!dir.join(ACTIVE_FILE_NAME).exists());

        let _ = std::fs::remove_dir_all(dir);
    }

    #[tokio::test]
    async fn flush_quarantines_corrupt_sealed_file() {
        let dir = test_dir("corrupt");
        std::fs::create_dir_all(&dir).unwrap();
        let sealed_path = dir.join(format!("{SEALED_FILE_PREFIX}bad{SEALED_FILE_EXTENSION}"));
        std::fs::write(&sealed_path, b"{not-json}\n").unwrap();
        let outbox = test_outbox(dir.clone(), 500, 1024 * 1024);

        outbox.flush_sealed_files_once().await.unwrap();

        assert!(!sealed_path.exists());
        assert_eq!(count_files_with_prefix(&dir, CORRUPT_FILE_PREFIX), 1);

        let _ = std::fs::remove_dir_all(dir);
    }

    #[test]
    fn directory_size_excludes_quarantined_corrupt_files() {
        let dir = test_dir("directory-size");
        std::fs::create_dir_all(&dir).unwrap();

        let sealed_name = format!("{SEALED_FILE_PREFIX}one{SEALED_FILE_EXTENSION}");
        let corrupt_name = format!("{CORRUPT_FILE_PREFIX}one{SEALED_FILE_EXTENSION}");
        std::fs::write(dir.join(ACTIVE_FILE_NAME), b"active").unwrap();
        std::fs::write(dir.join(sealed_name), b"sealed").unwrap();
        std::fs::write(dir.join(corrupt_name), b"corrupt").unwrap();

        let total = directory_size_if_exists(&dir).unwrap();

        // 6 bytes ("active") + 6 bytes ("sealed"); the corrupt file is ignored.
        assert_eq!(total, 12);

        let _ = std::fs::remove_dir_all(dir);
    }
}
|
||||
@@ -2061,6 +2061,7 @@ fn map_sms_provider_error_to_phone_error(error: SmsProviderError) -> PhoneAuthEr
|
||||
SmsProviderError::InvalidConfig(message) => {
|
||||
PhoneAuthError::SmsProviderInvalidConfig(message)
|
||||
}
|
||||
SmsProviderError::InvalidVerifyCode => PhoneAuthError::InvalidVerifyCode,
|
||||
SmsProviderError::Upstream(message) => PhoneAuthError::SmsProviderUpstream(message),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -706,6 +706,14 @@ pub struct RuntimeTrackingEventProcedureResult {
|
||||
pub error_message: Option<String>,
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "spacetime-types", derive(SpacetimeType))]
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RuntimeTrackingEventBatchProcedureResult {
|
||||
pub ok: bool,
|
||||
pub accepted_count: u32,
|
||||
pub error_message: Option<String>,
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "spacetime-types", derive(SpacetimeType))]
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RuntimeProfileTaskConfigSnapshot {
|
||||
|
||||
@@ -624,6 +624,7 @@ pub mod record_custom_world_profile_play_procedure;
|
||||
pub mod record_daily_login_tracking_event_and_return_procedure;
|
||||
pub mod record_puzzle_work_like_procedure;
|
||||
pub mod record_tracking_event_and_return_procedure;
|
||||
pub mod record_tracking_events_and_return_procedure;
|
||||
pub mod record_visual_novel_runtime_event_procedure;
|
||||
pub mod redeem_profile_referral_invite_code_procedure;
|
||||
pub mod redeem_profile_reward_code_procedure;
|
||||
@@ -764,6 +765,7 @@ pub mod runtime_snapshot_row_type;
|
||||
pub mod runtime_snapshot_table;
|
||||
pub mod runtime_snapshot_type;
|
||||
pub mod runtime_snapshot_upsert_input_type;
|
||||
pub mod runtime_tracking_event_batch_procedure_result_type;
|
||||
pub mod runtime_tracking_event_input_type;
|
||||
pub mod runtime_tracking_event_procedure_result_type;
|
||||
pub mod runtime_tracking_scope_kind_type;
|
||||
@@ -1548,6 +1550,7 @@ pub use record_custom_world_profile_play_procedure::record_custom_world_profile_
|
||||
pub use record_daily_login_tracking_event_and_return_procedure::record_daily_login_tracking_event_and_return;
|
||||
pub use record_puzzle_work_like_procedure::record_puzzle_work_like;
|
||||
pub use record_tracking_event_and_return_procedure::record_tracking_event_and_return;
|
||||
pub use record_tracking_events_and_return_procedure::record_tracking_events_and_return;
|
||||
pub use record_visual_novel_runtime_event_procedure::record_visual_novel_runtime_event;
|
||||
pub use redeem_profile_referral_invite_code_procedure::redeem_profile_referral_invite_code;
|
||||
pub use redeem_profile_reward_code_procedure::redeem_profile_reward_code;
|
||||
@@ -1688,6 +1691,7 @@ pub use runtime_snapshot_row_type::RuntimeSnapshotRow;
|
||||
pub use runtime_snapshot_table::*;
|
||||
pub use runtime_snapshot_type::RuntimeSnapshot;
|
||||
pub use runtime_snapshot_upsert_input_type::RuntimeSnapshotUpsertInput;
|
||||
pub use runtime_tracking_event_batch_procedure_result_type::RuntimeTrackingEventBatchProcedureResult;
|
||||
pub use runtime_tracking_event_input_type::RuntimeTrackingEventInput;
|
||||
pub use runtime_tracking_event_procedure_result_type::RuntimeTrackingEventProcedureResult;
|
||||
pub use runtime_tracking_scope_kind_type::RuntimeTrackingScopeKind;
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
|
||||
// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
|
||||
|
||||
#![allow(unused, clippy::all)]
|
||||
use spacetimedb_sdk::__codegen::{self as __sdk, __lib, __sats, __ws};
|
||||
|
||||
use super::runtime_tracking_event_batch_procedure_result_type::RuntimeTrackingEventBatchProcedureResult;
|
||||
use super::runtime_tracking_event_input_type::RuntimeTrackingEventInput;
|
||||
|
||||
#[derive(__lib::ser::Serialize, __lib::de::Deserialize, Clone, PartialEq, Debug)]
|
||||
#[sats(crate = __lib)]
|
||||
struct RecordTrackingEventsAndReturnArgs {
|
||||
pub inputs: Vec<RuntimeTrackingEventInput>,
|
||||
}
|
||||
|
||||
impl __sdk::InModule for RecordTrackingEventsAndReturnArgs {
|
||||
type Module = super::RemoteModule;
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
/// Extension trait for access to the procedure `record_tracking_events_and_return`.
|
||||
///
|
||||
/// Implemented for [`super::RemoteProcedures`].
|
||||
pub trait record_tracking_events_and_return {
|
||||
fn record_tracking_events_and_return(&self, inputs: Vec<RuntimeTrackingEventInput>) {
|
||||
self.record_tracking_events_and_return_then(inputs, |_, _| {});
|
||||
}
|
||||
|
||||
fn record_tracking_events_and_return_then(
|
||||
&self,
|
||||
inputs: Vec<RuntimeTrackingEventInput>,
|
||||
|
||||
__callback: impl FnOnce(
|
||||
&super::ProcedureEventContext,
|
||||
Result<RuntimeTrackingEventBatchProcedureResult, __sdk::InternalError>,
|
||||
) + Send
|
||||
+ 'static,
|
||||
);
|
||||
}
|
||||
|
||||
impl record_tracking_events_and_return for super::RemoteProcedures {
|
||||
fn record_tracking_events_and_return_then(
|
||||
&self,
|
||||
inputs: Vec<RuntimeTrackingEventInput>,
|
||||
|
||||
__callback: impl FnOnce(
|
||||
&super::ProcedureEventContext,
|
||||
Result<RuntimeTrackingEventBatchProcedureResult, __sdk::InternalError>,
|
||||
) + Send
|
||||
+ 'static,
|
||||
) {
|
||||
self.imp
|
||||
.invoke_procedure_with_callback::<_, RuntimeTrackingEventBatchProcedureResult>(
|
||||
"record_tracking_events_and_return",
|
||||
RecordTrackingEventsAndReturnArgs { inputs },
|
||||
__callback,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
|
||||
// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
|
||||
|
||||
#![allow(unused, clippy::all)]
|
||||
use spacetimedb_sdk::__codegen::{self as __sdk, __lib, __sats, __ws};
|
||||
|
||||
#[derive(__lib::ser::Serialize, __lib::de::Deserialize, Clone, PartialEq, Debug)]
|
||||
#[sats(crate = __lib)]
|
||||
pub struct RuntimeTrackingEventBatchProcedureResult {
|
||||
pub ok: bool,
|
||||
pub accepted_count: u32,
|
||||
pub error_message: Option<String>,
|
||||
}
|
||||
|
||||
impl __sdk::InModule for RuntimeTrackingEventBatchProcedureResult {
|
||||
type Module = super::RemoteModule;
|
||||
}
|
||||
@@ -585,6 +585,35 @@ impl SpacetimeClient {
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn record_tracking_events(
|
||||
&self,
|
||||
events: Vec<module_runtime::RuntimeTrackingEventInput>,
|
||||
) -> Result<u32, SpacetimeClientError> {
|
||||
if events.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let procedure_inputs = events
|
||||
.into_iter()
|
||||
.map(crate::module_bindings::RuntimeTrackingEventInput::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
self.call_after_connect(
|
||||
"record_tracking_events_and_return",
|
||||
move |connection, sender| {
|
||||
connection
|
||||
.procedures()
|
||||
.record_tracking_events_and_return_then(procedure_inputs, move |_, result| {
|
||||
let mapped = result
|
||||
.map_err(SpacetimeClientError::from_sdk_error)
|
||||
.and_then(map_runtime_tracking_event_batch_procedure_result);
|
||||
send_once(&sender, mapped);
|
||||
});
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn get_profile_task_center(
|
||||
&self,
|
||||
user_id: String,
|
||||
|
||||
@@ -558,6 +558,33 @@ pub fn record_tracking_event_and_return(
|
||||
}
|
||||
}
|
||||
|
||||
// 高频 route tracking 由 api-server 本机 outbox 批量写入,减少公开列表热路径上的 procedure 调用次数。
|
||||
#[spacetimedb::procedure]
|
||||
pub fn record_tracking_events_and_return(
|
||||
ctx: &mut ProcedureContext,
|
||||
inputs: Vec<RuntimeTrackingEventInput>,
|
||||
) -> RuntimeTrackingEventBatchProcedureResult {
|
||||
match ctx.try_with_tx(|tx| {
|
||||
let mut accepted_count = 0u32;
|
||||
for input in &inputs {
|
||||
record_tracking_event(tx, input.clone())?;
|
||||
accepted_count = accepted_count.saturating_add(1);
|
||||
}
|
||||
Ok(accepted_count)
|
||||
}) {
|
||||
Ok(accepted_count) => RuntimeTrackingEventBatchProcedureResult {
|
||||
ok: true,
|
||||
accepted_count,
|
||||
error_message: None,
|
||||
},
|
||||
Err(message) => RuntimeTrackingEventBatchProcedureResult {
|
||||
ok: false,
|
||||
accepted_count: 0,
|
||||
error_message: Some(message),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// 登录成功埋点由认证链路主动调用;任务中心只负责读取和刷新任务进度。
|
||||
#[spacetimedb::procedure]
|
||||
pub fn record_daily_login_tracking_event_and_return(
|
||||
@@ -1539,6 +1566,19 @@ mod tests {
|
||||
assert!(!should_skip_existing_tracking_event_id(false));
|
||||
}
|
||||
|
||||
// A successful batch result exposes its accepted count and carries no error message.
#[test]
fn tracking_batch_result_reports_accepted_count() {
    let RuntimeTrackingEventBatchProcedureResult {
        ok,
        accepted_count,
        error_message,
    } = RuntimeTrackingEventBatchProcedureResult {
        ok: true,
        accepted_count: 2,
        error_message: None,
    };

    assert!(ok);
    assert_eq!(accepted_count, 2);
    assert!(error_message.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn recent_public_work_play_counts_group_requested_profiles_in_window() {
|
||||
let now_micros = PUBLIC_WORK_PLAY_DAY_MICROS * 10;
|
||||
|
||||
Reference in New Issue
Block a user