Fail closed when SpacetimeDB auth restore is unavailable

This commit is contained in:
kdletters
2026-05-27 20:58:37 +08:00
parent 948d5a698c
commit 418fcb0548
24 changed files with 595 additions and 601 deletions

View File

@@ -2,11 +2,12 @@ use axum::{
Router,
body::Body,
extract::{Extension, FromRef},
http::Request,
http::{Request, StatusCode},
middleware,
response::Response,
routing::{get, post},
};
use serde_json::json;
use tower_http::{
classify::ServerErrorsFailureClass,
trace::{DefaultOnRequest, TraceLayer},
@@ -18,6 +19,7 @@ use crate::{
backpressure::limit_concurrent_requests,
creation_entry_config::require_creation_entry_route_enabled,
error_middleware::normalize_error_response,
http_error::AppError,
modules,
request_context::{RequestContext, attach_request_context, resolve_request_id},
response_headers::propagate_request_id_header,
@@ -164,6 +166,96 @@ pub fn build_router(state: AppState) -> Router {
.with_state(state)
}
/// Builds the degraded-mode router that is served while SpacetimeDB is unavailable.
///
/// No business routes are registered: every request falls through to
/// `spacetime_unavailable_handler`, which receives `message` via an `Extension` layer.
/// The same request-context, tracing, request-id and error-normalization middleware
/// used here is still applied so responses keep their usual shape.
pub fn build_spacetime_unavailable_router(message: String) -> Router {
    /// Converts a latency into whole milliseconds, saturating at `u64::MAX`.
    fn saturating_millis(latency: std::time::Duration) -> u64 {
        u64::try_from(latency.as_millis()).unwrap_or(u64::MAX)
    }

    let unavailable_state = SpacetimeUnavailableState {
        message: message.into(),
    };

    let trace_layer = TraceLayer::new_for_http()
        .make_span_with(|request: &Request<Body>| {
            let path = request.uri().path();
            let http_method = request.method();
            let req_id = resolve_request_id(request).unwrap_or_else(|| "unknown".to_string());
            let route = crate::telemetry::observability_route(path);
            let scheme = crate::telemetry::resolve_request_scheme(request.headers());
            let otel_name = format!("{} {}", http_method, route);
            // Fields left `Empty` are filled in by `on_response` once the outcome is known.
            info_span!(
                "http.request",
                otel.kind = "server",
                otel.name = %otel_name,
                otel.status_code = tracing::field::Empty,
                http.response.status_code = tracing::field::Empty,
                method = %http_method,
                http.request.method = %http_method,
                http.route = %route,
                url.scheme = %scheme,
                url.path = %path,
                request_id = %req_id,
                status = tracing::field::Empty,
                latency_ms = tracing::field::Empty,
            )
        })
        .on_request(DefaultOnRequest::new().level(Level::INFO))
        .on_response(
            |response: &axum::response::Response, latency: std::time::Duration, span: &Span| {
                let http_status = response.status();
                let status_code = http_status.as_u16();
                // Only 5xx responses are reported as errors to OpenTelemetry.
                let otel_status = if http_status.is_server_error() {
                    "ERROR"
                } else {
                    "OK"
                };
                span.record("status", status_code);
                span.record("http.response.status_code", status_code);
                span.record("otel.status_code", otel_status);
                span.record("latency_ms", saturating_millis(latency));
            },
        )
        .on_failure(
            |failure: ServerErrorsFailureClass, latency: std::time::Duration, span: &Span| {
                let latency_ms = saturating_millis(latency);
                error!(
                    parent: span,
                    latency_ms,
                    failure = %failure,
                    "http request failed"
                );
            },
        );

    Router::new()
        .fallback(spacetime_unavailable_handler)
        .layer(Extension(unavailable_state))
        // Dependency-unavailable mode mounts no business state and uniformly returns 503,
        // while still keeping the request_id / API version / latency response headers.
        .layer(middleware::from_fn(normalize_error_response))
        .layer(middleware::from_fn(propagate_request_id_header))
        .layer(trace_layer)
        .layer(middleware::from_fn(attach_request_context))
}
/// Per-router state handed to the fallback handler while SpacetimeDB is unavailable.
#[derive(Debug, Clone)]
struct SpacetimeUnavailableState {
    /// Description of why SpacetimeDB is unavailable; the fallback handler echoes it
    /// in the `details.message` field of the 503 response.
    message: std::sync::Arc<str>,
}
/// Fallback handler for the degraded-mode router: answers every request with
/// `503 Service Unavailable`.
///
/// The response details identify SpacetimeDB as the failing provider and include
/// the message stored in `SpacetimeUnavailableState`. The response is rendered
/// with the current `RequestContext`.
async fn spacetime_unavailable_handler(
    Extension(state): Extension<SpacetimeUnavailableState>,
    Extension(request_context): Extension<RequestContext>,
) -> Response {
    let details = json!({
        "provider": "spacetimedb",
        "reason": "spacetime_startup_unavailable",
        "message": &*state.message,
    });

    AppError::from_status(StatusCode::SERVICE_UNAVAILABLE)
        .with_message("SpacetimeDB 暂不可用api-server 正在等待数据库恢复")
        .with_details(details)
        .into_response_with_context(Some(&request_context))
}
async fn record_api_tracking_after_success(
axum::extract::State(state): axum::extract::State<AppState>,
Extension(request_context): Extension<RequestContext>,
@@ -368,7 +460,7 @@ mod tests {
use crate::{config::AppConfig, state::AppState};
use super::build_router;
use super::{build_router, build_spacetime_unavailable_router};
const TEST_PASSWORD: &str = "secret123";
const INTERNAL_TEST_SECRET: &str = "test-internal-secret";
@@ -564,6 +656,38 @@ mod tests {
);
}
/// The degraded-mode router must answer an arbitrary API path with 503, echo the
/// caller's `x-request-id`, and expose the SpacetimeDB failure reason in the body.
#[tokio::test]
async fn spacetime_unavailable_router_returns_service_unavailable_for_requests() {
    // Arrange: a router in "SpacetimeDB unavailable" mode and a request to a business path.
    let router =
        build_spacetime_unavailable_router(String::from("SpacetimeDB 启动恢复认证快照超时"));
    let request = Request::builder()
        .uri("/api/auth/login-options")
        .header("x-request-id", "req-spacetime-unavailable")
        .body(Body::empty())
        .expect("request should build");

    // Act
    let response = router
        .oneshot(request)
        .await
        .expect("request should succeed");

    // Assert: status and request-id propagation.
    assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE);
    let echoed_request_id = response
        .headers()
        .get("x-request-id")
        .and_then(|value| value.to_str().ok());
    assert_eq!(echoed_request_id, Some("req-spacetime-unavailable"));

    // Assert: error payload.
    let payload = read_json_response(response).await;
    let error = &payload["error"];
    assert_eq!(error["code"], "SERVICE_UNAVAILABLE");
    assert_eq!(error["details"]["reason"], "spacetime_startup_unavailable");
    assert_eq!(error["details"]["provider"], "spacetimedb");
}
#[tokio::test]
async fn creation_entry_route_disabled_returns_service_unavailable() {
let state = AppState::new(AppConfig::default()).expect("state should build");