chore: add loadtest observability setup
This commit is contained in:
182
server-rs/crates/api-server/src/telemetry.rs
Normal file
182
server-rs/crates/api-server/src/telemetry.rs
Normal file
@@ -0,0 +1,182 @@
|
||||
use axum::{
|
||||
body::Body,
|
||||
extract::State,
|
||||
http::{HeaderMap, Request, Response},
|
||||
middleware::Next,
|
||||
};
|
||||
use opentelemetry::{KeyValue, global, metrics::Counter};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::{request_context::resolve_request_id, state::AppState};
|
||||
|
||||
// 集中维护 api-server HTTP 观测,避免在 handler 中散落高基数字段或重复创建 instrument。
|
||||
pub async fn record_http_observability(
|
||||
State(state): State<AppState>,
|
||||
request: Request<Body>,
|
||||
next: Next,
|
||||
) -> Response<Body> {
|
||||
let method = request.method().as_str().to_string();
|
||||
let route = observability_route(request.uri().path());
|
||||
let scheme = resolve_request_scheme(request.headers());
|
||||
let path = request.uri().path().to_string();
|
||||
let request_id = resolve_request_id(&request).unwrap_or_else(|| "unknown".to_string());
|
||||
let base_labels = http_base_labels(method.clone(), route.clone());
|
||||
let metrics = http_metrics();
|
||||
metrics.in_flight.add(1, &base_labels);
|
||||
let started_at = std::time::Instant::now();
|
||||
|
||||
let response = next.run(request).await;
|
||||
let status = response.status().as_u16();
|
||||
let status_class = status_class(status);
|
||||
let latency_ms = started_at.elapsed().as_millis().min(u64::MAX as u128) as u64;
|
||||
let slow_request = latency_ms >= state.config.slow_request_threshold_ms;
|
||||
let labels = http_response_labels(base_labels, status);
|
||||
metrics.requests.add(1, &labels);
|
||||
metrics
|
||||
.duration
|
||||
.record(started_at.elapsed().as_secs_f64(), &labels);
|
||||
metrics.in_flight.add(-1, &labels[..2]);
|
||||
|
||||
if slow_request {
|
||||
warn!(
|
||||
request_id = %request_id,
|
||||
http.request.method = %method,
|
||||
http.route = %route,
|
||||
url.scheme = %scheme,
|
||||
url.path = %path,
|
||||
http.response.status_code = status,
|
||||
status,
|
||||
status_class,
|
||||
latency_ms,
|
||||
slow_request = true,
|
||||
"http request completed slowly"
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
request_id = %request_id,
|
||||
http.request.method = %method,
|
||||
http.route = %route,
|
||||
url.scheme = %scheme,
|
||||
url.path = %path,
|
||||
http.response.status_code = status,
|
||||
status,
|
||||
status_class,
|
||||
latency_ms,
|
||||
slow_request = false,
|
||||
"http request completed"
|
||||
);
|
||||
}
|
||||
|
||||
response
|
||||
}
|
||||
|
||||
struct HttpMetrics {
|
||||
requests: Counter<u64>,
|
||||
in_flight: opentelemetry::metrics::UpDownCounter<i64>,
|
||||
duration: opentelemetry::metrics::Histogram<f64>,
|
||||
}
|
||||
|
||||
fn http_metrics() -> &'static HttpMetrics {
|
||||
static METRICS: std::sync::OnceLock<HttpMetrics> = std::sync::OnceLock::new();
|
||||
METRICS.get_or_init(|| {
|
||||
let meter = global::meter("genarrative-api");
|
||||
HttpMetrics {
|
||||
requests: meter
|
||||
.u64_counter("genarrative.http.server.requests")
|
||||
.with_description("HTTP request count grouped by route and status class")
|
||||
.build(),
|
||||
in_flight: meter
|
||||
.i64_up_down_counter("http.server.active_requests")
|
||||
.with_unit("{request}")
|
||||
.with_description("Number of active HTTP server requests")
|
||||
.build(),
|
||||
duration: meter
|
||||
.f64_histogram("http.server.request.duration")
|
||||
.with_unit("s")
|
||||
.with_description("Duration of HTTP server requests")
|
||||
.build(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn http_base_labels(method: String, route: String) -> Vec<KeyValue> {
|
||||
vec![
|
||||
KeyValue::new("http.request.method", method),
|
||||
KeyValue::new("http.route", route),
|
||||
]
|
||||
}
|
||||
|
||||
fn http_response_labels(mut labels: Vec<KeyValue>, status: u16) -> Vec<KeyValue> {
|
||||
labels.push(KeyValue::new("status_class", status_class(status)));
|
||||
labels
|
||||
}
|
||||
|
||||
/// Maps an HTTP status code to its class label (`"1xx"` .. `"5xx"`).
///
/// Codes outside 100..=599 yield `"unknown"`.
fn status_class(status: u16) -> &'static str {
    // The hundreds digit identifies the class; anything else is out of range.
    match status / 100 {
        1 => "1xx",
        2 => "2xx",
        3 => "3xx",
        4 => "4xx",
        5 => "5xx",
        _ => "unknown",
    }
}
|
||||
|
||||
/// Collapses a request path into a small, fixed set of route labels.
///
/// The first matching prefix in the table wins, so the specific gallery
/// prefixes must stay ahead of the generic `/api/` entry. Paths that match no
/// prefix are labelled `"other"`.
pub(crate) fn observability_route(path: &str) -> String {
    // (path prefix, label) pairs, checked in order.
    const ROUTE_PREFIXES: [(&str, &str); 4] = [
        ("/api/runtime/puzzle/gallery", "/api/runtime/puzzle/gallery"),
        (
            "/api/runtime/custom-world-gallery",
            "/api/runtime/custom-world-gallery",
        ),
        ("/admin/api/", "/admin/api/*"),
        ("/api/", "/api/*"),
    ];

    ROUTE_PREFIXES
        .iter()
        .find(|(prefix, _)| path.starts_with(*prefix))
        .map_or("other", |(_, label)| *label)
        .to_string()
}
|
||||
|
||||
pub(crate) fn resolve_request_scheme(headers: &HeaderMap) -> String {
|
||||
headers
|
||||
.get("x-forwarded-proto")
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.and_then(|value| value.split(',').next())
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.unwrap_or("http")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use axum::http::{HeaderMap, HeaderValue};

    use super::{observability_route, resolve_request_scheme};

    // Each (input path, expected label) pair must collapse to a fixed label.
    #[test]
    fn observability_route_keeps_metrics_labels_low_cardinality() {
        let cases = [
            (
                "/api/runtime/puzzle/gallery?cursor=abc",
                "/api/runtime/puzzle/gallery",
            ),
            ("/api/runtime/puzzle/runs/run-123/history", "/api/*"),
            ("/admin/api/debug/http", "/admin/api/*"),
        ];

        for (path, expected) in cases {
            assert_eq!(observability_route(path), expected);
        }
    }

    // With several forwarded protocols listed, only the first one is reported.
    #[test]
    fn resolve_request_scheme_uses_forwarded_proto_first_value() {
        let forwarded = HeaderValue::from_static("https, http");
        let mut header_map = HeaderMap::new();
        header_map.insert("x-forwarded-proto", forwarded);

        let scheme = resolve_request_scheme(&header_map);

        assert_eq!(scheme, "https");
    }
}
|
||||
Reference in New Issue
Block a user