feat(api-server): add container loadtest observability
This commit is contained in:
@@ -4,11 +4,19 @@ use axum::{
|
||||
http::{HeaderMap, Request, Response},
|
||||
middleware::Next,
|
||||
};
|
||||
use http_body_util::BodyExt;
|
||||
use opentelemetry::{KeyValue, global, metrics::Counter};
|
||||
use std::sync::{
|
||||
Arc, OnceLock,
|
||||
atomic::{AtomicI64, Ordering},
|
||||
};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::{request_context::resolve_request_id, state::AppState};
|
||||
|
||||
/// Count of HTTP response bodies that have been wrapped by
/// `track_response_body_in_flight` and not yet dropped.
///
/// Incremented when a response body is wrapped and decremented by
/// `ResponseBodyInFlightGuard::drop`.
static HTTP_RESPONSE_BODY_IN_FLIGHT: AtomicI64 = AtomicI64::new(0);

/// Lazily created cell holding the value most recently passed to
/// `update_http_request_permits_available`.
///
/// It stays unset until the first update, which also registers the observable
/// instrument that reads it.
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<Arc<AtomicI64>> = OnceLock::new();
|
||||
|
||||
// Centralizes api-server HTTP observability so that handlers do not scatter high-cardinality fields or create instruments repeatedly.
|
||||
pub async fn record_http_observability(
|
||||
State(state): State<AppState>,
|
||||
@@ -67,7 +75,46 @@ pub async fn record_http_observability(
|
||||
);
|
||||
}
|
||||
|
||||
response
|
||||
track_response_body_in_flight(response)
|
||||
}
|
||||
|
||||
pub(crate) fn update_http_request_permits_available(available: usize) {
|
||||
let gauge = HTTP_REQUEST_PERMITS_AVAILABLE.get_or_init(|| {
|
||||
let gauge = Arc::new(AtomicI64::new(0));
|
||||
register_http_request_permits_available_metric(gauge.clone());
|
||||
gauge
|
||||
});
|
||||
gauge.store(available.min(i64::MAX as usize) as i64, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_hit() {
|
||||
puzzle_gallery_cache_metrics().hits.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_miss() {
|
||||
puzzle_gallery_cache_metrics().misses.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_rebuild(duration: std::time::Duration, data_bytes: usize) {
|
||||
let metrics = puzzle_gallery_cache_metrics();
|
||||
metrics.rebuilds.add(1, &[]);
|
||||
metrics
|
||||
.rebuild_duration
|
||||
.record(duration.as_secs_f64(), &[]);
|
||||
metrics
|
||||
.data_json_bytes
|
||||
.record(data_bytes.min(u64::MAX as usize) as u64, &[]);
|
||||
}
|
||||
|
||||
fn track_response_body_in_flight(response: Response<Body>) -> Response<Body> {
|
||||
response.map(|body| {
|
||||
HTTP_RESPONSE_BODY_IN_FLIGHT.fetch_add(1, Ordering::Relaxed);
|
||||
let guard = ResponseBodyInFlightGuard;
|
||||
Body::new(body.map_frame(move |frame| {
|
||||
let _guard = &guard;
|
||||
frame
|
||||
}))
|
||||
})
|
||||
}
|
||||
|
||||
struct HttpMetrics {
|
||||
@@ -76,6 +123,22 @@ struct HttpMetrics {
|
||||
duration: opentelemetry::metrics::Histogram<f64>,
|
||||
}
|
||||
|
||||
struct PuzzleGalleryCacheMetrics {
|
||||
hits: Counter<u64>,
|
||||
misses: Counter<u64>,
|
||||
rebuilds: Counter<u64>,
|
||||
rebuild_duration: opentelemetry::metrics::Histogram<f64>,
|
||||
data_json_bytes: opentelemetry::metrics::Histogram<u64>,
|
||||
}
|
||||
|
||||
struct ResponseBodyInFlightGuard;
|
||||
|
||||
impl Drop for ResponseBodyInFlightGuard {
|
||||
fn drop(&mut self) {
|
||||
HTTP_RESPONSE_BODY_IN_FLIGHT.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
fn http_metrics() -> &'static HttpMetrics {
|
||||
static METRICS: std::sync::OnceLock<HttpMetrics> = std::sync::OnceLock::new();
|
||||
METRICS.get_or_init(|| {
|
||||
@@ -99,6 +162,64 @@ fn http_metrics() -> &'static HttpMetrics {
|
||||
})
|
||||
}
|
||||
|
||||
fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
|
||||
static METRICS: std::sync::OnceLock<PuzzleGalleryCacheMetrics> = std::sync::OnceLock::new();
|
||||
METRICS.get_or_init(|| {
|
||||
let meter = global::meter("genarrative-api");
|
||||
PuzzleGalleryCacheMetrics {
|
||||
hits: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.hits")
|
||||
.with_description("Puzzle gallery response cache hits")
|
||||
.build(),
|
||||
misses: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.misses")
|
||||
.with_description("Puzzle gallery response cache misses")
|
||||
.build(),
|
||||
rebuilds: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.rebuilds")
|
||||
.with_description("Puzzle gallery response cache rebuild count")
|
||||
.build(),
|
||||
rebuild_duration: meter
|
||||
.f64_histogram("genarrative.puzzle_gallery.cache.rebuild.duration")
|
||||
.with_unit("s")
|
||||
.with_description("Puzzle gallery response cache rebuild duration")
|
||||
.build(),
|
||||
data_json_bytes: meter
|
||||
.u64_histogram("genarrative.puzzle_gallery.cache.data_json_bytes")
|
||||
.with_unit("By")
|
||||
.with_description("Serialized puzzle gallery data JSON size")
|
||||
.build(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn register_http_request_permits_available_metric(gauge: Arc<AtomicI64>) {
|
||||
let meter = global::meter("genarrative-api");
|
||||
meter
|
||||
.i64_observable_up_down_counter("genarrative.http.server.request_permits.available")
|
||||
.with_unit("{permit}")
|
||||
.with_description("Available api-server HTTP backpressure permits")
|
||||
.with_callback(move |observer| {
|
||||
observer.observe(gauge.load(Ordering::Relaxed), &[]);
|
||||
})
|
||||
.build();
|
||||
}
|
||||
|
||||
pub(crate) fn register_http_runtime_metrics() {
|
||||
static REGISTERED: OnceLock<()> = OnceLock::new();
|
||||
REGISTERED.get_or_init(|| {
|
||||
let meter = global::meter("genarrative-api");
|
||||
meter
|
||||
.i64_observable_up_down_counter("genarrative.http.server.response_bodies.in_flight")
|
||||
.with_unit("{response}")
|
||||
.with_description("HTTP response bodies still owned by Axum/Hyper")
|
||||
.with_callback(|observer| {
|
||||
observer.observe(HTTP_RESPONSE_BODY_IN_FLIGHT.load(Ordering::Relaxed), &[]);
|
||||
})
|
||||
.build();
|
||||
});
|
||||
}
|
||||
|
||||
fn http_base_labels(method: String, route: String) -> Vec<KeyValue> {
|
||||
vec![
|
||||
KeyValue::new("http.request.method", method),
|
||||
|
||||
Reference in New Issue
Block a user