perf(api-server): tune gallery load shedding
This commit is contained in:
@@ -12,10 +12,14 @@ use std::sync::{
|
||||
};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::{request_context::resolve_request_id, state::AppState};
|
||||
use crate::{
|
||||
request_context::resolve_request_id,
|
||||
state::{AppState, HttpRequestPermitPoolKind},
|
||||
};
|
||||
|
||||
static HTTP_RESPONSE_BODY_IN_FLIGHT: AtomicI64 = AtomicI64::new(0);
|
||||
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<Arc<AtomicI64>> = OnceLock::new();
|
||||
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<HttpRequestPermitsAvailableGauges> =
|
||||
OnceLock::new();
|
||||
|
||||
// Centralizes api-server HTTP observability so that handlers do not scatter high-cardinality fields or create duplicate instruments.
|
||||
pub async fn record_http_observability(
|
||||
@@ -78,29 +82,42 @@ pub async fn record_http_observability(
|
||||
track_response_body_in_flight(response)
|
||||
}
|
||||
|
||||
pub(crate) fn update_http_request_permits_available(available: usize) {
|
||||
let gauge = HTTP_REQUEST_PERMITS_AVAILABLE.get_or_init(|| {
|
||||
let gauge = Arc::new(AtomicI64::new(0));
|
||||
register_http_request_permits_available_metric(gauge.clone());
|
||||
gauge
|
||||
});
|
||||
gauge.store(available.min(i64::MAX as usize) as i64, Ordering::Relaxed);
|
||||
/// Stores the number of currently available permits for the given `pool`.
///
/// The process-wide gauges are created on first use by
/// `register_http_request_permits_available_metric` (via `OnceLock::get_or_init`);
/// later calls reuse the same gauges and only overwrite the value for `pool`.
pub(crate) fn update_http_request_permits_available(
|
||||
pool: HttpRequestPermitPoolKind,
|
||||
available: usize,
|
||||
) {
|
||||
HTTP_REQUEST_PERMITS_AVAILABLE
|
||||
.get_or_init(register_http_request_permits_available_metric)
|
||||
.store(pool, available);
|
||||
}
|
||||
|
||||
/// Adds 1 to the puzzle gallery cache `hits` counter, with no attributes.
pub(crate) fn record_puzzle_gallery_cache_hit() {
|
||||
puzzle_gallery_cache_metrics().hits.add(1, &[]);
|
||||
}
|
||||
|
||||
/// Adds 1 to the puzzle gallery cache `stale_hits` counter, with no attributes.
pub(crate) fn record_puzzle_gallery_cache_stale_hit() {
|
||||
puzzle_gallery_cache_metrics().stale_hits.add(1, &[]);
|
||||
}
|
||||
|
||||
/// Adds 1 to the puzzle gallery cache `misses` counter, with no attributes.
pub(crate) fn record_puzzle_gallery_cache_miss() {
|
||||
puzzle_gallery_cache_metrics().misses.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_rebuild(duration: std::time::Duration, data_bytes: usize) {
|
||||
/// Adds 1 to the puzzle gallery cache `refreshes_started` counter, with no attributes.
pub(crate) fn record_puzzle_gallery_cache_refresh_started() {
|
||||
puzzle_gallery_cache_metrics().refreshes_started.add(1, &[]);
|
||||
}
|
||||
|
||||
/// Adds 1 to the puzzle gallery cache `refreshes_failed` counter, with no attributes.
pub(crate) fn record_puzzle_gallery_cache_refresh_failed() {
|
||||
puzzle_gallery_cache_metrics().refreshes_failed.add(1, &[]);
|
||||
}
|
||||
|
||||
pub(crate) fn record_puzzle_gallery_cache_rebuild(
|
||||
duration: std::time::Duration,
|
||||
data_bytes: usize,
|
||||
) {
|
||||
let metrics = puzzle_gallery_cache_metrics();
|
||||
metrics.rebuilds.add(1, &[]);
|
||||
metrics
|
||||
.rebuild_duration
|
||||
.record(duration.as_secs_f64(), &[]);
|
||||
metrics.rebuild_duration.record(duration.as_secs_f64(), &[]);
|
||||
metrics
|
||||
.data_json_bytes
|
||||
.record(data_bytes.min(u64::MAX as usize) as u64, &[]);
|
||||
@@ -125,12 +142,44 @@ struct HttpMetrics {
|
||||
|
||||
// Instruments for the puzzle gallery response cache: counters for hits, stale
// hits, misses, refresh starts, refresh failures and rebuilds, plus histograms
// for the rebuild duration (recorded in seconds) and the rebuilt data size
// (recorded from a byte count).
struct PuzzleGalleryCacheMetrics {
|
||||
hits: Counter<u64>,
|
||||
stale_hits: Counter<u64>,
|
||||
misses: Counter<u64>,
|
||||
refreshes_started: Counter<u64>,
|
||||
refreshes_failed: Counter<u64>,
|
||||
rebuilds: Counter<u64>,
|
||||
rebuild_duration: opentelemetry::metrics::Histogram<f64>,
|
||||
data_json_bytes: opentelemetry::metrics::Histogram<u64>,
|
||||
}
|
||||
|
||||
// One shared atomic value per HTTP request permit pool (default, gallery,
// detail, admin). Each value sits behind an `Arc` so a clone of it can be
// moved into another owner while this struct keeps writing to it.
struct HttpRequestPermitsAvailableGauges {
|
||||
default: Arc<AtomicI64>,
|
||||
gallery: Arc<AtomicI64>,
|
||||
detail: Arc<AtomicI64>,
|
||||
admin: Arc<AtomicI64>,
|
||||
}
|
||||
|
||||
impl HttpRequestPermitsAvailableGauges {
|
||||
/// Creates the four per-pool values, each initialised to 0.
fn new() -> Self {
|
||||
Self {
|
||||
default: Arc::new(AtomicI64::new(0)),
|
||||
gallery: Arc::new(AtomicI64::new(0)),
|
||||
detail: Arc::new(AtomicI64::new(0)),
|
||||
admin: Arc::new(AtomicI64::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Overwrites the stored value for `pool` with `available`.
///
/// `available` is clamped to `i64::MAX` before the cast to `i64`, and the
/// write uses `Ordering::Relaxed`.
fn store(&self, pool: HttpRequestPermitPoolKind, available: usize) {
|
||||
let value = available.min(i64::MAX as usize) as i64;
|
||||
// Select the atomic that belongs to `pool`; the match lists every variant explicitly.
match pool {
|
||||
HttpRequestPermitPoolKind::Default => &self.default,
|
||||
HttpRequestPermitPoolKind::Gallery => &self.gallery,
|
||||
HttpRequestPermitPoolKind::Detail => &self.detail,
|
||||
HttpRequestPermitPoolKind::Admin => &self.admin,
|
||||
}
|
||||
.store(value, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
struct ResponseBodyInFlightGuard;
|
||||
|
||||
impl Drop for ResponseBodyInFlightGuard {
|
||||
@@ -171,10 +220,22 @@ fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.hits")
|
||||
.with_description("Puzzle gallery response cache hits")
|
||||
.build(),
|
||||
stale_hits: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.stale_hits")
|
||||
.with_description("Puzzle gallery stale response cache hits")
|
||||
.build(),
|
||||
misses: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.misses")
|
||||
.with_description("Puzzle gallery response cache misses")
|
||||
.build(),
|
||||
refreshes_started: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.refreshes_started")
|
||||
.with_description("Puzzle gallery background refresh start count")
|
||||
.build(),
|
||||
refreshes_failed: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.refreshes_failed")
|
||||
.with_description("Puzzle gallery background refresh failure count")
|
||||
.build(),
|
||||
rebuilds: meter
|
||||
.u64_counter("genarrative.puzzle_gallery.cache.rebuilds")
|
||||
.with_description("Puzzle gallery response cache rebuild count")
|
||||
@@ -193,16 +254,49 @@ fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
|
||||
})
|
||||
}
|
||||
|
||||
fn register_http_request_permits_available_metric(gauge: Arc<AtomicI64>) {
|
||||
/// Builds the per-pool permit gauges and registers the observable instrument
/// `genarrative.http.server.request_permits.available` on the
/// `genarrative-api` meter.
///
/// The instrument's callback reports one observation per pool, each tagged
/// with a `pool` attribute taken from `HttpRequestPermitPoolKind::as_str()`.
/// Returns the gauges so the caller can keep updating the values that the
/// callback reads.
fn register_http_request_permits_available_metric() -> HttpRequestPermitsAvailableGauges {
|
||||
let gauges = HttpRequestPermitsAvailableGauges::new();
|
||||
let meter = global::meter("genarrative-api");
|
||||
// Clone each `Arc` so the `move` callback below owns its own handles while
// `gauges` itself is still returned to the caller.
let default_gauge = gauges.default.clone();
|
||||
let gallery_gauge = gauges.gallery.clone();
|
||||
let detail_gauge = gauges.detail.clone();
|
||||
let admin_gauge = gauges.admin.clone();
|
||||
meter
|
||||
.i64_observable_up_down_counter("genarrative.http.server.request_permits.available")
|
||||
.with_unit("{permit}")
|
||||
.with_description("Available api-server HTTP backpressure permits")
|
||||
.with_callback(move |observer| {
|
||||
observer.observe(gauge.load(Ordering::Relaxed), &[]);
|
||||
observer.observe(
|
||||
default_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Default.as_str(),
|
||||
)],
|
||||
);
|
||||
observer.observe(
|
||||
gallery_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Gallery.as_str(),
|
||||
)],
|
||||
);
|
||||
observer.observe(
|
||||
detail_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Detail.as_str(),
|
||||
)],
|
||||
);
|
||||
observer.observe(
|
||||
admin_gauge.load(Ordering::Relaxed),
|
||||
&[KeyValue::new(
|
||||
"pool",
|
||||
HttpRequestPermitPoolKind::Admin.as_str(),
|
||||
)],
|
||||
);
|
||||
})
|
||||
.build();
|
||||
gauges
|
||||
}
|
||||
|
||||
pub(crate) fn register_http_runtime_metrics() {
|
||||
@@ -284,19 +378,13 @@ mod tests {
|
||||
observability_route("/api/runtime/puzzle/runs/run-123/history"),
|
||||
"/api/*"
|
||||
);
|
||||
assert_eq!(
|
||||
observability_route("/admin/api/debug/http"),
|
||||
"/admin/api/*"
|
||||
);
|
||||
assert_eq!(observability_route("/admin/api/debug/http"), "/admin/api/*");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_request_scheme_uses_forwarded_proto_first_value() {
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
"x-forwarded-proto",
|
||||
HeaderValue::from_static("https, http"),
|
||||
);
|
||||
headers.insert("x-forwarded-proto", HeaderValue::from_static("https, http"));
|
||||
|
||||
assert_eq!(resolve_request_scheme(&headers), "https");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user