perf(api-server): batch route tracking through local outbox

This commit is contained in:
kdletters
2026-05-19 01:47:13 +08:00
parent 8038b6a6ee
commit 05a0f34722
23 changed files with 1131 additions and 23 deletions

View File

@@ -18,6 +18,8 @@ use crate::{
};
/// Counter of HTTP response bodies currently tracked as in flight.
/// Incremented by `track_response_body_in_flight` and read by an observable
/// metric callback registered with the other HTTP runtime metrics.
static HTTP_RESPONSE_BODY_IN_FLIGHT: AtomicI64 = AtomicI64::new(0);
/// Most recent value stored by `update_tracking_outbox_pending_bytes`.
/// Read by the callback of the `genarrative.tracking_outbox.pending.bytes` instrument.
static TRACKING_OUTBOX_PENDING_BYTES: AtomicI64 = AtomicI64::new(0);
/// Most recent value stored by `update_tracking_outbox_pending_files`.
/// Read by the callback of the `genarrative.tracking_outbox.pending.files` instrument.
static TRACKING_OUTBOX_PENDING_FILES: AtomicI64 = AtomicI64::new(0);
/// Set-once holder for the HTTP request permit gauges.
/// NOTE(review): presumably initialized from
/// `register_http_request_permits_available_metric` — confirm against the caller.
static HTTP_REQUEST_PERMITS_AVAILABLE: OnceLock<HttpRequestPermitsAvailableGauges> =
OnceLock::new();
@@ -123,6 +125,53 @@ pub(crate) fn record_puzzle_gallery_cache_rebuild(
.record(data_bytes.min(u64::MAX as usize) as u64, &[]);
}
/// Records that one tracking event was appended to the local outbox.
///
/// Adds 1, without attributes, to the
/// `genarrative.tracking_outbox.events.enqueued` counter.
pub(crate) fn record_tracking_outbox_enqueued() {
    let instruments = tracking_outbox_metrics();
    instruments.enqueued.add(1, &[]);
}
/// Records that one tracking event was dropped by the outbox protection.
///
/// Adds 1 to the `genarrative.tracking_outbox.events.dropped` counter.
///
/// * `reason` - value attached to the data point as the `reason` attribute.
pub(crate) fn record_tracking_outbox_dropped(reason: &'static str) {
    let instruments = tracking_outbox_metrics();
    let attributes = [KeyValue::new("reason", reason)];
    instruments.dropped.add(1, &attributes);
}
/// Records that one active outbox file was sealed for flushing.
///
/// Adds 1 to the `genarrative.tracking_outbox.files.sealed` counter.
///
/// * `reason` - value attached to the data point as the `reason` attribute.
pub(crate) fn record_tracking_outbox_sealed(reason: &'static str) {
    let instruments = tracking_outbox_metrics();
    let attributes = [KeyValue::new("reason", reason)];
    instruments.sealed_files.add(1, &attributes);
}
/// Records one sealed outbox file that could not be parsed.
///
/// Adds 1, without attributes, to the
/// `genarrative.tracking_outbox.files.corrupt` counter.
pub(crate) fn record_tracking_outbox_corrupt_file() {
    let instruments = tracking_outbox_metrics();
    instruments.corrupt_files.add(1, &[]);
}
/// Records the outcome of one sealed-file flush attempt.
///
/// Every data point carries a `status_class` attribute: `"error"` when
/// `failed` is true, `"ok"` otherwise. All four instruments are updated on
/// every call, including failed attempts.
///
/// * `duration` - time spent on the attempt; recorded in seconds.
/// * `accepted_count` - added to the flushed-events counter.
/// * `file_bytes` - added to the flushed-bytes counter.
/// * `failed` - selects the `status_class` attribute value.
pub(crate) fn record_tracking_outbox_flush(
    duration: std::time::Duration,
    accepted_count: u32,
    file_bytes: u64,
    failed: bool,
) {
    let outcome = if failed { "error" } else { "ok" };
    let attributes = [KeyValue::new("status_class", outcome)];

    let instruments = tracking_outbox_metrics();

    // One attempt, regardless of outcome.
    instruments.flushes.add(1, &attributes);

    let elapsed_seconds = duration.as_secs_f64();
    instruments.flush_duration.record(elapsed_seconds, &attributes);

    let accepted_events = u64::from(accepted_count);
    instruments.flushed_events.add(accepted_events, &attributes);

    instruments.flushed_bytes.add(file_bytes, &attributes);
}
/// Publishes the current number of pending outbox bytes.
///
/// The value is stored into `TRACKING_OUTBOX_PENDING_BYTES` with relaxed
/// ordering. Values above `i64::MAX` are clamped to `i64::MAX`.
///
/// * `bytes` - new pending byte count.
pub(crate) fn update_tracking_outbox_pending_bytes(bytes: u64) {
    const CEILING: u64 = i64::MAX as u64;
    // Saturate instead of wrapping: the backing atomic is signed.
    let clamped = if bytes > CEILING {
        i64::MAX
    } else {
        bytes as i64
    };
    TRACKING_OUTBOX_PENDING_BYTES.store(clamped, Ordering::Relaxed);
}
/// Publishes the current number of pending outbox files.
///
/// The value is stored into `TRACKING_OUTBOX_PENDING_FILES` with relaxed
/// ordering. Values above `i64::MAX` are clamped to `i64::MAX`.
///
/// * `files` - new pending file count.
pub(crate) fn update_tracking_outbox_pending_files(files: usize) {
    // On targets where `usize` is narrower than `i64` this cast yields
    // `usize::MAX`, so the clamp below never triggers and the value fits.
    const CEILING: usize = i64::MAX as usize;
    let clamped = if files > CEILING {
        CEILING as i64
    } else {
        files as i64
    };
    TRACKING_OUTBOX_PENDING_FILES.store(clamped, Ordering::Relaxed);
}
fn track_response_body_in_flight(response: Response<Body>) -> Response<Body> {
response.map(|body| {
HTTP_RESPONSE_BODY_IN_FLIGHT.fetch_add(1, Ordering::Relaxed);
@@ -151,6 +200,17 @@ struct PuzzleGalleryCacheMetrics {
data_json_bytes: opentelemetry::metrics::Histogram<u64>,
}
/// Instruments used to report tracking-outbox activity.
/// Built once and cached by `tracking_outbox_metrics`.
struct TrackingOutboxMetrics {
/// `genarrative.tracking_outbox.events.enqueued`: events appended to the local outbox.
enqueued: Counter<u64>,
/// `genarrative.tracking_outbox.events.dropped`: events dropped by outbox protection.
dropped: Counter<u64>,
/// `genarrative.tracking_outbox.files.sealed`: active files sealed for flushing.
sealed_files: Counter<u64>,
/// `genarrative.tracking_outbox.files.corrupt`: sealed files that could not be parsed.
corrupt_files: Counter<u64>,
/// `genarrative.tracking_outbox.flushes`: sealed-file flush attempts.
flushes: Counter<u64>,
/// `genarrative.tracking_outbox.flush.duration`: flush duration, unit `s`.
flush_duration: opentelemetry::metrics::Histogram<f64>,
/// `genarrative.tracking_outbox.events.flushed`: events accepted by the batch procedure.
flushed_events: Counter<u64>,
/// `genarrative.tracking_outbox.bytes.flushed`: outbox bytes flushed, unit `By`.
flushed_bytes: Counter<u64>,
}
struct HttpRequestPermitsAvailableGauges {
default: Arc<AtomicI64>,
gallery: Arc<AtomicI64>,
@@ -254,6 +314,51 @@ fn puzzle_gallery_cache_metrics() -> &'static PuzzleGalleryCacheMetrics {
})
}
/// Returns the shared tracking-outbox instruments, building them on first use.
///
/// The instruments are created from the `genarrative-api` meter and cached in
/// a function-local `OnceLock`, so every call returns the same reference.
fn tracking_outbox_metrics() -> &'static TrackingOutboxMetrics {
    static METRICS: std::sync::OnceLock<TrackingOutboxMetrics> = std::sync::OnceLock::new();

    /// Creates every tracking-outbox instrument from the global meter.
    fn build() -> TrackingOutboxMetrics {
        let meter = global::meter("genarrative-api");

        // Event-level counters.
        let enqueued = meter
            .u64_counter("genarrative.tracking_outbox.events.enqueued")
            .with_description("Tracking events appended to the local outbox")
            .build();
        let dropped = meter
            .u64_counter("genarrative.tracking_outbox.events.dropped")
            .with_description("Tracking events dropped by local outbox protection")
            .build();

        // File-level counters.
        let sealed_files = meter
            .u64_counter("genarrative.tracking_outbox.files.sealed")
            .with_description("Tracking outbox active files sealed for flushing")
            .build();
        let corrupt_files = meter
            .u64_counter("genarrative.tracking_outbox.files.corrupt")
            .with_description(
                "Tracking outbox sealed files quarantined because they could not be parsed",
            )
            .build();

        // Flush instruments.
        let flushes = meter
            .u64_counter("genarrative.tracking_outbox.flushes")
            .with_description("Tracking outbox sealed file flush attempts")
            .build();
        let flush_duration = meter
            .f64_histogram("genarrative.tracking_outbox.flush.duration")
            .with_unit("s")
            .with_description("Tracking outbox sealed file flush duration")
            .build();
        let flushed_events = meter
            .u64_counter("genarrative.tracking_outbox.events.flushed")
            .with_description("Tracking events accepted by SpacetimeDB batch procedure")
            .build();
        let flushed_bytes = meter
            .u64_counter("genarrative.tracking_outbox.bytes.flushed")
            .with_unit("By")
            .with_description("Tracking outbox bytes removed after successful flush")
            .build();

        TrackingOutboxMetrics {
            enqueued,
            dropped,
            sealed_files,
            corrupt_files,
            flushes,
            flush_duration,
            flushed_events,
            flushed_bytes,
        }
    }

    METRICS.get_or_init(build)
}
fn register_http_request_permits_available_metric() -> HttpRequestPermitsAvailableGauges {
let gauges = HttpRequestPermitsAvailableGauges::new();
let meter = global::meter("genarrative-api");
@@ -311,6 +416,22 @@ pub(crate) fn register_http_runtime_metrics() {
observer.observe(HTTP_RESPONSE_BODY_IN_FLIGHT.load(Ordering::Relaxed), &[]);
})
.build();
meter
.i64_observable_up_down_counter("genarrative.tracking_outbox.pending.bytes")
.with_unit("By")
.with_description("Tracking outbox bytes waiting on local disk")
.with_callback(|observer| {
observer.observe(TRACKING_OUTBOX_PENDING_BYTES.load(Ordering::Relaxed), &[]);
})
.build();
meter
.i64_observable_up_down_counter("genarrative.tracking_outbox.pending.files")
.with_unit("{file}")
.with_description("Tracking outbox sealed files waiting for flush")
.with_callback(|observer| {
observer.observe(TRACKING_OUTBOX_PENDING_FILES.load(Ordering::Relaxed), &[]);
})
.build();
});
}