Files
Genarrative/server-rs/crates/server-manager-panel/src/health.rs
kdletters b54cbafc54 新增本地服务器管理面板
新增 egui 服务器管理面板并支持 SSH alias 多服务器巡检

接入硬件状态、服务状态、HTTP 探测和生产巡检状态展示

增加受控 systemd 启动关闭重启操作和中文字体注入

补充本地服务器面板技术方案与团队共享记忆
2026-06-11 22:33:05 +08:00

475 lines
14 KiB
Rust

use std::collections::BTreeMap;
/// Severity assigned to a whole server report or to one of its individual checks.
///
/// NOTE(review): the derived `PartialOrd`/`Ord` follow declaration order
/// (`Unknown < Ok < Warning < Critical`), while `HealthLevel::rank` places `Ok`
/// below `Unknown`. `summarize_report` compares by `rank()`; confirm no caller
/// relies on the derived ordering to pick the "worst" level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum HealthLevel {
/// No usable signal; displayed as "未知".
Unknown,
/// Check passed; displayed as "正常".
Ok,
/// Degraded but not failing; displayed as "警告".
Warning,
/// Failing; displayed as "异常".
Critical,
}
impl HealthLevel {
pub fn label(self) -> &'static str {
match self {
HealthLevel::Unknown => "未知",
HealthLevel::Ok => "正常",
HealthLevel::Warning => "警告",
HealthLevel::Critical => "异常",
}
}
pub fn rank(self) -> u8 {
match self {
HealthLevel::Unknown => 1,
HealthLevel::Ok => 0,
HealthLevel::Warning => 2,
HealthLevel::Critical => 3,
}
}
}
/// Parsed result of one health-script run for a single server.
#[derive(Debug, Clone)]
pub struct ServerHealthReport {
/// Aggregate level; `parse_health_report` fills it from `summarize_report`.
pub status: HealthLevel,
/// First non-empty line of the `checked_at` section, or empty when absent.
pub checked_at: String,
/// Host identity fields (hostname, kernel, uptime).
pub host: HostSnapshot,
/// CPU, memory, swap, disk and sensor readings.
pub hardware: HardwareSnapshot,
/// One entry per well-formed line of the `services` section.
pub services: Vec<ServiceSnapshot>,
/// One entry per well-formed line of the `probes` section.
pub probes: Vec<ProbeSnapshot>,
/// Patrol status; `None` when the `health_patrol` section has no non-empty line.
pub health_patrol: Option<HealthPatrolSnapshot>,
/// Unmodified copy of the text the report was parsed from.
pub raw_output: String,
}
/// Host identity strings, each taken from the first non-empty (trimmed) line of
/// the section with the same name; a missing section yields an empty string.
#[derive(Debug, Clone, Default)]
pub struct HostSnapshot {
/// Value of the `hostname` section.
pub hostname: String,
/// Value of the `kernel` section.
pub kernel: String,
/// Value of the `uptime` section.
pub uptime: String,
}
/// Hardware readings extracted from the report sections.
#[derive(Debug, Clone, Default)]
pub struct HardwareSnapshot {
/// First non-empty line of the `cpu_model` section, kept as text.
pub cpu_model: String,
/// First non-empty line of the `cpu_cores` section, kept as text (not parsed to a number).
pub cpu_cores: String,
/// First non-empty line of the `load_average` section, kept as text.
pub load_average: String,
/// Parsed `memory` section.
pub memory: MemorySnapshot,
/// Parsed `swap` section (same record layout as `memory`).
pub swap: MemorySnapshot,
/// One entry per well-formed line of the `disks` section.
pub disks: Vec<DiskSnapshot>,
/// Every line of the `sensors` section, copied without trimming or filtering.
pub sensors: Vec<String>,
}
/// One `total|used|free|available|percent` record, used for both memory and swap.
///
/// The size fields are stored exactly as they appear in the record; missing
/// fields are empty strings.
#[derive(Debug, Clone, Default)]
pub struct MemorySnapshot {
/// Field 0 of the record.
pub total: String,
/// Field 1 of the record.
pub used: String,
/// Field 2 of the record.
pub free: String,
/// Field 3 of the record.
pub available: String,
/// Field 4 parsed by `parse_percent`; `None` when missing or not a valid `u8`.
pub used_percent: Option<u8>,
}
/// One line of the `disks` section.
///
/// Note that the wire order is `filesystem|size|used|available|percent|mount`,
/// i.e. `mount` is the LAST field of the line even though it is declared first here.
#[derive(Debug, Clone, Default)]
pub struct DiskSnapshot {
/// Field 5 of the line.
pub mount: String,
/// Field 0 of the line.
pub filesystem: String,
/// Field 1 of the line, kept as text.
pub size: String,
/// Field 2 of the line, kept as text.
pub used: String,
/// Field 3 of the line, kept as text.
pub available: String,
/// Field 4 parsed by `parse_percent`; `None` when it is not a valid `u8`.
pub used_percent: Option<u8>,
}
/// One `name|active|sub|unit_file` line of the `services` section.
#[derive(Debug, Clone)]
pub struct ServiceSnapshot {
/// Field 0: unit name.
pub name: String,
/// Field 1: active state text; this is the only field that drives `level`.
pub active: String,
/// Field 2: sub-state text.
pub sub: String,
/// Field 3: unit-file state text.
pub unit_file: String,
/// `Ok` for `active`, `Warning` for `unknown`/`inactive`, `Critical` otherwise.
pub level: HealthLevel,
}
/// One `name|target|http_code|elapsed_ms` line of the `probes` section.
#[derive(Debug, Clone)]
pub struct ProbeSnapshot {
/// Field 0: probe label.
pub name: String,
/// Field 1: probed URL.
pub target: String,
/// Field 2: HTTP status code kept as text.
pub http_code: String,
/// Field 3 parsed as an integer; `None` when it does not parse as `u64`.
pub elapsed_ms: Option<u64>,
/// `Ok` when `http_code` starts with `2`, `Critical` for anything else.
pub level: HealthLevel,
}
/// The `status|checked_at|summary` line of the `health_patrol` section.
#[derive(Debug, Clone)]
pub struct HealthPatrolSnapshot {
/// Field 0: raw status text.
pub status: String,
/// Field 1: timestamp text as reported; empty when missing.
pub checked_at: String,
/// Field 2: summary text; empty when missing.
pub summary: String,
/// `OK`/`WARNING`/`CRITICAL` map to the matching level; any other status is `Unknown`.
pub level: HealthLevel,
}
/// Parses marker-delimited health output into a [`ServerHealthReport`].
///
/// The text is split into named sections at every line recognised by
/// `parse_section_marker`. Lines before the first marker, and lines following a
/// marker with an empty name, are discarded. A section name that appears more
/// than once keeps accumulating lines into the same section. The returned
/// report carries an unmodified copy of `raw_output` and a `status` computed by
/// `summarize_report`.
pub fn parse_health_report(raw_output: &str) -> ServerHealthReport {
    let mut sections: BTreeMap<String, Vec<String>> = BTreeMap::new();
    // Name of the section being filled; empty means "no section, drop the line".
    let mut active = "";
    for line in raw_output.lines() {
        match parse_section_marker(line) {
            Some(name) => {
                // Register the section even if it turns out to have no lines.
                sections.entry(name.to_owned()).or_default();
                active = name;
            }
            None if active.is_empty() => {}
            None => {
                if let Some(bucket) = sections.get_mut(active) {
                    bucket.push(line.to_owned());
                }
            }
        }
    }

    let checked_at = section_value(&sections, "checked_at").unwrap_or_default();
    let mut report = ServerHealthReport {
        // Placeholder; replaced below once every part of the report is available.
        status: HealthLevel::Unknown,
        checked_at,
        host: parse_host(&sections),
        hardware: parse_hardware(&sections),
        services: parse_services(&sections),
        probes: parse_probes(&sections),
        health_patrol: parse_health_patrol(&sections),
        raw_output: raw_output.to_owned(),
    };
    report.status = summarize_report(&report);
    report
}
/// Collapses every signal in `report` into the single worst [`HealthLevel`].
///
/// Signals are the levels of all services, probes and the optional health
/// patrol, plus a usage level derived from the memory percentage and from each
/// disk percentage (see `usage_level`). Levels are compared with
/// `HealthLevel::rank`, so `Unknown` outranks `Ok`.
///
/// A report that carries no signal at all (no services, probes, patrol, memory
/// percentage or disks — e.g. the output was empty or unparseable) yields
/// `HealthLevel::Unknown` rather than `Ok`, so missing data is never shown as
/// healthy.
pub fn summarize_report(report: &ServerHealthReport) -> HealthLevel {
    let hardware = &report.hardware;

    let has_signal = !report.services.is_empty()
        || !report.probes.is_empty()
        || report.health_patrol.is_some()
        || hardware.memory.used_percent.is_some()
        || !hardware.disks.is_empty();
    if !has_signal {
        return HealthLevel::Unknown;
    }

    let check_levels = report
        .services
        .iter()
        .map(|service| service.level)
        .chain(report.probes.iter().map(|probe| probe.level))
        .chain(report.health_patrol.iter().map(|patrol| patrol.level));

    // Disks without a parsed percentage contribute nothing, same as `Ok`.
    let usage_levels = hardware
        .memory
        .used_percent
        .into_iter()
        .chain(hardware.disks.iter().filter_map(|disk| disk.used_percent))
        .map(usage_level);

    check_levels
        .chain(usage_levels)
        .fold(HealthLevel::Ok, |worst, level| {
            if level.rank() > worst.rank() {
                level
            } else {
                worst
            }
        })
}

/// Maps a usage percentage to a level: `>= 95` is critical, `>= 85` is a warning.
fn usage_level(used_percent: u8) -> HealthLevel {
    if used_percent >= 95 {
        HealthLevel::Critical
    } else if used_percent >= 85 {
        HealthLevel::Warning
    } else {
        HealthLevel::Ok
    }
}
/// Returns the section name when `line` is exactly a
/// `==GENARRATIVE_PANEL:<name>==` marker, and `None` for any other line.
///
/// No trimming is applied, so leading or trailing whitespace prevents a match.
/// The name may be empty.
fn parse_section_marker(line: &str) -> Option<&str> {
    const PREFIX: &str = "==GENARRATIVE_PANEL:";
    const SUFFIX: &str = "==";

    let rest = line.strip_prefix(PREFIX)?;
    rest.strip_suffix(SUFFIX)
}
/// Returns the first non-blank line of section `name`, trimmed of surrounding
/// whitespace.
///
/// Yields `None` when the section does not exist or holds only blank lines.
fn section_value(sections: &BTreeMap<String, Vec<String>>, name: &str) -> Option<String> {
    let lines = sections.get(name)?;
    for line in lines {
        let trimmed = line.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_owned());
        }
    }
    None
}
/// Builds a [`HostSnapshot`] from the `hostname`, `kernel` and `uptime`
/// sections; a section without a usable line becomes an empty string.
fn parse_host(sections: &BTreeMap<String, Vec<String>>) -> HostSnapshot {
    let field = |name: &str| section_value(sections, name).unwrap_or_default();

    let hostname = field("hostname");
    let kernel = field("kernel");
    let uptime = field("uptime");
    HostSnapshot {
        hostname,
        kernel,
        uptime,
    }
}
/// Builds a [`HardwareSnapshot`] from the CPU, load, memory, swap, disk and
/// sensor sections.
///
/// Single-value sections use their first non-blank line (empty string when
/// absent). The `sensors` section is copied line for line without trimming.
fn parse_hardware(sections: &BTreeMap<String, Vec<String>>) -> HardwareSnapshot {
    let first_line = |name: &str| section_value(sections, name);
    let text = |name: &str| first_line(name).unwrap_or_default();

    let sensors = match sections.get("sensors") {
        Some(lines) => lines.clone(),
        None => Vec::new(),
    };
    let memory_line = first_line("memory");
    let swap_line = first_line("swap");

    HardwareSnapshot {
        cpu_model: text("cpu_model"),
        cpu_cores: text("cpu_cores"),
        load_average: text("load_average"),
        memory: parse_memory(memory_line.as_deref()),
        swap: parse_memory(swap_line.as_deref()),
        disks: parse_disks(sections),
        sensors,
    }
}
fn parse_memory(value: Option<&str>) -> MemorySnapshot {
let Some(value) = value else {
return MemorySnapshot::default();
};
let parts: Vec<&str> = value.split('|').collect();
MemorySnapshot {
total: parts.first().copied().unwrap_or_default().to_owned(),
used: parts.get(1).copied().unwrap_or_default().to_owned(),
free: parts.get(2).copied().unwrap_or_default().to_owned(),
available: parts.get(3).copied().unwrap_or_default().to_owned(),
used_percent: parts.get(4).and_then(|value| parse_percent(value)),
}
}
/// Parses every `filesystem|size|used|available|percent|mount` line of the
/// `disks` section.
///
/// Lines with fewer than six fields are skipped and fields after the sixth are
/// ignored. A missing section yields an empty list.
fn parse_disks(sections: &BTreeMap<String, Vec<String>>) -> Vec<DiskSnapshot> {
    let Some(lines) = sections.get("disks") else {
        return Vec::new();
    };

    let mut disks = Vec::new();
    for line in lines {
        let fields: Vec<&str> = line.split('|').collect();
        if let &[filesystem, size, used, available, percent, mount, ..] = fields.as_slice() {
            disks.push(DiskSnapshot {
                filesystem: filesystem.to_owned(),
                size: size.to_owned(),
                used: used.to_owned(),
                available: available.to_owned(),
                used_percent: parse_percent(percent),
                mount: mount.to_owned(),
            });
        }
    }
    disks
}
/// Parses every `name|active|sub|unit_file` line of the `services` section.
///
/// Lines with fewer than four fields are skipped; extra fields are ignored.
/// The level depends only on the active state: `active` is `Ok`, `unknown` and
/// `inactive` are `Warning`, and every other state is `Critical`.
fn parse_services(sections: &BTreeMap<String, Vec<String>>) -> Vec<ServiceSnapshot> {
    let Some(lines) = sections.get("services") else {
        return Vec::new();
    };

    lines
        .iter()
        .filter_map(|line| {
            let mut fields = line.split('|');
            let name = fields.next()?;
            let active = fields.next()?;
            let sub = fields.next()?;
            let unit_file = fields.next()?;

            let level = match active {
                "active" => HealthLevel::Ok,
                "unknown" | "inactive" => HealthLevel::Warning,
                _ => HealthLevel::Critical,
            };
            Some(ServiceSnapshot {
                name: name.to_owned(),
                active: active.to_owned(),
                sub: sub.to_owned(),
                unit_file: unit_file.to_owned(),
                level,
            })
        })
        .collect()
}
/// Parses every `name|target|http_code|elapsed_ms` line of the `probes`
/// section.
///
/// Lines with fewer than four fields are skipped; extra fields are ignored.
/// `elapsed_ms` is `None` when the fourth field is not a valid `u64`.
fn parse_probes(sections: &BTreeMap<String, Vec<String>>) -> Vec<ProbeSnapshot> {
    let Some(lines) = sections.get("probes") else {
        return Vec::new();
    };

    lines
        .iter()
        .filter_map(|line| {
            let mut fields = line.split('|');
            let name = fields.next()?;
            let target = fields.next()?;
            let http_code = fields.next()?;
            let elapsed = fields.next()?;

            // Only a 2xx code is healthy. Every other code, including the "000"
            // placeholder, is treated as critical.
            let level = if http_code.starts_with('2') {
                HealthLevel::Ok
            } else {
                HealthLevel::Critical
            };
            Some(ProbeSnapshot {
                name: name.to_owned(),
                target: target.to_owned(),
                http_code: http_code.to_owned(),
                elapsed_ms: elapsed.parse::<u64>().ok(),
                level,
            })
        })
        .collect()
}
/// Parses the `status|checked_at|summary` line of the `health_patrol` section.
///
/// Returns `None` when the section is missing or has no non-blank line. Missing
/// fields become empty strings and fields after the third are ignored. The
/// status is matched case-sensitively: `OK`, `WARNING` and `CRITICAL` map to
/// their levels, anything else to `Unknown`.
fn parse_health_patrol(sections: &BTreeMap<String, Vec<String>>) -> Option<HealthPatrolSnapshot> {
    let line = section_value(sections, "health_patrol")?;

    let mut fields = line.split('|');
    let mut text = || fields.next().unwrap_or_default().to_owned();
    let status = text();
    let checked_at = text();
    let summary = text();

    let level = match status.as_str() {
        "CRITICAL" => HealthLevel::Critical,
        "WARNING" => HealthLevel::Warning,
        "OK" => HealthLevel::Ok,
        _ => HealthLevel::Unknown,
    };
    Some(HealthPatrolSnapshot {
        status,
        checked_at,
        summary,
        level,
    })
}
/// Parses a percentage such as `50%` into a `u8`.
///
/// Any number of trailing `%` characters is removed before parsing. Returns
/// `None` when the remainder is not a valid `u8` (empty, non-numeric,
/// surrounded by whitespace, or above 255).
fn parse_percent(value: &str) -> Option<u8> {
    let digits = value.trim_end_matches('%');
    digits.parse::<u8>().ok()
}
/// Shell script that prints one health snapshot in the marker-delimited format
/// understood by `parse_health_report`.
///
/// Each block of output is preceded by a `==GENARRATIVE_PANEL:<name>==` line
/// (see `print_section`), and multi-field records are joined with `|`.
///
/// The disk loop emits one `|`-joined row per candidate path and then removes
/// duplicate mount points. That final `awk` filter must split on `|`
/// (`-F'|'`): with awk's default whitespace separator `$6` is always empty for
/// these rows, so every row after the first would be dropped.
///
/// NOTE(review): memory "used" is computed as `MemTotal - MemFree`, which
/// presumably counts buffers/page cache as used; confirm whether the
/// percentage should be based on `MemAvailable` instead.
pub const HEALTH_SCRIPT: &str = r#"set -eu
print_section() {
printf '==GENARRATIVE_PANEL:%s==\n' "$1"
}
print_section checked_at
date -Is 2>/dev/null || date
print_section hostname
hostname 2>/dev/null || true
print_section kernel
uname -srmo 2>/dev/null || uname -a 2>/dev/null || true
print_section uptime
uptime -p 2>/dev/null || uptime 2>/dev/null || true
print_section cpu_model
awk -F: '/model name/ {gsub(/^[ \t]+/, "", $2); print $2; exit}' /proc/cpuinfo 2>/dev/null || true
print_section cpu_cores
nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || true
print_section load_average
cat /proc/loadavg 2>/dev/null | awk '{print $1" "$2" "$3}' || true
print_section memory
awk '
/^MemTotal:/ {total=$2}
/^MemFree:/ {free=$2}
/^MemAvailable:/ {available=$2}
END {
if (total > 0) {
used = total - free
percent = int((used * 100 + total / 2) / total)
printf "%.1f GiB|%.1f GiB|%.1f GiB|%.1f GiB|%d%%\n", total/1048576, used/1048576, free/1048576, available/1048576, percent
}
}
' /proc/meminfo 2>/dev/null || true
print_section swap
awk '
/^SwapTotal:/ {total=$2}
/^SwapFree:/ {free=$2}
END {
if (total > 0) {
used = total - free
percent = int((used * 100 + total / 2) / total)
printf "%.1f GiB|%.1f GiB|%.1f GiB|%.1f GiB|%d%%\n", total/1048576, used/1048576, free/1048576, free/1048576, percent
} else {
print "0 GiB|0 GiB|0 GiB|0 GiB|0%"
}
}
' /proc/meminfo 2>/dev/null || true
print_section disks
for mount in / /var /opt /stdb /data; do
if [ -e "$mount" ]; then
df -hP "$mount" 2>/dev/null | awk 'NR == 2 {print $1"|"$2"|"$3"|"$4"|"$5"|"$6}'
fi
done | awk -F'|' '!seen[$6]++'
print_section sensors
if command -v sensors >/dev/null 2>&1; then
sensors 2>/dev/null | sed -n '1,20p'
else
echo "sensors 未安装"
fi
print_section services
for service in genarrative-api.service spacetimedb.service nginx.service genarrative-health-patrol.timer genarrative-database-backup.timer; do
active=$(systemctl is-active "$service" 2>/dev/null || true)
sub=$(systemctl show "$service" -p SubState --value 2>/dev/null || true)
unit_file=$(systemctl show "$service" -p UnitFileState --value 2>/dev/null || true)
[ -n "$active" ] || active="unknown"
[ -n "$sub" ] || sub="unknown"
[ -n "$unit_file" ] || unit_file="unknown"
printf '%s|%s|%s|%s\n' "$service" "$active" "$sub" "$unit_file"
done
print_section probes
probe() {
name="$1"
url="$2"
tmp=$(mktemp)
code=$(curl -fsS -m 5 -o /dev/null -w '%{http_code}|%{time_total}' "$url" 2>"$tmp" || true)
if [ -z "$code" ]; then
code="000|0"
fi
http_code=${code%%|*}
time_total=${code#*|}
elapsed_ms=$(awk "BEGIN {printf \"%d\", $time_total * 1000}")
printf '%s|%s|%s|%s\n' "$name" "$url" "$http_code" "$elapsed_ms"
rm -f "$tmp"
}
probe "api:/healthz" "http://127.0.0.1:8082/healthz"
probe "api:/readyz" "http://127.0.0.1:8082/readyz"
probe "spacetimedb:/v1/ping" "http://127.0.0.1:3101/v1/ping"
probe "public:/api/creation-entry/config" "http://127.0.0.1:8082/api/creation-entry/config"
probe "public:/api/runtime/puzzle/gallery" "http://127.0.0.1:8082/api/runtime/puzzle/gallery"
print_section health_patrol
if [ -r /var/lib/genarrative/health-patrol/status.json ]; then
node -e '
const fs = require("fs");
const payload = JSON.parse(fs.readFileSync("/var/lib/genarrative/health-patrol/status.json", "utf8"));
const status = payload.status || "UNKNOWN";
const checkedAt = payload.checkedAt || "";
const checks = Array.isArray(payload.checks) ? payload.checks : [];
const summary = checks.filter((check) => check.status && check.status !== "OK").slice(0, 3).map((check) => `${check.name}:${check.status}`).join(",");
console.log(`${status}|${checkedAt}|${summary}`);
' 2>/dev/null || echo "UNKNOWN||状态文件解析失败"
else
echo "UNKNOWN||未找到 /var/lib/genarrative/health-patrol/status.json"
fi
"#;
#[cfg(test)]
mod tests {
use super::*;
// End-to-end check of `parse_health_report` on a small hand-written report
// that contains only a subset of the sections the script can emit.
#[test]
fn parses_report_sections() {
let report = parse_health_report(
r#"==GENARRATIVE_PANEL:checked_at==
2026-06-11T12:00:00+08:00
==GENARRATIVE_PANEL:hostname==
release
==GENARRATIVE_PANEL:memory==
2.0 GiB|1.0 GiB|1.0 GiB|1.0 GiB|50%
==GENARRATIVE_PANEL:disks==
/dev/sda1|40G|20G|20G|50%|/
==GENARRATIVE_PANEL:services==
genarrative-api.service|active|running|enabled
spacetimedb.service|failed|failed|enabled
==GENARRATIVE_PANEL:probes==
api:/readyz|http://127.0.0.1:8082/readyz|200|18
==GENARRATIVE_PANEL:health_patrol==
WARNING|2026-06-11T11:59:00Z|journal:WARNING
"#,
);
assert_eq!(report.host.hostname, "release");
assert_eq!(report.hardware.memory.used_percent, Some(50));
assert_eq!(report.services.len(), 2);
assert_eq!(report.probes[0].http_code, "200");
// The `failed` service is Critical, which outranks the WARNING patrol line.
assert_eq!(report.status, HealthLevel::Critical);
}
}