新增 egui 服务器管理面板并支持 SSH alias 多服务器巡检 接入硬件状态、服务状态、HTTP 探测和生产巡检状态展示 增加受控 systemd 启动关闭重启操作和中文字体注入 补充本地服务器面板技术方案与团队共享记忆
475 lines
14 KiB
Rust
475 lines
14 KiB
Rust
use std::collections::BTreeMap;
|
|
|
|
/// Severity attached to a single check (service, probe, patrol) and to the
/// report as a whole.
///
/// NOTE(review): the derived `Ord` follows declaration order
/// (`Unknown < Ok < Warning < Critical`), whereas [`HealthLevel::rank`] places
/// `Ok` below `Unknown`. Use `rank()` when picking the "worst" level; confirm
/// whether the derived ordering is relied on anywhere before changing it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum HealthLevel {
    /// No usable information was available to classify the check.
    Unknown,
    /// The check passed.
    Ok,
    /// The check is degraded but not failing outright.
    Warning,
    /// The check failed.
    Critical,
}
|
|
|
|
impl HealthLevel {
|
|
pub fn label(self) -> &'static str {
|
|
match self {
|
|
HealthLevel::Unknown => "未知",
|
|
HealthLevel::Ok => "正常",
|
|
HealthLevel::Warning => "警告",
|
|
HealthLevel::Critical => "异常",
|
|
}
|
|
}
|
|
|
|
pub fn rank(self) -> u8 {
|
|
match self {
|
|
HealthLevel::Unknown => 1,
|
|
HealthLevel::Ok => 0,
|
|
HealthLevel::Warning => 2,
|
|
HealthLevel::Critical => 3,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ServerHealthReport {
|
|
pub status: HealthLevel,
|
|
pub checked_at: String,
|
|
pub host: HostSnapshot,
|
|
pub hardware: HardwareSnapshot,
|
|
pub services: Vec<ServiceSnapshot>,
|
|
pub probes: Vec<ProbeSnapshot>,
|
|
pub health_patrol: Option<HealthPatrolSnapshot>,
|
|
pub raw_output: String,
|
|
}
|
|
|
|
/// Host identification values; each field is the first non-blank line of the
/// section with the same name, or empty when that section is missing.
#[derive(Debug, Clone, Default)]
pub struct HostSnapshot {
    /// Value of the `hostname` section.
    pub hostname: String,
    /// Value of the `kernel` section.
    pub kernel: String,
    /// Value of the `uptime` section.
    pub uptime: String,
}
|
|
|
|
#[derive(Debug, Clone, Default)]
|
|
pub struct HardwareSnapshot {
|
|
pub cpu_model: String,
|
|
pub cpu_cores: String,
|
|
pub load_average: String,
|
|
pub memory: MemorySnapshot,
|
|
pub swap: MemorySnapshot,
|
|
pub disks: Vec<DiskSnapshot>,
|
|
pub sensors: Vec<String>,
|
|
}
|
|
|
|
/// Memory (or swap) figures taken from one `|`-separated report line.
///
/// The size fields are kept as the display text emitted by the script.
#[derive(Debug, Clone, Default)]
pub struct MemorySnapshot {
    /// First field: total size.
    pub total: String,
    /// Second field: used size.
    pub used: String,
    /// Third field: free size.
    pub free: String,
    /// Fourth field: available size.
    pub available: String,
    /// Fifth field parsed as a percentage; `None` when missing or unparsable.
    pub used_percent: Option<u8>,
}
|
|
|
|
/// Usage figures for one filesystem, taken from a `|`-separated `disks` line.
#[derive(Debug, Clone, Default)]
pub struct DiskSnapshot {
    /// Mount point (sixth field of the line).
    pub mount: String,
    /// Filesystem / device name (first field).
    pub filesystem: String,
    /// Total size as display text (second field).
    pub size: String,
    /// Used size as display text (third field).
    pub used: String,
    /// Available size as display text (fourth field).
    pub available: String,
    /// Fifth field parsed as a percentage; `None` when unparsable.
    pub used_percent: Option<u8>,
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ServiceSnapshot {
|
|
pub name: String,
|
|
pub active: String,
|
|
pub sub: String,
|
|
pub unit_file: String,
|
|
pub level: HealthLevel,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ProbeSnapshot {
|
|
pub name: String,
|
|
pub target: String,
|
|
pub http_code: String,
|
|
pub elapsed_ms: Option<u64>,
|
|
pub level: HealthLevel,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct HealthPatrolSnapshot {
|
|
pub status: String,
|
|
pub checked_at: String,
|
|
pub summary: String,
|
|
pub level: HealthLevel,
|
|
}
|
|
|
|
pub fn parse_health_report(raw_output: &str) -> ServerHealthReport {
|
|
let mut sections: BTreeMap<String, Vec<String>> = BTreeMap::new();
|
|
let mut current = String::new();
|
|
|
|
for line in raw_output.lines() {
|
|
if let Some(name) = parse_section_marker(line) {
|
|
current = name.to_owned();
|
|
sections.entry(current.clone()).or_default();
|
|
} else if !current.is_empty() {
|
|
sections
|
|
.entry(current.clone())
|
|
.or_default()
|
|
.push(line.to_owned());
|
|
}
|
|
}
|
|
|
|
let mut report = ServerHealthReport {
|
|
status: HealthLevel::Unknown,
|
|
checked_at: section_value(§ions, "checked_at").unwrap_or_default(),
|
|
host: parse_host(§ions),
|
|
hardware: parse_hardware(§ions),
|
|
services: parse_services(§ions),
|
|
probes: parse_probes(§ions),
|
|
health_patrol: parse_health_patrol(§ions),
|
|
raw_output: raw_output.to_owned(),
|
|
};
|
|
report.status = summarize_report(&report);
|
|
report
|
|
}
|
|
|
|
pub fn summarize_report(report: &ServerHealthReport) -> HealthLevel {
|
|
let mut status = HealthLevel::Ok;
|
|
for level in report
|
|
.services
|
|
.iter()
|
|
.map(|service| service.level)
|
|
.chain(report.probes.iter().map(|probe| probe.level))
|
|
.chain(report.health_patrol.iter().map(|patrol| patrol.level))
|
|
{
|
|
if level.rank() > status.rank() {
|
|
status = level;
|
|
}
|
|
}
|
|
|
|
if let Some(used_percent) = report.hardware.memory.used_percent {
|
|
let memory_level = if used_percent >= 95 {
|
|
HealthLevel::Critical
|
|
} else if used_percent >= 85 {
|
|
HealthLevel::Warning
|
|
} else {
|
|
HealthLevel::Ok
|
|
};
|
|
if memory_level.rank() > status.rank() {
|
|
status = memory_level;
|
|
}
|
|
}
|
|
|
|
for disk in &report.hardware.disks {
|
|
let disk_level = match disk.used_percent {
|
|
Some(percent) if percent >= 95 => HealthLevel::Critical,
|
|
Some(percent) if percent >= 85 => HealthLevel::Warning,
|
|
_ => HealthLevel::Ok,
|
|
};
|
|
if disk_level.rank() > status.rank() {
|
|
status = disk_level;
|
|
}
|
|
}
|
|
|
|
status
|
|
}
|
|
|
|
/// Returns the section name if `line` is exactly a marker of the form
/// `==GENARRATIVE_PANEL:<name>==`, otherwise `None`.
fn parse_section_marker(line: &str) -> Option<&str> {
    let rest = line.strip_prefix("==GENARRATIVE_PANEL:")?;
    rest.strip_suffix("==")
}
|
|
|
|
/// Returns the first non-blank line of section `name`, trimmed of surrounding
/// whitespace, or `None` when the section is missing or has only blank lines.
fn section_value(sections: &BTreeMap<String, Vec<String>>, name: &str) -> Option<String> {
    let lines = sections.get(name)?;
    for line in lines {
        let trimmed = line.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_owned());
        }
    }
    None
}
|
|
|
|
fn parse_host(sections: &BTreeMap<String, Vec<String>>) -> HostSnapshot {
|
|
HostSnapshot {
|
|
hostname: section_value(sections, "hostname").unwrap_or_default(),
|
|
kernel: section_value(sections, "kernel").unwrap_or_default(),
|
|
uptime: section_value(sections, "uptime").unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
fn parse_hardware(sections: &BTreeMap<String, Vec<String>>) -> HardwareSnapshot {
|
|
HardwareSnapshot {
|
|
cpu_model: section_value(sections, "cpu_model").unwrap_or_default(),
|
|
cpu_cores: section_value(sections, "cpu_cores").unwrap_or_default(),
|
|
load_average: section_value(sections, "load_average").unwrap_or_default(),
|
|
memory: parse_memory(section_value(sections, "memory").as_deref()),
|
|
swap: parse_memory(section_value(sections, "swap").as_deref()),
|
|
disks: parse_disks(sections),
|
|
sensors: sections.get("sensors").cloned().unwrap_or_default(),
|
|
}
|
|
}
|
|
|
|
fn parse_memory(value: Option<&str>) -> MemorySnapshot {
|
|
let Some(value) = value else {
|
|
return MemorySnapshot::default();
|
|
};
|
|
let parts: Vec<&str> = value.split('|').collect();
|
|
MemorySnapshot {
|
|
total: parts.first().copied().unwrap_or_default().to_owned(),
|
|
used: parts.get(1).copied().unwrap_or_default().to_owned(),
|
|
free: parts.get(2).copied().unwrap_or_default().to_owned(),
|
|
available: parts.get(3).copied().unwrap_or_default().to_owned(),
|
|
used_percent: parts.get(4).and_then(|value| parse_percent(value)),
|
|
}
|
|
}
|
|
|
|
fn parse_disks(sections: &BTreeMap<String, Vec<String>>) -> Vec<DiskSnapshot> {
|
|
sections
|
|
.get("disks")
|
|
.into_iter()
|
|
.flatten()
|
|
.filter_map(|line| {
|
|
let parts: Vec<&str> = line.split('|').collect();
|
|
(parts.len() >= 6).then(|| DiskSnapshot {
|
|
filesystem: parts[0].to_owned(),
|
|
size: parts[1].to_owned(),
|
|
used: parts[2].to_owned(),
|
|
available: parts[3].to_owned(),
|
|
used_percent: parse_percent(parts[4]),
|
|
mount: parts[5].to_owned(),
|
|
})
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
fn parse_services(sections: &BTreeMap<String, Vec<String>>) -> Vec<ServiceSnapshot> {
|
|
sections
|
|
.get("services")
|
|
.into_iter()
|
|
.flatten()
|
|
.filter_map(|line| {
|
|
let parts: Vec<&str> = line.split('|').collect();
|
|
(parts.len() >= 4).then(|| {
|
|
let active = parts[1].to_owned();
|
|
let sub = parts[2].to_owned();
|
|
let level = if active == "active" {
|
|
HealthLevel::Ok
|
|
} else if active == "unknown" || active == "inactive" {
|
|
HealthLevel::Warning
|
|
} else {
|
|
HealthLevel::Critical
|
|
};
|
|
ServiceSnapshot {
|
|
name: parts[0].to_owned(),
|
|
active,
|
|
sub,
|
|
unit_file: parts[3].to_owned(),
|
|
level,
|
|
}
|
|
})
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
fn parse_probes(sections: &BTreeMap<String, Vec<String>>) -> Vec<ProbeSnapshot> {
|
|
sections
|
|
.get("probes")
|
|
.into_iter()
|
|
.flatten()
|
|
.filter_map(|line| {
|
|
let parts: Vec<&str> = line.split('|').collect();
|
|
(parts.len() >= 4).then(|| {
|
|
let http_code = parts[2].to_owned();
|
|
let elapsed_ms = parts[3].parse().ok();
|
|
let level = if http_code.starts_with('2') {
|
|
HealthLevel::Ok
|
|
} else if http_code == "000" {
|
|
HealthLevel::Critical
|
|
} else {
|
|
HealthLevel::Critical
|
|
};
|
|
ProbeSnapshot {
|
|
name: parts[0].to_owned(),
|
|
target: parts[1].to_owned(),
|
|
http_code,
|
|
elapsed_ms,
|
|
level,
|
|
}
|
|
})
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
fn parse_health_patrol(sections: &BTreeMap<String, Vec<String>>) -> Option<HealthPatrolSnapshot> {
|
|
let line = section_value(sections, "health_patrol")?;
|
|
let parts: Vec<&str> = line.split('|').collect();
|
|
let status = parts.first().copied().unwrap_or_default().to_owned();
|
|
let level = match status.as_str() {
|
|
"OK" => HealthLevel::Ok,
|
|
"WARNING" => HealthLevel::Warning,
|
|
"CRITICAL" => HealthLevel::Critical,
|
|
_ => HealthLevel::Unknown,
|
|
};
|
|
Some(HealthPatrolSnapshot {
|
|
status,
|
|
checked_at: parts.get(1).copied().unwrap_or_default().to_owned(),
|
|
summary: parts.get(2).copied().unwrap_or_default().to_owned(),
|
|
level,
|
|
})
|
|
}
|
|
|
|
/// Parses a percentage such as `"50%"` into `Some(50)`.
///
/// Trailing `%` characters are removed before parsing; anything that is not a
/// valid `u8` afterwards (empty, non-numeric, above 255) yields `None`.
fn parse_percent(value: &str) -> Option<u8> {
    let digits = value.trim_end_matches('%');
    digits.parse::<u8>().ok()
}
|
|
|
|
/// POSIX shell script that collects the data consumed by
/// [`parse_health_report`].
///
/// Each block of output is introduced by a `==GENARRATIVE_PANEL:<name>==`
/// marker line. Multi-field rows (memory, swap, disks, services, probes,
/// health_patrol) use `|` as the field separator.
///
/// The disk rows are de-duplicated by mount point. That `awk` filter must use
/// `-F'|'`: with the default whitespace separator `$6` is always empty, so
/// every row after the first was discarded.
///
/// NOTE(review): memory "used" is computed as `MemTotal - MemFree`, which
/// counts page cache and buffers as used. Confirm this is the intended basis
/// for the 85% / 95% thresholds applied in `summarize_report`.
pub const HEALTH_SCRIPT: &str = r#"set -eu

print_section() {
  printf '==GENARRATIVE_PANEL:%s==\n' "$1"
}

print_section checked_at
date -Is 2>/dev/null || date

print_section hostname
hostname 2>/dev/null || true

print_section kernel
uname -srmo 2>/dev/null || uname -a 2>/dev/null || true

print_section uptime
uptime -p 2>/dev/null || uptime 2>/dev/null || true

print_section cpu_model
awk -F: '/model name/ {gsub(/^[ \t]+/, "", $2); print $2; exit}' /proc/cpuinfo 2>/dev/null || true

print_section cpu_cores
nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || true

print_section load_average
cat /proc/loadavg 2>/dev/null | awk '{print $1" "$2" "$3}' || true

print_section memory
awk '
  /^MemTotal:/ {total=$2}
  /^MemFree:/ {free=$2}
  /^MemAvailable:/ {available=$2}
  END {
    if (total > 0) {
      used = total - free
      percent = int((used * 100 + total / 2) / total)
      printf "%.1f GiB|%.1f GiB|%.1f GiB|%.1f GiB|%d%%\n", total/1048576, used/1048576, free/1048576, available/1048576, percent
    }
  }
' /proc/meminfo 2>/dev/null || true

print_section swap
awk '
  /^SwapTotal:/ {total=$2}
  /^SwapFree:/ {free=$2}
  END {
    if (total > 0) {
      used = total - free
      percent = int((used * 100 + total / 2) / total)
      printf "%.1f GiB|%.1f GiB|%.1f GiB|%.1f GiB|%d%%\n", total/1048576, used/1048576, free/1048576, free/1048576, percent
    } else {
      print "0 GiB|0 GiB|0 GiB|0 GiB|0%"
    }
  }
' /proc/meminfo 2>/dev/null || true

print_section disks
for mount in / /var /opt /stdb /data; do
  if [ -e "$mount" ]; then
    df -hP "$mount" 2>/dev/null | awk 'NR == 2 {print $1"|"$2"|"$3"|"$4"|"$5"|"$6}'
  fi
done | awk -F'|' '!seen[$6]++'

print_section sensors
if command -v sensors >/dev/null 2>&1; then
  sensors 2>/dev/null | sed -n '1,20p'
else
  echo "sensors 未安装"
fi

print_section services
for service in genarrative-api.service spacetimedb.service nginx.service genarrative-health-patrol.timer genarrative-database-backup.timer; do
  active=$(systemctl is-active "$service" 2>/dev/null || true)
  sub=$(systemctl show "$service" -p SubState --value 2>/dev/null || true)
  unit_file=$(systemctl show "$service" -p UnitFileState --value 2>/dev/null || true)
  [ -n "$active" ] || active="unknown"
  [ -n "$sub" ] || sub="unknown"
  [ -n "$unit_file" ] || unit_file="unknown"
  printf '%s|%s|%s|%s\n' "$service" "$active" "$sub" "$unit_file"
done

print_section probes
probe() {
  name="$1"
  url="$2"
  tmp=$(mktemp)
  code=$(curl -fsS -m 5 -o /dev/null -w '%{http_code}|%{time_total}' "$url" 2>"$tmp" || true)
  if [ -z "$code" ]; then
    code="000|0"
  fi
  http_code=${code%%|*}
  time_total=${code#*|}
  elapsed_ms=$(awk "BEGIN {printf \"%d\", $time_total * 1000}")
  printf '%s|%s|%s|%s\n' "$name" "$url" "$http_code" "$elapsed_ms"
  rm -f "$tmp"
}
probe "api:/healthz" "http://127.0.0.1:8082/healthz"
probe "api:/readyz" "http://127.0.0.1:8082/readyz"
probe "spacetimedb:/v1/ping" "http://127.0.0.1:3101/v1/ping"
probe "public:/api/creation-entry/config" "http://127.0.0.1:8082/api/creation-entry/config"
probe "public:/api/runtime/puzzle/gallery" "http://127.0.0.1:8082/api/runtime/puzzle/gallery"

print_section health_patrol
if [ -r /var/lib/genarrative/health-patrol/status.json ]; then
  node -e '
    const fs = require("fs");
    const payload = JSON.parse(fs.readFileSync("/var/lib/genarrative/health-patrol/status.json", "utf8"));
    const status = payload.status || "UNKNOWN";
    const checkedAt = payload.checkedAt || "";
    const checks = Array.isArray(payload.checks) ? payload.checks : [];
    const summary = checks.filter((check) => check.status && check.status !== "OK").slice(0, 3).map((check) => `${check.name}:${check.status}`).join(",");
    console.log(`${status}|${checkedAt}|${summary}`);
  ' 2>/dev/null || echo "UNKNOWN||状态文件解析失败"
else
  echo "UNKNOWN||未找到 /var/lib/genarrative/health-patrol/status.json"
fi
"#;
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check: a small multi-section report is split into sections,
    /// each section is parsed, and the failed service drives the overall
    /// status to `Critical`.
    #[test]
    fn parses_report_sections() {
        // The fixture lines start at column 0 on purpose: section markers are
        // only recognised when they begin the line.
        let report = parse_health_report(
            r#"==GENARRATIVE_PANEL:checked_at==
2026-06-11T12:00:00+08:00
==GENARRATIVE_PANEL:hostname==
release
==GENARRATIVE_PANEL:memory==
2.0 GiB|1.0 GiB|1.0 GiB|1.0 GiB|50%
==GENARRATIVE_PANEL:disks==
/dev/sda1|40G|20G|20G|50%|/
==GENARRATIVE_PANEL:services==
genarrative-api.service|active|running|enabled
spacetimedb.service|failed|failed|enabled
==GENARRATIVE_PANEL:probes==
api:/readyz|http://127.0.0.1:8082/readyz|200|18
==GENARRATIVE_PANEL:health_patrol==
WARNING|2026-06-11T11:59:00Z|journal:WARNING
"#,
        );

        assert_eq!(report.checked_at, "2026-06-11T12:00:00+08:00");
        assert_eq!(report.host.hostname, "release");
        assert_eq!(report.hardware.memory.used_percent, Some(50));
        assert_eq!(report.hardware.disks.len(), 1);
        assert_eq!(report.hardware.disks[0].mount, "/");
        assert_eq!(report.services.len(), 2);
        assert_eq!(report.services[1].level, HealthLevel::Critical);
        assert_eq!(report.probes[0].http_code, "200");
        assert_eq!(report.probes[0].elapsed_ms, Some(18));
        assert_eq!(
            report.health_patrol.as_ref().map(|patrol| patrol.level),
            Some(HealthLevel::Warning)
        );
        assert_eq!(report.status, HealthLevel::Critical);
    }
}
|