修复冷备份后 API 恢复

备份脚本支持冷备份后重启依赖服务

生产备份与发布脚本恢复 genarrative-api 服务

api-server 启动恢复 SpacetimeDB 超时后持续重试

同步更新后端与运维文档口径
This commit is contained in:
kdletters
2026-06-09 12:35:27 +08:00
parent c9c66f046b
commit 568509027c
6 changed files with 60 additions and 10 deletions

View File

@@ -119,6 +119,7 @@ use crate::{
/// Stack size in bytes (32 MiB) for API server startup.
/// NOTE(review): the use site is outside this view — presumably the stack of the thread
/// that runs startup; confirm against the caller.
const API_SERVER_STARTUP_STACK_SIZE_BYTES: usize = 32 * 1024 * 1024;
/// Upper bound (8 seconds) on one startup attempt to restore state from SpacetimeDB;
/// exceeding it is reported as a dependency-unavailable error.
const AUTH_STORE_STARTUP_RESTORE_TIMEOUT: Duration = Duration::from_secs(8);
/// Pause (5 seconds) between startup restore attempts while the dependency is unavailable.
const AUTH_STORE_STARTUP_RETRY_INTERVAL: Duration = Duration::from_secs(5);
#[derive(Clone)]
struct ShutdownContext {
@@ -318,6 +319,25 @@ fn build_tcp_listener(
/// Restores the application state at startup, retrying while the dependency is unavailable.
///
/// Every attempt is delegated to `try_restore_app_state_for_startup` with a fresh clone of
/// `config`. When an attempt fails with `DependencyUnavailable`, a warning is logged and the
/// next attempt starts after `AUTH_STORE_STARTUP_RETRY_INTERVAL`. The number of attempts is
/// not capped.
///
/// # Errors
///
/// Any `AppStateInitError` other than `DependencyUnavailable` is returned immediately.
async fn restore_app_state_for_startup(
    config: AppConfig,
) -> Result<AppState, state::AppStateInitError> {
    loop {
        // Only the "dependency unavailable" outcome is retryable; success and every other
        // error leave the loop unchanged.
        let unavailable_reason = match try_restore_app_state_for_startup(config.clone()).await {
            Err(state::AppStateInitError::DependencyUnavailable(reason)) => reason,
            outcome => return outcome,
        };

        warn!(
            retry_after_seconds = AUTH_STORE_STARTUP_RETRY_INTERVAL.as_secs(),
            error = %unavailable_reason,
            "启动恢复 SpacetimeDB 认证快照暂不可用api-server 将继续重试"
        );
        tokio::time::sleep(AUTH_STORE_STARTUP_RETRY_INTERVAL).await;
    }
}
async fn try_restore_app_state_for_startup(
config: AppConfig,
) -> Result<AppState, state::AppStateInitError> {
match timeout(
AUTH_STORE_STARTUP_RESTORE_TIMEOUT,
@@ -329,7 +349,7 @@ async fn restore_app_state_for_startup(
Err(_) => {
error!(
timeout_seconds = AUTH_STORE_STARTUP_RESTORE_TIMEOUT.as_secs(),
"启动等待 SpacetimeDB 恢复认证快照超时api-server 将进入依赖不可用模式"
"启动等待 SpacetimeDB 恢复认证快照超时"
);
Err(state::AppStateInitError::DependencyUnavailable(
"SpacetimeDB 启动恢复认证快照超时".to_string(),
@@ -412,7 +432,10 @@ fn is_valid_env_key(key: &str) -> bool {
#[cfg(test)]
mod tests {
use super::{is_valid_env_key, protected_env_keys_from, strip_env_value};
use super::{
AUTH_STORE_STARTUP_RETRY_INTERVAL, is_valid_env_key, protected_env_keys_from,
strip_env_value,
};
#[test]
fn strip_env_value_removes_wrapping_quotes() {
@@ -453,4 +476,9 @@ mod tests {
assert!(!protected.contains("ALIYUN_OSS_ENDPOINT"));
assert!(protected.contains("ALIYUN_OSS_ACCESS_KEY_ID"));
}
/// Pins the startup retry interval at exactly five seconds, so any change to the constant
/// has to be made deliberately alongside this test.
#[test]
fn startup_dependency_retry_interval_is_short_enough_for_service_recovery() {
    let retry_interval_seconds = AUTH_STORE_STARTUP_RETRY_INTERVAL.as_secs();
    assert_eq!(retry_interval_seconds, 5);
}
}