nixfleet_control_plane/timers/
prune_timer.rs

1//! Hourly SQLite + backup-file hygiene sweep; idempotent steps, kill-safe at any tick.
2
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5use std::time::{Duration, SystemTime};
6
7use tokio_util::sync::CancellationToken;
8
9use crate::db::Db;
10
/// Period of the prune ticker: one sweep per hour.
const TICK_INTERVAL: Duration = Duration::from_secs(60 * 60);
/// Retention, in hours, handed to `prune_token_replay`.
const TOKEN_REPLAY_RETENTION_HOURS: i64 = 24;
/// Retention, in hours, handed to `prune_finished_rollouts`: 90 days.
///
/// Covers the operator's 60-day-back release investigation window.
/// NOTE(review): the value is 90 days while the stated window is 60 days; confirm the extra
/// 30 days are intentional margin rather than a stale comment or a stale value.
const FINISHED_ROLLOUTS_RETENTION_HOURS: i64 = 24 * 90;
/// Backup files whose mtime is older than this many days are deleted by the sweep.
const BACKUP_RETENTION_DAYS: u64 = 14;
/// Only files whose name starts with this prefix are considered backups.
const BACKUP_FILENAME_PREFIX: &str = "state.db.pre-";
17
18/// `db_path = None` skips the filesystem backup sweep (in-memory deployments).
19pub fn spawn(
20    cancel: CancellationToken,
21    db: Arc<Db>,
22    db_path: Option<PathBuf>,
23) -> tokio::task::JoinHandle<()> {
24    tokio::spawn(async move {
25        let mut ticker = tokio::time::interval(TICK_INTERVAL);
26        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
27
28        loop {
29            tokio::select! {
30                _ = cancel.cancelled() => {
31                    tracing::info!(target: "shutdown", task = "prune_timer", "task shut down");
32                    return;
33                }
34                _ = ticker.tick() => {}
35            }
36            let token_pruned = try_prune("token_replay", || {
37                db.tokens().prune_token_replay(TOKEN_REPLAY_RETENTION_HOURS)
38            });
39            let rollouts_pruned = match db
40                .rollouts()
41                .prune_finished_rollouts(FINISHED_ROLLOUTS_RETENTION_HOURS)
42            {
43                Ok((_hrs_pruned, rollouts_pruned)) => rollouts_pruned,
44                Err(err) => {
45                    tracing::warn!(error = %err, "prune timer: finished_rollouts failed");
46                    0
47                }
48            };
49            let backups_pruned = db_path
50                .as_deref()
51                .and_then(Path::parent)
52                .map(|parent| {
53                    try_prune("state.db backup sweep", || {
54                        prune_backup_files(parent, BACKUP_FILENAME_PREFIX, BACKUP_RETENTION_DAYS)
55                    })
56                })
57                .unwrap_or(0);
58            tracing::info!(
59                target: "prune",
60                token_replay = token_pruned,
61                rollouts = rollouts_pruned,
62                state_db_backups = backups_pruned,
63                "prune timer: hourly sweep complete",
64            );
65        }
66    })
67}
68
69/// On `Err` logs a warn and returns 0 so the sweep continues.
70fn try_prune<E>(name: &str, f: impl FnOnce() -> std::result::Result<usize, E>) -> usize
71where
72    E: std::fmt::Display,
73{
74    match f() {
75        Ok(n) => n,
76        Err(err) => {
77            tracing::warn!(error = %err, "prune timer: {name} failed");
78            0
79        }
80    }
81}
82
83/// Per-file delete errors are logged + skipped; enumeration errors propagate.
84pub(crate) fn prune_backup_files(
85    parent: &Path,
86    prefix: &str,
87    retention_days: u64,
88) -> std::io::Result<usize> {
89    let cutoff = SystemTime::now()
90        .checked_sub(Duration::from_secs(retention_days * 24 * 60 * 60))
91        .unwrap_or(SystemTime::UNIX_EPOCH);
92
93    let mut deleted = 0usize;
94    let entries = match std::fs::read_dir(parent) {
95        Ok(it) => it,
96        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(0),
97        Err(err) => return Err(err),
98    };
99    for entry in entries {
100        let entry = match entry {
101            Ok(e) => e,
102            Err(err) => {
103                tracing::warn!(error = %err, "prune timer: read_dir entry failed");
104                continue;
105            }
106        };
107        let name = entry.file_name();
108        let Some(name_str) = name.to_str() else {
109            continue;
110        };
111        if !name_str.starts_with(prefix) {
112            continue;
113        }
114        let metadata = match entry.metadata() {
115            Ok(m) => m,
116            Err(err) => {
117                tracing::warn!(
118                    file = %name_str,
119                    error = %err,
120                    "prune timer: backup metadata failed",
121                );
122                continue;
123            }
124        };
125        if !metadata.is_file() {
126            continue;
127        }
128        let mtime = match metadata.modified() {
129            Ok(t) => t,
130            Err(err) => {
131                tracing::warn!(
132                    file = %name_str,
133                    error = %err,
134                    "prune timer: backup mtime unavailable",
135                );
136                continue;
137            }
138        };
139        if mtime >= cutoff {
140            continue;
141        }
142        let path = entry.path();
143        match std::fs::remove_file(&path) {
144            Ok(()) => {
145                tracing::info!(
146                    target: "prune",
147                    file = %path.display(),
148                    "pruned stale state.db backup",
149                );
150                deleted += 1;
151            }
152            Err(err) => {
153                tracing::warn!(
154                    file = %path.display(),
155                    error = %err,
156                    "prune timer: backup delete failed",
157                );
158            }
159        }
160    }
161    Ok(deleted)
162}
163
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Age given to files that must fall outside the 14-day retention used below.
    const THIRTY_DAYS: Duration = Duration::from_secs(30 * 24 * 60 * 60);

    /// Creates (or truncates) `path` and back-dates its mtime by `age`.
    fn touch(path: &Path, age: Duration) {
        let file = std::fs::File::create(path).unwrap();
        let backdated = SystemTime::now() - age;
        file.set_modified(backdated).unwrap();
    }

    #[test]
    fn prune_backup_files_drops_old_keeps_young() {
        let workdir = tempfile::tempdir().unwrap();
        let root = workdir.path();

        let stale_backup = root.join("state.db.pre-phase2-20240101-000000");
        let fresh_backup = root.join("state.db.pre-phase2-20260430-235959");
        let live_db = root.join("state.db");
        touch(&stale_backup, THIRTY_DAYS);
        touch(&fresh_backup, Duration::from_secs(60));
        touch(&live_db, THIRTY_DAYS);

        let removed = prune_backup_files(root, "state.db.pre-", 14).unwrap();

        assert_eq!(removed, 1);
        assert!(!stale_backup.exists(), "old backup should be deleted");
        assert!(fresh_backup.exists(), "young backup should be kept");
        assert!(live_db.exists(), "non-backup file should be untouched");
    }

    #[test]
    fn prune_backup_files_returns_zero_when_dir_missing() {
        let missing = Path::new("/nonexistent/path/that/should/not/exist");
        let removed = prune_backup_files(missing, "state.db.pre-", 14).unwrap();
        assert_eq!(removed, 0);
    }
}