nixfleet_agent/runtime/workers/
cert_renewal.rs1use std::time::Duration;
24
25use nixfleet_proto::clock::ClockHandle;
26use tokio::task::JoinHandle;
27
28use super::super::{AgentConfig, ShutdownToken};
29
/// Period of the ticker that drives renewal checks in the worker loop.
const CHECK_INTERVAL: Duration = Duration::from_secs(60);
36
/// Delay the worker waits after a failed renewal check before it resumes ticking.
const ERROR_BACKOFF: Duration = Duration::from_secs(30);
38
39pub fn spawn(
40 cfg: AgentConfig,
41 clock: ClockHandle,
42 shutdown: ShutdownToken,
43) -> JoinHandle<()> {
44 tokio::spawn(async move {
45 let mut shutdown_rx = shutdown.into_inner();
46 let Some(threshold) = cfg.renewal_threshold_fraction else {
47 tracing::info!(
48 target: "agent_cert_renewal",
49 "renewal_threshold_fraction unset — cert renewal worker disabled",
50 );
51 let _ = shutdown_rx.await;
54 return;
55 };
56 if !(0.0 < threshold && threshold < 1.0) {
57 tracing::error!(
58 target: "agent_cert_renewal",
59 threshold,
60 "renewal_threshold_fraction must be strictly between 0 and 1 — worker exiting",
61 );
62 let _ = shutdown_rx.await;
63 return;
64 }
65 let Some(cert_path) = cfg.client_cert.clone() else {
66 tracing::info!(
67 target: "agent_cert_renewal",
68 "client_cert unset — renewal worker has nothing to renew; exiting",
69 );
70 let _ = shutdown_rx.await;
71 return;
72 };
73
74 let client = match crate::comms::build_client(
75 cfg.ca_cert.as_deref(),
76 cfg.client_cert.as_deref(),
77 cfg.client_key.as_deref(),
78 ) {
79 Ok(c) => c,
80 Err(err) => {
81 tracing::error!(
82 target: "agent_cert_renewal",
83 error = %err,
84 "failed to build mTLS HTTP client; worker exits",
85 );
86 let _ = shutdown_rx.await;
87 return;
88 }
89 };
90
91 let mut ticker = tokio::time::interval(CHECK_INTERVAL);
92 ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
93
94 loop {
95 tokio::select! {
96 biased;
97 _ = &mut shutdown_rx => {
98 tracing::info!(
99 target: "shutdown",
100 task = "agent_cert_renewal",
101 "task shut down",
102 );
103 return;
104 }
105 _ = ticker.tick() => {
106 if let Err(err) = maybe_renew_once(
107 &client,
108 &cfg,
109 &clock,
110 &cert_path,
111 threshold,
112 ).await {
113 tracing::warn!(
114 target: "agent_cert_renewal",
115 error = %err,
116 "renewal check failed; backing off",
117 );
118 tokio::time::sleep(ERROR_BACKOFF).await;
119 }
120 }
121 }
122 }
123 })
124}
125
126async fn maybe_renew_once(
127 client: &reqwest::Client,
128 cfg: &AgentConfig,
129 clock: &ClockHandle,
130 cert_path: &std::path::Path,
131 threshold: f64,
132) -> anyhow::Result<()> {
133 let (remaining, not_after) =
134 crate::enrollment::cert_remaining_fraction(cert_path, clock.now())?;
135 if remaining >= threshold {
136 return Ok(());
137 }
138 tracing::info!(
139 target: "agent_cert_renewal",
140 remaining,
141 threshold,
142 not_after = %not_after.to_rfc3339(),
143 "cert remaining fraction below threshold; renewing",
144 );
145 crate::enrollment::renew(
146 client,
147 &cfg.control_plane_url,
148 &cfg.machine_id,
149 cert_path,
150 &cfg.ssh_host_key_file,
151 )
152 .await?;
153 tracing::info!(
154 target: "agent_cert_renewal",
155 cert = %cert_path.display(),
156 "cert renewed — file rewritten; running workers continue with prior in-memory client \
157 until next agent restart picks up the new cert",
158 );
159 Ok(())
160}