nixfleet_agent/
enrollment.rs

1//! Bootstrap enrollment + cert renewal. Both flows sign the CSR with the
2//! host's SSH ed25519 key (RFC-0003 §2); the agent never generates keys.
3
4use std::path::Path;
5
6use anyhow::{Context, Result};
7use chrono::{DateTime, Utc};
8use nixfleet_proto::agent_wire::{PROTOCOL_MAJOR_VERSION, PROTOCOL_VERSION_HEADER};
9use nixfleet_proto::enroll_wire::{
10    BootstrapToken, EnrollRequest, EnrollResponse, RenewRequest, RenewResponse,
11};
12use rcgen::{CertificateParams, DnType, KeyPair};
13use reqwest::Client;
14use sha2::Digest;
15use x509_parser::prelude::*;
16
17/// Builds a CSR signed by the SSH host key; returns `(PEM CSR, raw 32-byte
18/// pubkey)`. CP rejects if the pubkey doesn't match `hosts.<hostname>.pubkey`.
19/// FOOTGUN: SSH key is OpenSSH PEM; rcgen wants PKCS#8 - we rewrap via the
20/// proto helper before handing to `KeyPair::from_pem`.
21pub fn generate_csr_from_ssh_host_key(
22    hostname: &str,
23    ssh_host_key_path: &Path,
24) -> Result<(String, [u8; 32])> {
25    let raw = std::fs::read_to_string(ssh_host_key_path)
26        .with_context(|| format!("read ssh host key {}", ssh_host_key_path.display()))?;
27    let private = ssh_key::PrivateKey::from_openssh(&raw)
28        .with_context(|| format!("parse OpenSSH key at {}", ssh_host_key_path.display()))?;
29    let seed = match private.key_data() {
30        ssh_key::private::KeypairData::Ed25519(kp) => kp.private.to_bytes(),
31        other => anyhow::bail!(
32            "ssh host key at {} is not ed25519 (algorithm: {:?})",
33            ssh_host_key_path.display(),
34            other.algorithm()
35        ),
36    };
37    let pkcs8_pem = nixfleet_proto::host_key::ed25519_pkcs8_pem_from_seed(&seed);
38    let key = KeyPair::from_pem(&pkcs8_pem).context("rcgen KeyPair::from_pem PKCS#8 ed25519")?;
39    let mut params = CertificateParams::default();
40    params.distinguished_name.push(DnType::CommonName, hostname);
41    let csr = params.serialize_request(&key).context("serialize CSR")?;
42    let csr_pem = csr.pem().context("CSR PEM encode")?;
43    let mut pubkey = [0u8; 32];
44    pubkey.copy_from_slice(key.public_key_raw());
45    Ok((csr_pem, pubkey))
46}
47
48/// base64 SHA-256 of raw pubkey bytes; matches CP's
49/// `expected_pubkey_fingerprint` field on the bootstrap token.
50pub fn fingerprint_pubkey_raw(pubkey_raw: &[u8]) -> String {
51    use base64::Engine;
52    let digest = sha2::Sha256::digest(pubkey_raw);
53    base64::engine::general_purpose::STANDARD.encode(digest)
54}
55
56pub async fn enroll(
57    client: &Client,
58    cp_url: &str,
59    hostname: &str,
60    token_file: &Path,
61    cert_path: &Path,
62    ssh_host_key_path: &Path,
63) -> Result<()> {
64    let token_raw = std::fs::read_to_string(token_file)
65        .with_context(|| format!("read bootstrap token {}", token_file.display()))?;
66    let token: BootstrapToken =
67        serde_json::from_str(&token_raw).context("parse bootstrap token")?;
68
69    let (csr_pem, _pubkey_raw) = generate_csr_from_ssh_host_key(hostname, ssh_host_key_path)?;
70
71    let url = format!("{}/v1/enroll", cp_url.trim_end_matches('/'));
72    let req = EnrollRequest { token, csr_pem };
73
74    // CP returns 503 "control plane not ready" between boot and first signed
75    // artifact landing (CI build window). Retry in-process instead of
76    // crashing the agent — systemd respawn loops on cold start lose minutes
77    // and look like agent defects in journals.
78    let body = loop {
79        let resp = client
80            .post(&url)
81            .header(PROTOCOL_VERSION_HEADER, PROTOCOL_MAJOR_VERSION.to_string())
82            .json(&req)
83            .send()
84            .await?;
85        let status = resp.status();
86        if status.is_success() {
87            break resp
88                .json::<EnrollResponse>()
89                .await
90                .context("parse enroll response")?;
91        }
92        let body_text = resp.text().await.unwrap_or_default();
93        if status == reqwest::StatusCode::SERVICE_UNAVAILABLE
94            && body_text.contains("control plane not ready")
95        {
96            tracing::info!(
97                target: "nixfleet_agent::enrollment",
98                "enroll: CP cold-starting (awaiting first signed artifact); retrying in 10s"
99            );
100            tokio::time::sleep(std::time::Duration::from_secs(10)).await;
101            continue;
102        }
103        anyhow::bail!("enroll {}: {}: {}", url, status, body_text);
104    };
105
106    // Write only the cert; the private key is the SSH host key already
107    // on disk at ssh_host_key_path. --client-key points there.
108    write_atomic(cert_path, body.cert_pem.as_bytes())?;
109    tracing::info!(
110        cert = %cert_path.display(),
111        ssh_host_key = %ssh_host_key_path.display(),
112        not_after = %body.not_after.to_rfc3339(),
113        "enrolled - wrote cert (key is ssh host key, not written)"
114    );
115    Ok(())
116}
117
118pub async fn renew(
119    client: &Client,
120    cp_url: &str,
121    hostname: &str,
122    cert_path: &Path,
123    ssh_host_key_path: &Path,
124) -> Result<()> {
125    let (csr_pem, _pubkey_raw) = generate_csr_from_ssh_host_key(hostname, ssh_host_key_path)?;
126    let url = format!("{}/v1/agent/renew", cp_url.trim_end_matches('/'));
127    let req = RenewRequest { csr_pem };
128    let resp = client
129        .post(&url)
130        .header(PROTOCOL_VERSION_HEADER, PROTOCOL_MAJOR_VERSION.to_string())
131        .json(&req)
132        .send()
133        .await?;
134    if !resp.status().is_success() {
135        anyhow::bail!(
136            "renew {}: {}: {}",
137            url,
138            resp.status(),
139            resp.text().await.unwrap_or_default()
140        );
141    }
142    let body: RenewResponse = resp.json().await.context("parse renew response")?;
143    write_atomic(cert_path, body.cert_pem.as_bytes())?;
144    tracing::info!(
145        cert = %cert_path.display(),
146        not_after = %body.not_after.to_rfc3339(),
147        "renewed - wrote cert (key unchanged: ssh host key)"
148    );
149    Ok(())
150}
151
152/// Tempfile + rename so a crash mid-write doesn't leave a half-written cert.
153fn write_atomic(path: &Path, contents: &[u8]) -> Result<()> {
154    let parent = path.parent().context("path has no parent")?;
155    let tmp = parent.join(format!(
156        ".{}-tmp",
157        path.file_name()
158            .map(|n| n.to_string_lossy().into_owned())
159            .unwrap_or_else(|| "out".to_string())
160    ));
161    std::fs::write(&tmp, contents).with_context(|| format!("write {}", tmp.display()))?;
162    std::fs::rename(&tmp, path)
163        .with_context(|| format!("rename {} -> {}", tmp.display(), path.display()))?;
164    Ok(())
165}
166
167/// Returns `(remaining_fraction, not_after)`; `< 0.5` means time to renew.
168pub fn cert_remaining_fraction(
169    cert_path: &Path,
170    now: DateTime<Utc>,
171) -> Result<(f64, DateTime<Utc>)> {
172    let pem = std::fs::read_to_string(cert_path)
173        .with_context(|| format!("read cert {}", cert_path.display()))?;
174    let der = pem::parse(pem.as_bytes()).map_err(|e| anyhow::anyhow!("parse cert PEM: {e}"))?;
175    let (_, cert) = X509Certificate::from_der(der.contents())
176        .map_err(|e| anyhow::anyhow!("parse cert DER: {e}"))?;
177    let not_before = cert.validity().not_before.timestamp();
178    let not_after = cert.validity().not_after.timestamp();
179    let total = (not_after - not_before).max(1) as f64;
180    let elapsed = (now.timestamp() - not_before).max(0) as f64;
181    let remaining = (total - elapsed).max(0.0) / total;
182    let na_dt = DateTime::<Utc>::from_timestamp(not_after, 0)
183        .unwrap_or_else(|| Utc::now() + chrono::Duration::seconds(1));
184    Ok((remaining, na_dt))
185}
186
187mod pem {
188    use anyhow::{Context, Result};
189    pub struct Parsed {
190        contents: Vec<u8>,
191    }
192    impl Parsed {
193        pub fn contents(&self) -> &[u8] {
194            &self.contents
195        }
196    }
197    pub fn parse(input: &[u8]) -> Result<Parsed> {
198        use base64::Engine;
199        let s = std::str::from_utf8(input).context("PEM not UTF-8")?;
200        let body: String = s
201            .lines()
202            .filter(|l| !l.starts_with("-----"))
203            .collect::<Vec<_>>()
204            .join("");
205        let bytes = base64::engine::general_purpose::STANDARD
206            .decode(body)
207            .context("PEM base64 decode")?;
208        Ok(Parsed { contents: bytes })
209    }
210}
211
#[cfg(test)]
mod ssh_host_key_csr_tests {
    use super::*;
    use ed25519_dalek::SigningKey;
    use rand::RngCore;
    use ssh_key::{LineEnding, PrivateKey};

    /// Generates a random ed25519 key, encodes it as an OpenSSH private key,
    /// writes it to `dir/ssh_host_ed25519_key`, and returns that path.
    fn write_test_ssh_host_key(dir: &Path) -> std::path::PathBuf {
        let mut seed = [0u8; 32];
        rand::rng().fill_bytes(&mut seed);
        let signing = SigningKey::from_bytes(&seed);

        let public = ssh_key::public::Ed25519PublicKey(signing.verifying_key().to_bytes());
        let private = ssh_key::private::Ed25519PrivateKey::from_bytes(&signing.to_bytes());
        let keypair = ssh_key::private::Ed25519Keypair { public, private };

        let host_key = PrivateKey::new(
            ssh_key::private::KeypairData::Ed25519(keypair),
            "test-host",
        )
        .expect("PrivateKey::new");
        let encoded = host_key.to_openssh(LineEnding::LF).expect("openssh PEM");

        let key_file = dir.join("ssh_host_ed25519_key");
        std::fs::write(&key_file, encoded.as_bytes()).expect("write key");
        key_file
    }

    #[test]
    fn csr_pubkey_equals_ssh_host_pubkey() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let key_path = write_test_ssh_host_key(tmp.path());

        // Parse the key file independently to learn the expected pubkey.
        let on_disk = std::fs::read_to_string(&key_path).expect("read");
        let parsed = PrivateKey::from_openssh(&on_disk).expect("parse");
        let ssh_key::private::KeypairData::Ed25519(kp) = parsed.key_data() else {
            panic!("not ed25519")
        };
        let expected_pubkey = kp.public.0;

        let (_, csr_pubkey) =
            generate_csr_from_ssh_host_key("test-host", &key_path).expect("CSR");
        assert_eq!(
            csr_pubkey, expected_pubkey,
            "CSR pubkey must match SSH host pubkey (RFC-0003 §2 binding)",
        );
    }

    #[test]
    fn renewal_preserves_csr_pubkey_across_calls() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let key_path = write_test_ssh_host_key(tmp.path());

        let (_, pubkey1) = generate_csr_from_ssh_host_key("test-host", &key_path).expect("CSR 1");
        let (_, pubkey2) = generate_csr_from_ssh_host_key("test-host", &key_path).expect("CSR 2");

        assert_eq!(
            pubkey1, pubkey2,
            "renewal must reuse the SSH host pubkey (no fresh keypair)",
        );
    }

    #[test]
    fn rejects_non_ed25519_ssh_host_key() {
        let tmp = tempfile::tempdir().expect("tempdir");
        // NOTE: despite the name, this covers the parse-failure path only.
        // Generating a real RSA key with ssh-key would be heavy, so the file
        // holds bytes that are not an OpenSSH key at all.
        let bogus = tmp.path().join("not-an-ssh-key");
        std::fs::write(&bogus, b"definitely not OpenSSH PEM").expect("write");

        let err = generate_csr_from_ssh_host_key("test-host", &bogus).expect_err("must reject");
        let msg = format!("{err:#}");
        assert!(msg.contains("parse OpenSSH key"), "unexpected error: {msg}",);
    }
}
281}