nixfleet_control_plane/server/routes/
enrollment.rs

1//! Cert-issuance handlers for enroll and renew.
2
3use std::sync::Arc;
4
5use axum::Json;
6use axum::extract::{Extension, State};
7use axum::http::StatusCode;
8use nixfleet_proto::enroll_wire::{EnrollRequest, EnrollResponse, RenewRequest, RenewResponse};
9use rcgen::PublicKeyData;
10
11use super::super::middleware::AuthenticatedCn;
12use super::super::route_error::{bad_request, bad_request_error, internal};
13use super::super::state::AppState;
14
15/// `POST /v1/enroll` - bootstrap a new fleet host (no mTLS; auth via bootstrap-token signature).
16pub(in crate::server) async fn enroll(
17    State(state): State<Arc<AppState>>,
18    Json(req): Json<EnrollRequest>,
19) -> Result<Json<EnrollResponse>, StatusCode> {
20    let now = chrono::Utc::now();
21
22    let db = state.db.as_ref().ok_or_else(|| {
23        tracing::warn!("enroll: no db configured - endpoint unusable");
24        StatusCode::SERVICE_UNAVAILABLE
25    })?;
26
27    if db
28        .tokens()
29        .token_seen(&req.token.claims.nonce)
30        .map_err(internal("enroll: db token_seen failed"))?
31    {
32        tracing::warn!(nonce = %req.token.claims.nonce, "enroll: token replay rejected");
33        return Err(StatusCode::CONFLICT);
34    }
35
36    // Bootstrap-nonce allowlist enforcement (nixfleet#96).
37    // Strict: any nonce not in the signed allowlist is rejected. After
38    // a state.db wipe, the in-memory allowlist is re-seeded from the
39    // signed artifact on the next poll, so replays are blocked.
40    {
41        let view = state.allowed_nonces.read().await;
42        let entry = view.lookup(&req.token.claims.nonce).ok_or_else(|| {
43            tracing::warn!(
44                target: "issuance",
45                hostname = %req.token.claims.hostname,
46                nonce = %req.token.claims.nonce,
47                "enroll: nonce not in signed allowlist (nonce_not_allowlisted)",
48            );
49            StatusCode::UNAUTHORIZED
50        })?;
51        if entry.hostname != req.token.claims.hostname {
52            tracing::warn!(
53                target: "issuance",
54                hostname = %req.token.claims.hostname,
55                nonce = %req.token.claims.nonce,
56                expected_hostname = %entry.hostname,
57                "enroll: allowlist entry hostname mismatch (nonce_hostname_mismatch)",
58            );
59            return Err(StatusCode::UNAUTHORIZED);
60        }
61        if !crate::db::allowed_nonces::AllowedNoncesView::entry_is_live(entry, now) {
62            tracing::warn!(
63                target: "issuance",
64                hostname = %req.token.claims.hostname,
65                nonce = %req.token.claims.nonce,
66                allowlist_expires_at = %entry.expires_at,
67                "enroll: allowlist entry expired (nonce_allowlist_expired)",
68            );
69            return Err(StatusCode::UNAUTHORIZED);
70        }
71    }
72
73    if now < req.token.claims.issued_at || now >= req.token.claims.expires_at {
74        tracing::warn!(
75            hostname = %req.token.claims.hostname,
76            "enroll: token outside validity window"
77        );
78        return Err(StatusCode::UNAUTHORIZED);
79    }
80
81    // LOADBEARING: re-read trust.json per enroll so operator key rotations propagate without restart.
82    // Single source of truth - the daemon's --trust-file arg, plumbed through IssuancePaths.
83    let trust_path = state.issuance_paths.read().await.trust_path.clone();
84    crate::auth::issuance::verify_bootstrap_token_against_trust(&trust_path, &req.token, now)
85        .map_err(|err| match err {
86            crate::auth::issuance::TrustVerifyError::SignatureMismatch => {
87                tracing::warn!(
88                    hostname = %req.token.claims.hostname,
89                    nonce = %req.token.claims.nonce,
90                    "enroll: {err}",
91                );
92                StatusCode::UNAUTHORIZED
93            }
94            other => {
95                tracing::error!(error = %other, "enroll: trust verification failed");
96                StatusCode::INTERNAL_SERVER_ERROR
97            }
98        })?;
99
100    let csr_params = rcgen::CertificateSigningRequestParams::from_pem(&req.csr_pem)
101        .map_err(bad_request("enroll: parse CSR PEM"))?;
102    let csr_cn: Option<String> = csr_params.params.distinguished_name.iter().find_map(
103        |(t, v): (&rcgen::DnType, &rcgen::DnValue)| {
104            if matches!(t, rcgen::DnType::CommonName) {
105                Some(match v {
106                    rcgen::DnValue::PrintableString(s) => s.to_string(),
107                    rcgen::DnValue::Utf8String(s) => s.to_string(),
108                    _ => format!("{:?}", v),
109                })
110            } else {
111                None
112            }
113        },
114    );
115    let csr_cn = csr_cn.ok_or_else(|| {
116        tracing::warn!("enroll: CSR has no CN");
117        StatusCode::BAD_REQUEST
118    })?;
119    let csr_pubkey_der = csr_params.public_key.der_bytes();
120    let csr_fingerprint = crate::auth::issuance::fingerprint(csr_pubkey_der);
121
122    if let Err(err) = crate::auth::issuance::validate_token_claims(
123        &req.token.claims,
124        &csr_cn,
125        &csr_fingerprint,
126        now,
127    ) {
128        tracing::warn!(error = %err, hostname = %req.token.claims.hostname, "enroll: claim validation");
129        return Err(StatusCode::UNAUTHORIZED);
130    }
131
132    // RFC-0003 §2 binding: CSR pubkey MUST equal the host's declared
133    // SSH host pubkey from fleet.resolved. Closes #43 (cert <--> host key
134    // bond) and #9 (declarative-enrollment fingerprint match) in one
135    // call site. Fail-closed when no fleet snapshot is verified yet
136    // (cold-start race) or when the host has no declared pubkey.
137    let snap = state.verified_fleet.read().await.clone().ok_or_else(|| {
138        tracing::warn!("enroll: no verified fleet snapshot - refusing");
139        StatusCode::SERVICE_UNAVAILABLE
140    })?;
141    let host_decl = snap.fleet.hosts.get(&csr_cn).ok_or_else(|| {
142        tracing::warn!(host = %csr_cn, "enroll: host not declared in fleet.nix");
143        StatusCode::UNAUTHORIZED
144    })?;
145    // FOOTGUN: rcgen 0.13's `PublicKeyData::der_bytes()` returns the
146    // raw 32-byte ed25519 pubkey for ed25519 CSRs (not a 44-byte SPKI
147    // wrapper as RFC 5280 SubjectPublicKeyInfo would suggest). Existing
148    // fingerprint computation already relies on this - pass the bytes
149    // straight to the binding check.
150    if csr_pubkey_der.len() != 32 {
151        tracing::warn!(
152            hostname = %csr_cn,
153            len = csr_pubkey_der.len(),
154            "enroll: CSR pubkey is not 32 raw bytes (non-ed25519 CSR rejected)",
155        );
156        return Err(StatusCode::BAD_REQUEST);
157    }
158    if let Err(err) = crate::auth::issuance::validate_csr_against_fleet_host(
159        csr_pubkey_der,
160        host_decl.pubkey.as_deref(),
161    ) {
162        tracing::warn!(host = %csr_cn, error = %err, "enroll: fleet-pubkey binding check failed");
163        return Err(StatusCode::UNAUTHORIZED);
164    }
165
166    // LOADBEARING: plain INSERT closes the TOCTOU between token_seen() and cert issuance via PK conflict.
167    let outcome = db
168        .tokens()
169        .record_token_nonce(&req.token.claims.nonce, &req.token.claims.hostname)
170        .map_err(internal(
171            "enroll: db record_token_nonce failed; refusing enrollment",
172        ))?;
173    if matches!(outcome, crate::db::RecordTokenOutcome::AlreadyRecorded) {
174        tracing::warn!(
175            nonce = %req.token.claims.nonce,
176            "enroll: token replay detected at record (concurrent enroll race or retry)",
177        );
178        return Err(StatusCode::CONFLICT);
179    }
180
181    let audit_log_path = state.issuance_paths.read().await.audit_log.clone();
182    let signer = match state.ca_signer.read().await.as_ref() {
183        Some(s) => Arc::clone(s),
184        None => {
185            tracing::error!("enroll: CA signer not configured");
186            return Err(StatusCode::INTERNAL_SERVER_ERROR);
187        }
188    };
189    let (cert_pem, not_after) = crate::auth::issuance::issue_cert(
190        &req.csr_pem,
191        signer.as_ref(),
192        state.agent_cert_validity,
193        now,
194        &state.agent_cn_suffix,
195    )
196    .map_err(bad_request_error("enroll: issue_cert failed"))?;
197
198    if let Some(path) = &audit_log_path {
199        // `issued_cn` records the cert's actual CN (canonical
200        // `agent-<machineId>.<suffix>`) - same form the renew path
201        // records, so audit-log rows are uniform across enroll + renew.
202        crate::auth::issuance::audit_log(
203            path,
204            now,
205            "<enroll>",
206            &crate::auth::issuance::canonical_agent_cn(
207                &req.token.claims.hostname,
208                &state.agent_cn_suffix,
209            ),
210            not_after,
211            &crate::auth::issuance::AuditContext::Enroll {
212                token_nonce: req.token.claims.nonce.clone(),
213            },
214        );
215    }
216    tracing::info!(
217        target: "issuance",
218        hostname = %req.token.claims.hostname,
219        not_after = %not_after.to_rfc3339(),
220        "enrolled"
221    );
222
223    Ok(Json(EnrollResponse {
224        cert_pem,
225        not_after,
226    }))
227}
228
229/// `POST /v1/agent/renew` - mTLS-required; verified CN is stamped onto the new cert.
230pub(in crate::server) async fn renew(
231    State(state): State<Arc<AppState>>,
232    Extension(cn): Extension<AuthenticatedCn>,
233    Json(req): Json<RenewRequest>,
234) -> Result<Json<RenewResponse>, StatusCode> {
235    let cn = cn.into_string();
236    let now = chrono::Utc::now();
237
238    // RFC-0003 §2 binding: renewal CSR's pubkey MUST equal the host's
239    // declared SSH host pubkey, identical predicate to enroll. Without
240    // this, renewal would silently let the agent rotate to a fresh
241    // (non-host-bound) keypair - defeating the binding the operator
242    // declared in fleet.nix.
243    let renew_csr_params = rcgen::CertificateSigningRequestParams::from_pem(&req.csr_pem)
244        .map_err(bad_request("renew: parse CSR PEM"))?;
245    let csr_pubkey_der = renew_csr_params.public_key.der_bytes();
246    if csr_pubkey_der.len() != 32 {
247        tracing::warn!(
248            hostname = %cn,
249            len = csr_pubkey_der.len(),
250            "renew: CSR pubkey is not 32 raw bytes (non-ed25519 CSR rejected)",
251        );
252        return Err(StatusCode::BAD_REQUEST);
253    }
254    let snap = state.verified_fleet.read().await.clone().ok_or_else(|| {
255        tracing::warn!("renew: no verified fleet snapshot - refusing");
256        StatusCode::SERVICE_UNAVAILABLE
257    })?;
258    // Verified mTLS CN may be canonical (`agent-<id>.<suffix>`, post-C.3)
259    // or bare machineId (legacy). Strip to bare for the fleet.hosts lookup.
260    let machine_id = crate::auth::issuance::extract_machine_id(&cn, &state.agent_cn_suffix);
261    let host_decl = snap.fleet.hosts.get(&machine_id).ok_or_else(|| {
262        tracing::warn!(host = %cn, machine_id, "renew: host not declared in fleet.nix");
263        StatusCode::UNAUTHORIZED
264    })?;
265    if let Err(err) = crate::auth::issuance::validate_csr_against_fleet_host(
266        csr_pubkey_der,
267        host_decl.pubkey.as_deref(),
268    ) {
269        tracing::warn!(host = %cn, error = %err, "renew: fleet-pubkey binding check failed");
270        return Err(StatusCode::UNAUTHORIZED);
271    }
272
273    let audit_log_path = state.issuance_paths.read().await.audit_log.clone();
274    let signer = match state.ca_signer.read().await.as_ref() {
275        Some(s) => Arc::clone(s),
276        None => return Err(StatusCode::INTERNAL_SERVER_ERROR),
277    };
278
279    let (cert_pem, not_after) = crate::auth::issuance::issue_cert(
280        &req.csr_pem,
281        signer.as_ref(),
282        state.agent_cert_validity,
283        now,
284        &state.agent_cn_suffix,
285    )
286    .map_err(bad_request_error("renew: issue_cert failed"))?;
287
288    if let Some(path) = &audit_log_path {
289        crate::auth::issuance::audit_log(
290            path,
291            now,
292            &cn,
293            &cn,
294            not_after,
295            &crate::auth::issuance::AuditContext::Renew {
296                previous_cert_serial: "<unknown>".to_string(),
297            },
298        );
299    }
300    tracing::info!(
301        target: "issuance",
302        hostname = %cn,
303        not_after = %not_after.to_rfc3339(),
304        "renewed"
305    );
306
307    Ok(Json(RenewResponse {
308        cert_pem,
309        not_after,
310    }))
311}