nixfleet_agent/runtime/workers/probe_runners/
evidence.rs

1//! Evidence probe runner (RFC-0007 §3.1 + §7). READ-ONLY consumer of
2//! the local collector unit's signed evidence file.
3//!
4//! The collector (compliance-evidence-collector.service) owns its own
5//! systemd timer + cadence; this runner does NOT trigger it. On each
6//! tick the runner:
7//! 1. Reads `evidence_path` (default `/var/lib/nixfleet-compliance/evidence.json`).
//! 2. Verifies the ed25519 signature against the public half of the
//!    host's SSH ed25519 key (RFC-0009 §5). The signature is read from
//!    `<path>.sig` (base64 of a 64-byte signature) and is checked
//!    against the JCS-canonical bytes of the parsed payload, not the
//!    on-disk bytes.
//! 3. Selects `controls` — either those covering `decl.framework`, or
//!    the explicit `decl.controls` list — and produces per-control
//!    sub_results. Aggregate Pass iff every enforce-mode sub-result
//!    is Pass.
15//!
16//! Any error (file missing, parse, signature mismatch, framework
17//! missing) → `Fail` (RFC-0007 §6 uniform strict mode).
18
19use base64::Engine as _;
20use chrono::{DateTime, Utc};
21use ed25519_dalek::{Signature, Verifier, VerifyingKey};
22use nixfleet_proto::evidence::{EvidenceFile, SCHEMA_VERSION};
23use nixfleet_state_machine::{ProbeMode, ProbeStatus, ProbeSubResult};
24use std::path::Path;
25
26use super::{ProbeDecl, RunnerOutcome};
27
28/// Probe-level fallback mode parsed from `ProbeDecl.mode`. Used by the
29/// per-control effective-mode resolver when neither
30/// `controls`/`controlOverrides` declares a control-specific value.
31fn probe_level_mode(decl: &ProbeDecl) -> ProbeMode {
32    match decl.mode.as_str() {
33        "observe" => ProbeMode::Observe,
34        "disabled" => ProbeMode::Disabled,
35        _ => ProbeMode::Enforce,
36    }
37}
38
39pub async fn run(decl: &ProbeDecl, now: DateTime<Utc>) -> RunnerOutcome {
40    // Selection mode:
41    //   - `framework` set → traditional whole-framework probe (with
42    //     optional per-control overrides from `controlOverrides`).
43    //   - `controls` non-empty → custom-framework probe (explicit
44    //     control list with per-control modes). Native framework
45    //     stays on the wire via ProbeSubResult.framework for
46    //     auditor visibility.
47    // Validation at fleet-eval time (lib/mk-fleet.nix) enforces XOR;
48    // runtime check is defence-in-depth.
49    let framework_filter = decl.framework.as_deref();
50    let explicit_controls_present = !decl.controls.is_empty();
51    if framework_filter.is_none() && !explicit_controls_present {
52        return RunnerOutcome::fail(
53            now,
54            "evidence probe: neither framework nor controls declared",
55        );
56    }
57    if framework_filter.is_some() && explicit_controls_present {
58        return RunnerOutcome::fail(
59            now,
60            "evidence probe: framework and controls both set (XOR violation)",
61        );
62    }
63    let evidence_path = Path::new(&decl.evidence_path);
64    let sig_path = evidence_path.with_extension("json.sig");
65
66    let payload_bytes = match tokio::fs::read(evidence_path).await {
67        Ok(b) => b,
68        Err(err) => {
69            return RunnerOutcome::fail(
70                now,
71                format!("evidence probe: read {}: {err}", evidence_path.display()),
72            );
73        }
74    };
75    let sig_b64 = match tokio::fs::read_to_string(&sig_path).await {
76        Ok(s) => s.trim().to_string(),
77        Err(err) => {
78            return RunnerOutcome::fail(
79                now,
80                format!("evidence probe: read {}: {err}", sig_path.display()),
81            );
82        }
83    };
84
85    // Host SSH host pubkey (RFC-0009 §5). Read from a conventional
86    // path; the agent's main.rs --ssh-host-key-file points at the
87    // PRIVATE half, the public half is alongside as `.pub` per
88    // OpenSSH convention. The agent verifies the signature against
89    // the public half here.
90    let pubkey_bytes = match resolve_host_pubkey().await {
91        Ok(b) => b,
92        Err(reason) => return RunnerOutcome::fail(now, format!("evidence probe: {reason}")),
93    };
94    let vk = match VerifyingKey::from_bytes(&pubkey_bytes) {
95        Ok(v) => v,
96        Err(err) => {
97            return RunnerOutcome::fail(now, format!("evidence probe: pubkey parse: {err}"));
98        }
99    };
100    let sig_bytes = match base64::engine::general_purpose::STANDARD.decode(&sig_b64) {
101        Ok(b) => b,
102        Err(err) => return RunnerOutcome::fail(now, format!("evidence probe: sig base64: {err}")),
103    };
104    let Ok(sig_arr) = <[u8; 64]>::try_from(sig_bytes.as_slice()) else {
105        return RunnerOutcome::fail(
106            now,
107            format!("evidence probe: sig length {} != 64", sig_bytes.len()),
108        );
109    };
110    let sig = Signature::from_bytes(&sig_arr);
111
112    // LOADBEARING: signature is over JCS-canonical bytes (per
113    // `nixfleet-compliance-tools/src/lib.rs::sign_evidence` and
114    // `docs/evidence-format.md`), not over the on-disk bytes.
115    // probe-runner.sh writes evidence.json via `jq` which produces
116    // pretty-printed JSON; the signer canonicalises before signing.
117    // Verifying against `payload_bytes` (the file as-read) fails
118    // unconditionally because the bytes differ. Re-canonicalise here
119    // so the verifier signs the same bytes the signer did.
120    let parsed: EvidenceFile = match serde_json::from_slice(&payload_bytes) {
121        Ok(p) => p,
122        Err(err) => return RunnerOutcome::fail(now, format!("evidence probe: parse: {err}")),
123    };
124    let canonical_bytes = match serde_jcs::to_vec(&parsed) {
125        Ok(b) => b,
126        Err(err) => {
127            return RunnerOutcome::fail(now, format!("evidence probe: canonicalise: {err}"));
128        }
129    };
130    if vk.verify(&canonical_bytes, &sig).is_err() {
131        return RunnerOutcome::fail(now, "evidence probe: signature verify failed");
132    }
133    if parsed.schema_version != SCHEMA_VERSION {
134        return RunnerOutcome::fail(
135            now,
136            format!(
137                "evidence probe: schemaVersion {} unsupported (agent expects {SCHEMA_VERSION}); \
138                 upgrade nixfleet-compliance",
139                parsed.schema_version,
140            ),
141        );
142    }
143    // Parse + verify-against-canonical happened above; both consume
144    // `parsed`/`payload_bytes`. The remainder of the runner uses
145    // `parsed` directly.
146
147    let probe_mode = probe_level_mode(decl);
148
149    // Expand the one-entry-per-control wire shape into one
150    // ProbeSubResult per (control, framework, article) tuple. Each
151    // sub-result carries the resolved effective_mode so the CP-side
152    // probe_failures applier (RFC-0007 §7.2) can gate by control
153    // rather than by whole probe. Controls with effective_mode =
154    // Disabled are dropped entirely (no event_log noise for opted-
155    // out controls).
156    let mut sub_results: Vec<ProbeSubResult> = Vec::new();
157    for entry in &parsed.controls {
158        let (effective_mode, override_reason) =
159            resolve_effective_mode(decl, &entry.control_id, probe_mode, framework_filter);
160        if matches!(effective_mode, ProbeMode::Disabled) {
161            continue;
162        }
163        let status = if entry.passed {
164            ProbeStatus::Pass
165        } else {
166            ProbeStatus::Fail
167        };
168        push_entry_sub_results(
169            &mut sub_results,
170            entry,
171            framework_filter,
172            status,
173            effective_mode,
174            override_reason.as_deref(),
175        );
176    }
177    if sub_results.is_empty() {
178        let context = match framework_filter {
179            Some(f) => format!("evidence probe: no controls match framework '{f}'"),
180            None => "evidence probe: no controls matched the explicit selection".to_string(),
181        };
182        return RunnerOutcome::fail(now, context);
183    }
184
185    // Aggregate Pass only over enforce-mode sub-results. Observe-mode
186    // failures stay on the wire for visibility but do not fail the
187    // probe overall; the wave gate consults the per-row effective_mode
188    // on the CP side.
189    let enforce_subs: Vec<&ProbeSubResult> = sub_results
190        .iter()
191        .filter(|s| matches!(s.effective_mode, ProbeMode::Enforce))
192        .collect();
193    let all_enforce_pass = enforce_subs
194        .iter()
195        .all(|s| matches!(s.status, ProbeStatus::Pass));
196    let aggregate_status = if all_enforce_pass {
197        ProbeStatus::Pass
198    } else {
199        ProbeStatus::Fail
200    };
201    RunnerOutcome {
202        status: aggregate_status,
203        observed_at: now,
204        failure_reason: if all_enforce_pass {
205            None
206        } else {
207            let descriptor = framework_filter.unwrap_or("custom-controls");
208            Some(format!(
209                "evidence probe: {}: at least one enforce-mode control failed",
210                descriptor
211            ))
212        },
213        sub_results: Some(sub_results),
214    }
215}
216
217/// Resolve effective mode for a control by consulting the probe's
218/// `controls` map (custom-framework declaration) first, then
219/// `controlOverrides` (per-framework override), then falling back to
220/// the probe-level mode. For framework probes, controls whose
221/// frameworkArticles don't cover the probe's framework are skipped at
222/// the caller — this fn assumes the control is in scope. Returns the
223/// resolved mode plus the operator's audit rationale (`reason`) when
224/// an override applied; `None` when the probe-level mode was the
225/// fallback (no per-control override declared).
226fn resolve_effective_mode(
227    decl: &ProbeDecl,
228    control_id: &str,
229    probe_mode: ProbeMode,
230    framework_filter: Option<&str>,
231) -> (ProbeMode, Option<String>) {
232    if framework_filter.is_some() {
233        if let Some(o) = decl.control_overrides.get(control_id) {
234            return (o.resolved_mode(), Some(o.reason.clone()));
235        }
236        return (probe_mode, None);
237    }
238    // Custom-framework (controls map) declaration. Only listed controls
239    // contribute; the listed entry's mode is the effective mode (no
240    // fallback to probe-level mode — operators declare each one).
241    if let Some(c) = decl.controls.get(control_id) {
242        return (c.resolved_mode(), Some(c.reason.clone()));
243    }
244    // Control not in the explicit list → drop it (mark Disabled so
245    // the caller's filter excludes it from sub_results).
246    (ProbeMode::Disabled, None)
247}
248
249/// Expand one EvidenceControlEntry into ProbeSubResults respecting the
250/// probe's selection mode. Pushes one sub-result per (framework,
251/// article) tuple in scope. `override_reason` carries the operator's
252/// audit rationale (when an override applied) onto every sub-result
253/// the entry produces; the value is shared across all per-article
254/// rows because the override is on the control, not the article.
255fn push_entry_sub_results(
256    sub_results: &mut Vec<ProbeSubResult>,
257    entry: &nixfleet_proto::evidence::EvidenceControlEntry,
258    framework_filter: Option<&str>,
259    status: ProbeStatus,
260    effective_mode: ProbeMode,
261    override_reason: Option<&str>,
262) {
263    let reason = override_reason.map(|s| s.to_string());
264    if let Some(framework) = framework_filter {
265        // Whole-framework probe: emit one sub-result per article of
266        // this framework. Controls not covering the framework were
267        // filtered out at the caller via resolve_effective_mode +
268        // the framework_articles lookup below.
269        let Some(articles) = entry.framework_articles.get(framework) else {
270            return;
271        };
272        if articles.is_empty() {
273            sub_results.push(ProbeSubResult {
274                control_id: entry.control_id.clone(),
275                status,
276                framework: framework.to_string(),
277                article: None,
278                effective_mode,
279                override_reason: reason.clone(),
280            });
281        } else {
282            for article in articles {
283                sub_results.push(ProbeSubResult {
284                    control_id: entry.control_id.clone(),
285                    status,
286                    framework: framework.to_string(),
287                    article: Some(article.clone()),
288                    effective_mode,
289                    override_reason: reason.clone(),
290                });
291            }
292        }
293    } else {
294        // Custom-framework declaration. Emit one sub-result per
295        // (framework, article) tuple from the control's native
296        // frameworkArticles map. If the control has no native
297        // framework (synthetic / smoke), emit a single sub-result
298        // with framework="custom" and article=None so the CP gate
299        // still sees the control.
300        if entry.framework_articles.is_empty() {
301            sub_results.push(ProbeSubResult {
302                control_id: entry.control_id.clone(),
303                status,
304                framework: "custom".to_string(),
305                article: None,
306                effective_mode,
307                override_reason: reason.clone(),
308            });
309        } else {
310            for (framework, articles) in &entry.framework_articles {
311                if articles.is_empty() {
312                    sub_results.push(ProbeSubResult {
313                        control_id: entry.control_id.clone(),
314                        status,
315                        framework: framework.clone(),
316                        article: None,
317                        effective_mode,
318                        override_reason: reason.clone(),
319                    });
320                } else {
321                    for article in articles {
322                        sub_results.push(ProbeSubResult {
323                            control_id: entry.control_id.clone(),
324                            status,
325                            framework: framework.clone(),
326                            article: Some(article.clone()),
327                            effective_mode,
328                            override_reason: reason.clone(),
329                        });
330                    }
331                }
332            }
333        }
334    }
335}
336
337async fn resolve_host_pubkey() -> Result<[u8; 32], String> {
338    // The agent's CLI accepts `--ssh-host-key-file` (defaulting to
339    // `/etc/ssh/ssh_host_ed25519_key`). The matching public half lives
340    // alongside as `<path>.pub`. We read the public file here so the
341    // probe runner doesn't need the private key (which it has no
342    // business with).
343    let priv_path = std::env::var("NIXFLEET_AGENT_SSH_HOST_KEY_FILE")
344        .unwrap_or_else(|_| "/etc/ssh/ssh_host_ed25519_key".to_string());
345    let pub_path = format!("{priv_path}.pub");
346    let raw = tokio::fs::read_to_string(&pub_path)
347        .await
348        .map_err(|err| format!("read host pubkey {pub_path}: {err}"))?;
349    // OpenSSH public key format: "ssh-ed25519 <base64-blob> <comment>".
350    let blob_b64 = raw
351        .split_whitespace()
352        .nth(1)
353        .ok_or_else(|| format!("malformed host pubkey at {pub_path}"))?;
354    let blob = base64::engine::general_purpose::STANDARD
355        .decode(blob_b64)
356        .map_err(|err| format!("host pubkey base64 decode: {err}"))?;
357    // The blob is a length-prefixed wire format; the last 32 bytes are
358    // the raw ed25519 public key (RFC-0009 §5 / RFC 4253).
359    if blob.len() < 32 {
360        return Err(format!("host pubkey blob len {} < 32", blob.len()));
361    }
362    let mut out = [0u8; 32];
363    out.copy_from_slice(&blob[blob.len() - 32..]);
364    Ok(out)
365}
366
367// Wire shape lives in nixfleet_proto::evidence::EvidenceFile so the
368// auditor verifier (nixfleet-compliance-verify) and compliance-check
369// CLI consume the same canonical schema. Drift between producer +
370// consumer is a compile error rather than a runtime parse failure.
371
#[cfg(test)]
mod tests {
    use super::*;
    use crate::runtime::workers::probe_runners::ControlOverrideDecl;
    use nixfleet_proto::evidence::EvidenceControlEntry;
    use std::collections::HashMap;

    /// Enforce-mode evidence declaration with no per-control
    /// configuration; `framework` picks the selection mode.
    fn base_decl(framework: Option<&str>) -> ProbeDecl {
        ProbeDecl {
            kind: "evidence".into(),
            mode: "enforce".into(),
            interval_seconds: 30,
            run_once: false,
            url: None,
            expect_status: 200,
            host: None,
            port: None,
            connect_timeout_secs: 5,
            command: Vec::new(),
            timeout_secs: 10,
            framework: framework.map(str::to_string),
            evidence_path: "/var/lib/nixfleet-compliance/evidence.json".into(),
            control_overrides: HashMap::new(),
            controls: HashMap::new(),
        }
    }

    /// Evidence entry that maps a single framework to `articles`.
    fn entry(
        control_id: &str,
        passed: bool,
        framework: &str,
        articles: &[&str],
    ) -> EvidenceControlEntry {
        let article_list: Vec<String> = articles.iter().map(|a| a.to_string()).collect();
        EvidenceControlEntry {
            control_id: control_id.into(),
            passed,
            framework_articles: HashMap::from([(framework.to_string(), article_list)]),
            details: None,
            schema: None,
        }
    }

    /// Run `push_entry_sub_results` against a fresh vector and return it.
    fn expand(
        e: &EvidenceControlEntry,
        framework_filter: Option<&str>,
        status: ProbeStatus,
        mode: ProbeMode,
        reason: Option<&str>,
    ) -> Vec<ProbeSubResult> {
        let mut rows = Vec::new();
        push_entry_sub_results(&mut rows, e, framework_filter, status, mode, reason);
        rows
    }

    #[test]
    fn resolve_effective_mode_framework_probe_no_overrides_uses_probe_mode() {
        let decl = base_decl(Some("nis2"));
        let (mode, reason) =
            resolve_effective_mode(&decl, "access-control", ProbeMode::Enforce, Some("nis2"));
        assert_eq!(mode, ProbeMode::Enforce);
        assert_eq!(reason, None, "no override → no reason");
    }

    #[test]
    fn resolve_effective_mode_framework_probe_override_wins_over_probe_mode() {
        let mut decl = base_decl(Some("nis2"));
        let phase_out = ControlOverrideDecl {
            mode: "observe".into(),
            reason: "Phase-out".into(),
        };
        decl.control_overrides
            .insert("access-control".into(), phase_out);
        let (mode, reason) =
            resolve_effective_mode(&decl, "access-control", ProbeMode::Enforce, Some("nis2"));
        assert_eq!(mode, ProbeMode::Observe);
        assert_eq!(reason.as_deref(), Some("Phase-out"));
    }

    #[test]
    fn resolve_effective_mode_custom_controls_unlisted_dropped() {
        let mut decl = base_decl(None);
        let enforced = ControlOverrideDecl {
            mode: "enforce".into(),
            reason: String::new(),
        };
        decl.controls.insert("access-control".into(), enforced);

        // Unlisted control → Disabled (filtered out downstream).
        let (unlisted_mode, _) =
            resolve_effective_mode(&decl, "secure-boot", ProbeMode::Enforce, None);
        assert_eq!(unlisted_mode, ProbeMode::Disabled);

        // Listed control → its declared mode + (empty) reason.
        let (listed_mode, listed_reason) =
            resolve_effective_mode(&decl, "access-control", ProbeMode::Enforce, None);
        assert_eq!(listed_mode, ProbeMode::Enforce);
        assert_eq!(listed_reason.as_deref(), Some(""));
    }

    #[test]
    fn push_entry_framework_probe_one_sub_result_per_article() {
        let control = entry("access-control", true, "nis2", &["21.i", "21.j"]);
        let rows = expand(
            &control,
            Some("nis2"),
            ProbeStatus::Pass,
            ProbeMode::Enforce,
            None,
        );
        assert_eq!(rows.len(), 2);
        for row in &rows {
            assert!(row.framework == "nis2");
            assert!(row.control_id == "access-control");
            assert!(row.override_reason.is_none());
        }
    }

    #[test]
    fn push_entry_framework_probe_skips_when_framework_absent_from_articles() {
        let control = entry("access-control", true, "nis2", &["21.i"]);
        let rows = expand(
            &control,
            Some("iso27001"),
            ProbeStatus::Pass,
            ProbeMode::Enforce,
            None,
        );
        assert!(rows.is_empty());
    }

    #[test]
    fn push_entry_custom_emits_all_native_frameworks() {
        let mut control = entry("access-control", false, "nis2", &["21.i"]);
        control
            .framework_articles
            .insert("iso27001".into(), vec!["A.5.1".into()]);
        let rows = expand(&control, None, ProbeStatus::Fail, ProbeMode::Enforce, None);
        assert_eq!(rows.len(), 2);
        assert!(rows.iter().any(|row| row.framework == "nis2"));
        assert!(rows.iter().any(|row| row.framework == "iso27001"));
        assert!(rows.iter().all(|row| row.effective_mode == ProbeMode::Enforce));
    }

    #[test]
    fn push_entry_override_reason_propagates_to_every_sub_result() {
        // Bug D-style audit-trail regression: when an operator
        // declares `controlOverrides[ac] = { mode = observe; reason
        // = "Phase-out"; }`, every sub_result for that control (one
        // per article) must carry the reason so the CP event_log
        // payload preserves it across the wire.
        let control = entry("access-control", false, "nis2", &["21.i", "21.j"]);
        let rows = expand(
            &control,
            Some("nis2"),
            ProbeStatus::Fail,
            ProbeMode::Observe,
            Some("Phase-out window"),
        );
        assert_eq!(rows.len(), 2);
        for row in &rows {
            assert!(row.override_reason.as_deref() == Some("Phase-out window"));
        }
    }

    #[test]
    fn push_entry_custom_synthetic_no_framework_articles() {
        // Control with empty frameworkArticles (e.g. the always-fail
        // synthetic control). Custom-framework selection emits one
        // sub-result with framework="custom".
        let synthetic = EvidenceControlEntry {
            control_id: "synthetic".into(),
            passed: false,
            framework_articles: HashMap::new(),
            details: None,
            schema: None,
        };
        let rows = expand(
            &synthetic,
            None,
            ProbeStatus::Fail,
            ProbeMode::Enforce,
            None,
        );
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].framework, "custom");
        assert_eq!(rows[0].article, None);
    }

    /// Regression guard: the agent's evidence runner must verify the
    /// signature over JCS-canonical bytes, not over the on-disk file
    /// bytes. The compliance collector signs the canonical form (per
    /// `nixfleet-compliance-tools/src/lib.rs::sign_evidence`); the
    /// on-disk JSON is pretty-printed by `jq`. Verifying against the
    /// on-disk bytes fails every time because the two byte sequences
    /// differ. This test pins that they differ so any future change
    /// that re-introduces the bug fails loudly here.
    #[test]
    fn canonical_bytes_differ_from_pretty_printed() {
        use nixfleet_proto::evidence::{EvidenceControlEntry, EvidenceFile, SCHEMA_VERSION};

        let control = EvidenceControlEntry {
            control_id: "access-control".to_string(),
            passed: true,
            framework_articles: HashMap::from([(
                "nis2-essential".to_string(),
                vec!["art21.i".to_string()],
            )]),
            details: Some(serde_json::json!({"k": "v"})),
            schema: None,
        };
        let file = EvidenceFile {
            schema_version: SCHEMA_VERSION,
            hostname: "agent-01".to_string(),
            collected_at: chrono::Utc::now(),
            controls: vec![control],
        };

        // Pretty-printed (what jq writes; what payload_bytes contains
        // when the agent reads the file).
        let pretty = serde_json::to_vec_pretty(&file).unwrap();
        // Canonical (what the compliance collector signs).
        let canonical = serde_jcs::to_vec(&file).unwrap();
        assert_ne!(
            pretty, canonical,
            "pretty-printed and JCS-canonical bytes MUST differ; if they ever \
             converge the agent's verify path becomes a no-op signature check",
        );

        // Round-trip stability: canonicalising the canonical bytes is a
        // fixed point. The agent's verify calls serde_jcs::to_vec on a
        // freshly-parsed EvidenceFile; this asserts the result is what
        // the signer signed.
        let reparsed: EvidenceFile = serde_json::from_slice(&canonical).unwrap();
        let recanonical = serde_jcs::to_vec(&reparsed).unwrap();
        assert_eq!(canonical, recanonical);
    }
}