nixfleet_reconciler/planner_types.rs
1//! Types consumed/produced by the new planner (RFC-0006 §4.1).
2//!
3//! These types are NEW alongside the existing `reconcile()` /
4//! `gates::*` / `Observed` types — both coexist through Phase 5/6 of
5//! the v0.2 fold. Phase 6 wires CP's runtime applier onto the new
6//! planner and deletes the old path wholesale per RFC-0006 §12.
7//!
8//! Dispatch-path enforcement of "verified data only" lands here: the
9//! planner accepts `SignedManifestSet`, which carries `Verified<T>`
10//! values from Phase 2. A function taking `&SignedManifestSet` is
11//! statically guaranteed to be working with cryptographically verified
12//! payloads — there is no path that constructs a manifest without
13//! going through `nixfleet_reconciler::verify_*`.
14
15use std::collections::{HashMap, HashSet};
16
17use chrono::{DateTime, Utc};
18use nixfleet_proto::{ChannelRef, FleetResolved, RolloutBudget, RolloutManifest};
19use nixfleet_state_machine::HostRolloutState;
20
21use crate::verify::Verified;
22
/// Name of a release channel; used as the key of per-channel maps.
pub type ChannelId = String;
/// Hostname of a fleet member.
pub type HostId = String;
/// Hash identifying a closure (e.g. a dispatch target or a quarantined build).
pub type ClosureHash = String;
26
27// `RolloutId` is a newtype around `"{channel}@{channel_ref}"`
28// (RFC-0008 §6.3); lives in nixfleet-proto. Re-exported here so
29// callers that already
30// `use nixfleet_reconciler::planner_types::RolloutId` keep working.
31pub use nixfleet_proto::RolloutId;
32
33/// All verified, freshness-validated manifests the planner needs to
34/// reason about the fleet. Constructed by the CP runtime's manifest
35/// poll workers after `verify_artifact` / `verify_rollout_manifest`
36/// gates have passed.
37///
38/// Holding a `&SignedManifestSet` is the planner's proof that every
39/// manifest in scope has passed the trust contract (RFC-0002 §3 +
40/// RFC-0010 §1.5).
41pub struct SignedManifestSet {
42 pub fleet: Verified<FleetResolved>,
43 /// Per-channel signed rollout manifests, keyed by channel name.
44 pub rollouts: HashMap<ChannelId, Verified<RolloutManifest>>,
45}
46
47impl SignedManifestSet {
48 pub fn new(
49 fleet: Verified<FleetResolved>,
50 rollouts: HashMap<ChannelId, Verified<RolloutManifest>>,
51 ) -> Self {
52 Self { fleet, rollouts }
53 }
54
55 pub fn fleet(&self) -> &FleetResolved {
56 self.fleet.inner()
57 }
58}
59
60/// Aggregated view of per-host state the planner consults. Built by the
61/// CP runtime from `host_rollout_records` (Phase 4 schema).
62pub struct FleetState {
63 /// Per-host state, keyed by `(rollout_id, hostname)`. The reducer
64 /// state is the source of truth; this map is a flat view derived
65 /// from `host_rollout_records` via
66 /// `db::HostRolloutRecords::all_for_rollout` per active rollout.
67 pub host_states: HashMap<(RolloutId, HostId), HostRolloutState>,
68
69 pub rollouts: HashMap<RolloutId, RolloutSummary>,
70
71 /// Per-(rollout, host) outstanding enforce-mode probe failure count.
72 /// Populated from `db::probe_failures::outstanding_failing_enforce_probes_by_rollout`
73 /// at `FleetState` construction time (RFC-0007 §7.2). Read by the
74 /// compliance-wave gate; absent entries mean zero failing enforce
75 /// probes (RFC-0005 §6 — no fail-open fallback).
76 pub outstanding_failing_enforce_probes: HashMap<RolloutId, HashMap<HostId, usize>>,
77}
78
79#[derive(Debug, Clone)]
80pub struct RolloutSummary {
81 pub rollout_id: RolloutId,
82 pub channel: ChannelId,
83 pub target_ref: ChannelRef,
84 pub opened_at: DateTime<Utc>,
85 pub terminal_at: Option<DateTime<Utc>>,
86 /// Highest wave index for which at least one host has been dispatched.
87 /// Used by the wave-promotion gate (`host_wave > current_wave` blocks).
88 /// Maintained by the applier; planner reads, never writes.
89 pub current_wave: u32,
90 /// Disruption-budget snapshot frozen at OpenRollout time. Cross-rollout
91 /// in-flight summing matches by selector equality, so reordering the
92 /// fleet's budget list does not reshape enforcement
93 /// (see gates::disruption_budget comments).
94 pub budgets: Vec<RolloutBudget>,
95}
96
97/// Per-channel quarantined-closure set. Populated by the
98/// `InsertQuarantine` applier (after `RemoteRollbackComplete`); read by
99/// the quarantine gate to refuse-to-dispatch a known-bad SHA.
100pub type QuarantineSet = HashMap<ChannelId, HashSet<ClosureHash>>;
101
102/// Planner outputs. The applier interprets each variant against real
103/// I/O (DB writes, queued HTTP responses, metrics).
104#[derive(Debug, Clone, PartialEq, Eq)]
105pub enum PlanAction {
106 /// A new channel ref has arrived and the planner is opening the
107 /// per-host record set for it. Applier inserts the rollout into
108 /// `rollouts` + creates `host_rollout_records` rows.
109 OpenRollout {
110 rollout_id: RolloutId,
111 channel: ChannelId,
112 target_ref: ChannelRef,
113 },
114
115 /// Queue a Dispatch for a single host on the agent's next long-poll
116 /// to `/v1/agent/dispatch`. Per RFC-0005 §4.1 the payload is
117 /// advisory; agent cross-checks against signed manifest.
118 QueueDispatch {
119 host: HostId,
120 rollout: RolloutId,
121 target_closure: ClosureHash,
122 soak_due_at: DateTime<Utc>,
123 },
124
125 // No `MarkChannelTerminal` variant: terminal transitions are driven
126 // by the rollout reducer (RFC-0008 §3) via
127 // `RolloutEffect::RecordRolloutTransition`, not by the planner.
128 //
129 // No `ClearStaleQuarantine` variant: quarantines are append-only
130 // under the derived-view discipline (RFC-0008 §6.4). Operator-
131 // driven clearance would land as an explicit event matching the
132 // `OperatorClearance` shape.
133 /// Record that a channel was halted (operator-visible status hint).
134 RecordHaltLifted { channel: ChannelId },
135
136 /// A host was eligible for dispatch but a gate blocked it. Applier
137 /// appends an `event_log` entry with `kind = 'gate_decision'` and the
138 /// supplied reason. Does NOT queue any agent-visible work.
139 DeferDispatch {
140 host: HostId,
141 rollout: RolloutId,
142 gate: &'static str,
143 reason: String,
144 },
145}
146
147/// Re-export of the existing rich gate-block enum. The new planner_gates
148/// reuse the same variant set; one canonical type for "why a dispatch
149/// didn't fire" prevents drift between dispatch-time telemetry and
150/// reconcile-time telemetry.
151pub use crate::planner_gates::GateBlock;