nixfleet_proto/
fleet_view.rs

//! Read-model views served by CP for operator-facing consumers (`/v1/hosts`,
//! CLI, metrics exporter). Outstanding-event counts apply resolution-by-
//! replacement (events from older rollouts are considered resolved).
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8use crate::{HostRolloutState, RolloutId};
9
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
11#[serde(rename_all = "camelCase")]
12pub struct HostStatusEntry {
13    pub hostname: String,
14    pub channel: String,
15    #[serde(default)]
16    pub declared_closure_hash: Option<String>,
17    #[serde(default)]
18    pub current_closure_hash: Option<String>,
19    #[serde(default)]
20    pub pending_closure_hash: Option<String>,
21    #[serde(default)]
22    pub last_checkin_at: Option<DateTime<Utc>>,
23    #[serde(default)]
24    pub last_rollout_id: Option<String>,
25    pub converged: bool,
26    pub outstanding_compliance_failures: usize,
27    pub outstanding_runtime_gate_errors: usize,
28    pub verified_event_count: usize,
29    /// Reported by the agent at every checkin. Surfaces crash-loops that
30    /// don't show up as offline (low value despite recent `last_checkin_at`).
31    #[serde(default)]
32    pub last_uptime_secs: Option<u64>,
33    /// Per-host rollout state for the channel's CURRENT rolloutId (computed
34    /// from verified_fleet, not the agent-reported `last_rollout_id` which
35    /// may be stale). `None` until the host transitions in a freshly opened
36    /// rollout.
37    #[serde(default)]
38    pub rollout_state: Option<HostRolloutState>,
39    /// Agent posted `ActivationDeferred`: profile is set but a critical-
40    /// component swap forced a reboot to finish activation. Cleared once
41    /// the host converges.
42    #[serde(default)]
43    pub pending_reboot: bool,
44    /// Agent posted `ClosureQuarantined`: this closure failed activation and
45    /// the agent stopped retrying. Cleared automatically when the channel-ref
46    /// advances to a fresher closure_hash.
47    #[serde(default)]
48    pub quarantined_closure: Option<String>,
49    /// Active operator pin. Populated from `hosts.<name>.pin` in the fleet
50    /// snapshot, pre-filtered for expiry by `nixfleet-release` - non-expired
51    /// at signing time by construction.
52    #[serde(default)]
53    pub pin: Option<crate::Pin>,
54    /// Health probes currently in non-Pass state (`Fail` and `Unknown` both
55    /// count). `0` when no probes declared, all probes passing, or mode is
56    /// permissive/disabled.
57    #[serde(default)]
58    pub outstanding_health_failures: usize,
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
62pub struct HostsResponse {
63    pub hosts: Vec<HostStatusEntry>,
64}
65
66/// Per-host summary of a single rollout — one entry per `(rollout, host)`
67/// pair, sorted by wave then hostname. Operator-facing view: "what
68/// state is each host in for this rollout?"
69///
70/// Distinct from [`RolloutEvents`], which projects the chronological
71/// `event_log` stream for the same rollout (engineer-facing replay
72/// surface; RFC-0005 §10.5 + Plan 04).
73#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
74pub struct RolloutHosts {
75    pub rollout_id: RolloutId,
76    pub hosts: Vec<RolloutHostEntry>,
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
80#[serde(rename_all = "camelCase")]
81pub struct RolloutHostEntry {
82    pub host: String,
83    pub channel: String,
84    pub wave: u32,
85    pub target_closure_hash: String,
86    pub target_channel_ref: String,
87    /// RFC3339, kept as string so malformed historical rows surface to the
88    /// operator instead of being masked by a re-parse.
89    pub dispatched_at: String,
90    /// `None` while the dispatch is still open (no confirm, no rollback).
91    #[serde(default)]
92    pub terminal_state: Option<String>,
93    #[serde(default)]
94    pub terminal_at: Option<String>,
95}
96
97/// Chronological event-log stream for a single rollout — every row in
98/// `event_log WHERE rollout_id = ? ORDER BY seq ASC`. Engineer-facing
99/// replay surface (RFC-0005 §10.5 + Plan 04 §"Event log schema"):
100/// reproduces the per-host state evolution by replaying these entries
101/// through `nixfleet_state_machine::step`.
102///
103/// `payload` is parsed JSON (not the escaped string the DB stores). The
104/// shape inside `payload` is determined by `kind`:
105/// - `kind = "agent_event"` → an `OutboundAgentEvent` variant payload
106/// - `kind = "plan_action"` → a `PlanAction` variant
107/// - `kind = "effect"`      → an `Effect` variant
108/// - `kind = "gate_decision"` → `{ host, rollout, gate, reason }`
109/// - `kind = "verify_outcome"` / `"manifest_poll"` → producer-side shapes
110#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
111pub struct RolloutEvents {
112    pub rollout_id: RolloutId,
113    pub events: Vec<RolloutEventEntry>,
114}
115
116#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
117#[serde(rename_all = "camelCase")]
118pub struct RolloutEventEntry {
119    pub seq: i64,
120    /// RFC3339, caller-supplied (no SQL DEFAULT — see Phase 4 fix
121    /// `f3fcb213`).
122    pub ts: String,
123    pub kind: String,
124    #[serde(default)]
125    pub host: Option<String>,
126    /// Parsed JSON. The `event_log` column stores a JSON-validated
127    /// string; the route parses on-read so consumers get structured
128    /// data without a second deserialise step.
129    pub payload: serde_json::Value,
130}