diff --git a/hscontrol/servertest/ha_property_test.go b/hscontrol/servertest/ha_property_test.go new file mode 100644 index 00000000..712a1cf0 --- /dev/null +++ b/hscontrol/servertest/ha_property_test.go @@ -0,0 +1,642 @@ +package servertest_test + +import ( + "context" + "fmt" + "net/netip" + "slices" + "sync" + "testing" + "time" + + "github.com/juanfont/headscale/hscontrol/servertest" + "github.com/juanfont/headscale/hscontrol/state" + "github.com/juanfont/headscale/hscontrol/types" + "github.com/juanfont/headscale/hscontrol/util" + "github.com/stretchr/testify/require" + "pgregory.net/rapid" + "tailscale.com/tailcfg" +) + +// haPropClient bundles a TestClient with the node ID and route it +// advertises. The property test mutates connection state through +// these handles and inspects the resulting NodeStore snapshot. +type haPropClient struct { + tc *servertest.TestClient + id types.NodeID + name string + route netip.Prefix + // connected mirrors the prober's view (mapBatcher.IsConnected). + // Tracked alongside the runtime read so anti-flap invariants can + // reason about the candidate set the prober actually evaluates. + connected bool + // freshSinceReconnect is true between a successful Reconnect and + // the prober's second observation of the new SessionEpoch — the + // window in which the session-stability guard must defer instead + // of installing an Unhealthy bit. Reset by the first ProberTick + // that runs against the new session; cleared on Disconnect. + freshSinceReconnect bool +} + +// checkTB embeds *testing.T to satisfy testing.TB (which requires an +// unexported method only the testing package can provide) while +// collecting Cleanup functions in a local LIFO stack. runCleanups() +// runs them when the rapid check body exits so each check's resources +// are released immediately, not at the outer test's teardown — which +// would otherwise accumulate hundreds of servers and tens of thousands +// of goroutines across a 300-check run. +// +// Fatal/Fatalf still call through to *testing.T and fail the outer +// test. Inside a rapid check use rt.Fatalf instead so rapid can shrink +// the failing op sequence. +type checkTB struct { + *testing.T + + mu sync.Mutex + cleanups []func() +} + +func (c *checkTB) Cleanup(fn func()) { + c.mu.Lock() + defer c.mu.Unlock() + + c.cleanups = append(c.cleanups, fn) +} + +func (c *checkTB) runCleanups() { + c.mu.Lock() + cs := c.cleanups + c.cleanups = nil + c.mu.Unlock() + + for i := len(cs) - 1; i >= 0; i-- { + cs[i]() + } +} + +// haReadvertise pushes hostinfo + approved routes again. Called after +// every Reconnect so the new poll session re-publishes the prefix. The +// controlclient.Direct carries the SetHostinfo state across +// re-registration, but pushing again removes a race where the test +// inspects PrimaryRoutes before the initial map of the new session +// landed. +func haReadvertise( + tb testing.TB, + srv *servertest.TestServer, + c *haPropClient, +) { + tb.Helper() + + c.tc.Direct().SetHostinfo(&tailcfg.Hostinfo{ + BackendLogID: "servertest-" + c.name, + Hostname: c.name, + RoutableIPs: []netip.Prefix{c.route}, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + _ = c.tc.Direct().SendUpdate(ctx) + + _, ch, err := srv.State().SetApprovedRoutes(c.id, []netip.Prefix{c.route}) + require.NoError(tb, err) + srv.App.Change(ch) +} + +// readPrimaries builds the prefix→owner map by inverting +// State.GetNodePrimaryRoutes for every client in the fleet. The State +// API does not expose the raw snapshot map directly; iterating the +// known clients reconstructs it without poking at NodeStore internals. +// Any inconsistency between routes and isPrimaryRoute surfaces as a +// duplicate ownership claim and trips invariant 4. +func readPrimaries( + rt *rapid.T, + srv *servertest.TestServer, + clients []*haPropClient, +) map[netip.Prefix]types.NodeID { + rt.Helper() + + out := make(map[netip.Prefix]types.NodeID) + + for _, c := range clients { + for _, p := range srv.State().GetNodePrimaryRoutes(c.id) { + if prev, ok := out[p]; ok { + rt.Fatalf( + "prefix %s has two owners: %d and %d "+ + "(GetNodePrimaryRoutes claimed both)", + p, prev, c.id, + ) + } + + out[p] = c.id + } + } + + return out +} + +// checkHAInvariants walks every prefix → primary mapping in the live +// snapshot and asserts the six properties documented in the test +// header. prevPrimaries is the snapshot taken before the just-applied +// op so anti-flap can compare moves. +// +//nolint:gocyclo // invariant checker over several independent properties +func checkHAInvariants( + rt *rapid.T, + srv *servertest.TestServer, + clients []*haPropClient, + prevPrimaries map[netip.Prefix]types.NodeID, + skipAntiFlap bool, +) { + rt.Helper() + + st := srv.State() + primaries := readPrimaries(rt, srv, clients) + + // Collect every prefix advertised by the fleet so the all-down + // honesty check can iterate them, even prefixes with zero current + // advertisers. + prefixSet := make(map[netip.Prefix]struct{}) + for _, c := range clients { + prefixSet[c.route] = struct{}{} + } + + // Build the candidate set per prefix from the NodeStore: a node is a + // candidate when it is IsOnline=true and AllApprovedRoutes contains + // the prefix. This is exactly the input to electPrimaryRoutes; the + // prober's mapBatcher.IsConnected view is a separate gate the + // prober consults before dispatching a probe, not part of the + // election input. + advertisersByPrefix := make(map[netip.Prefix][]types.NodeID) + + for _, c := range clients { + nv, ok := st.GetNodeByID(c.id) + if !ok || !nv.Valid() { + continue + } + + online, known := nv.IsOnline().GetOk() + if !known || !online { + continue + } + + for _, p := range nv.AllApprovedRoutes() { + if p == c.route { + advertisersByPrefix[p] = append(advertisersByPrefix[p], c.id) + } + } + } + + // Invariant 4: isPrimaryRoute must be consistent with routes. + for prefix, owner := range primaries { + got := srv.State().GetNodePrimaryRoutes(owner) + if !slices.Contains(got, prefix) { + rt.Fatalf( + "prefix %s primary=%d but GetNodePrimaryRoutes(%d)=%v missing it", + prefix, owner, owner, got, + ) + } + } + + for prefix, owner := range primaries { + nv, ok := st.GetNodeByID(owner) + if !ok || !nv.Valid() { + rt.Fatalf( + "prefix %s primary %d missing from NodeStore", + prefix, owner, + ) + } + + // Invariant 1: primary IsOnline in the NodeStore view. + // state.Disconnect's 10s grace can leave a recently-disconnected + // node with IsOnline=true; the structural rule still requires + // the bit before the snapshot may keep it as primary. + online, known := nv.IsOnline().GetOk() + if !known || !online { + rt.Fatalf( + "prefix %s primary %d is not online (IsOnline=%v known=%v)", + prefix, owner, online, known, + ) + } + + // Invariant 2: primary advertises the prefix and is among the + // snapshot's candidate set. + approved := nv.AllApprovedRoutes() + if !slices.Contains(approved, prefix) { + rt.Fatalf( + "prefix %s primary %d does not advertise it (approved=%v)", + prefix, owner, approved, + ) + } + + if !slices.Contains(advertisersByPrefix[prefix], owner) { + rt.Fatalf( + "prefix %s primary %d not in advertiser set %v", + prefix, owner, advertisersByPrefix[prefix], + ) + } + + // Invariant 5 (anti-flap): if the previous snapshot already had + // a primary for this prefix AND that primary is still a healthy, + // online, advertising candidate, the new snapshot must keep it. + // Skipped on settle reads where we re-check without an op in + // between — the prober may run in a deferred batch and the + // election may legitimately move once on its own timeline. + if !skipAntiFlap { + if prev, hadPrev := prevPrimaries[prefix]; hadPrev && prev != owner { + prevNV, exists := st.GetNodeByID(prev) + if exists && prevNV.Valid() { + prevOnline, prevKnown := prevNV.IsOnline().GetOk() + prevApproved := prevNV.AllApprovedRoutes() + + stillCandidate := prevKnown && prevOnline && + slices.Contains(prevApproved, prefix) + stillHealthy := st.IsNodeHealthy(prev) + + if stillCandidate && stillHealthy { + rt.Fatalf( + "prefix %s flapped primary %d -> %d "+ + "while previous primary was still a healthy candidate", + prefix, prev, owner, + ) + } + } + } + } + } + + // Invariant 6 (all-down honesty): if every candidate of a prefix is + // either offline OR unhealthy, the snapshot must either drop the + // primary OR keep one that is still a candidate from the same set + // (the all-unhealthy preserve-prev rule). A primary pointed at a + // non-candidate after every candidate has gone dark would point + // peers at a node the prober has already declared unreachable. + for prefix := range prefixSet { + owner, hasPrimary := primaries[prefix] + if !hasPrimary { + continue + } + + candidates := advertisersByPrefix[prefix] + + // Empty candidates with a primary present trips invariant 2 + // above; reaching this branch with len(candidates) == 0 means + // the snapshot is internally inconsistent. + if len(candidates) == 0 { + rt.Fatalf( + "prefix %s has primary %d but no advertiser in the snapshot", + prefix, owner, + ) + } + + anyHealthy := slices.ContainsFunc(candidates, st.IsNodeHealthy) + + // Healthy preference: a primary should not be unhealthy when a + // healthy candidate exists. Asserting this through the real + // prober → State → NodeStore seam catches any divergence the + // unit-level model would miss. + if anyHealthy && !st.IsNodeHealthy(owner) { + rt.Fatalf( + "prefix %s primary %d unhealthy but %v has a healthy candidate", + prefix, owner, candidates, + ) + } + + // All-unhealthy guards: with every candidate unhealthy, the + // election has exactly two correct choices: + // - preserve prev primary when it is still a candidate + // - leave the prefix unmapped (no candidate fallback) + // + // Any other outcome points peers at a node already declared + // unreachable. + if !anyHealthy { + prevOwner, hadPrev := prevPrimaries[prefix] + prevStillCandidate := hadPrev && + slices.Contains(candidates, prevOwner) + + if prevStillCandidate && owner != prevOwner { + rt.Fatalf( + "prefix %s all-unhealthy election picked %d, "+ + "but prev primary %d is still a candidate — "+ + "preserve-prev rule violated", + prefix, owner, prevOwner, + ) + } + + if !prevStillCandidate && (!hadPrev || owner != prevOwner) { + rt.Fatalf( + "prefix %s all-unhealthy fallback elected %d "+ + "but prev primary %v is gone; election "+ + "must leave the prefix unmapped", + prefix, owner, prevOwner, + ) + } + } + } +} + +// snapshotPrimaries returns a defensive copy of the live primary +// route map so the caller can compare to a later snapshot without +// aliasing. Inverts GetNodePrimaryRoutes since State does not expose +// the raw snapshot map. +func snapshotPrimaries( + rt *rapid.T, + srv *servertest.TestServer, + clients []*haPropClient, +) map[netip.Prefix]types.NodeID { + return readPrimaries(rt, srv, clients) +} + +// TestHAProberProperty drives a real Headscale TestServer with a small +// fleet of HA-route-advertising clients through a randomised sequence +// of connect / disconnect / reconnect / prober-tick operations and +// asserts that the live PrimaryRoutes() snapshot honours every HA +// invariant after each step. +// +// Coverage versus the unit-level NodeStore property test: +// +// - This test exercises the FULL seam: real prober → real PingNode +// dispatch → real responseCh handling → State.BatchSetNodeHealth +// → NodeStore election → mapper batcher updates. The unit test +// bypasses the prober entirely. +// +// - Disconnect/Reconnect goes through the real poll-session +// lifecycle: mapBatcher.RemoveNode flips IsConnected, the 10s +// grace period defers state.Disconnect, and state.Connect bumps +// SessionEpoch on rejoin. The prober defers fresh-session probes +// so a reconnect mid-cycle does not install a stale Unhealthy. +// +// - The batched BatchSetNodeHealth is observable through the +// all-unhealthy preserve-prev invariant: per-call writes would +// publish an intermediate "one unhealthy, one healthy" snapshot, +// leaving primary on a non-prev node after both flips land. +// +// - The all-unhealthy fallback that leaves prefixes unmapped is +// covered by the honesty invariant: with every candidate +// unhealthy AND prev gone from the candidate set, the prefix +// must stay unmapped instead of pinned to any candidate. +// +// Caveat: TestClient is a real controlclient.Direct that responds to +// PingRequest over Noise, so the timeout path of the prober rarely +// fires. The session-stability guard is asserted +// (freshSinceReconnect tracking + healthy assertion after the first +// probe of a new session) but the tight race — +// probe-against-old-session-while-reconnecting — needs a dedicated +// reconnect test to trigger deterministically. +// +// Ops drawn by rapid: +// +// - ClientDisconnect(i) — cancel poll session, flips IsConnected +// - ClientReconnect(i) — re-register and start new poll session +// - ProberTick() — invoke prober.ProbeOnce synchronously +// - WaitForSnapshot() — re-check invariants without a new op +// +// Initial connect happens once during setup so the fleet always has +// the same baseline. Drawing ClientConnect in the op loop would +// require setting up auth keys and waiting for peer convergence +// inside the rapid body, which would dominate the per-check budget +// for little extra coverage. +func TestHAProberProperty(t *testing.T) { + // Per-check wall-cost is ~15-25s (real Noise handshake on each + // reconnect), so the default 100-check rapid budget blows past + // the 10-minute go test timeout used by the Tests workflow. + // Skip on CI by default so the everyday sweep stays fast; runs + // locally so seed shrinking works without an opt-in flag. + if util.IsCI() { + t.Skip("skipping HA prober property test in CI; runs locally") + } + + if testing.Short() { + t.Skip("skipping property test in short mode") + } + + rapid.Check(t, func(rt *rapid.T) { + // Fresh server per rapid check: rapid shrinks by replaying ops + // from a clean state, so we cannot reuse a server across runs. + // Cleanups registered against checkTB run at the end of THIS + // check so server resources are released before the next one + // starts. + tb := &checkTB{T: t} + defer tb.runCleanups() + + srv := servertest.NewServer(tb) + user := srv.CreateUser(tb, "ha-prop") + + // Three HA candidates is the smallest set that exercises every + // election shape (primary, secondary, tertiary) while keeping + // per-check setup under ~10 seconds. Higher numbers blow out + // the wall-clock budget without adding new failure modes — the + // dual-disconnect and reconnect-during-probe regressions all + // reproduce at N=3. + const numClients = 3 + + route := netip.MustParsePrefix("10.200.0.0/24") + + clients := make([]*haPropClient, 0, numClients) + + for i := range numClients { + name := fmt.Sprintf("ha-prop-r%d", i+1) + tc := servertest.NewClient(tb, srv, name, servertest.WithUser(user)) + + // Wait for at least the initial netmap so the registration + // committed and findNodeID is guaranteed to see the node. + tc.WaitForUpdate(tb, 10*time.Second) + + id := findNodeID(tb, srv, name) + + c := &haPropClient{ + tc: tc, + id: id, + name: name, + route: route, + connected: true, + } + clients = append(clients, c) + + haReadvertise(tb, srv, c) + } + + // Prober uses a short timeout so each ProberTick op drains + // the cycle in well under a second. TestClient is backed by a + // real controlclient.Direct that DOES respond to PingRequest + // over Noise (see TestPingNode), so most probes record a + // successful response. The probe-timeout path still fires for + // nodes whose poll session is mid-bounce — exactly the seam + // the session-stability guard was built for — but timing it + // reliably from outside the prober is hard; the property test + // focuses on the steady-state election invariants instead. + prober := state.NewHAHealthProber( + srv.State(), + types.HARouteConfig{ + ProbeInterval: 30 * time.Second, + ProbeTimeout: 50 * time.Millisecond, + }, + srv.URL, + srv.App.MapBatcher().IsConnected, + ) + + // Sanity: every client should now be an advertiser of route, + // and the snapshot must have an HA primary already. + require.Eventually(tb, func() bool { + for _, c := range clients { + if slices.Contains( + srv.State().GetNodePrimaryRoutes(c.id), + route, + ) { + return true + } + } + + return false + }, 10*time.Second, 50*time.Millisecond, + "setup: route should have a primary before drawing ops") + + idxGen := rapid.IntRange(0, numClients-1) + opGen := rapid.IntRange(0, 3) + opCount := rapid.IntRange(15, 35).Draw(rt, "opCount") + + for step := range opCount { + op := opGen.Draw(rt, fmt.Sprintf("op_%d", step)) + + prev := snapshotPrimaries(rt, srv, clients) + + // Anti-flap is meaningful only across operations that do + // not themselves mutate the candidate set or per-node + // health. Connect/Disconnect both mutate IsOnline (via + // state.Connect, or via the asynchronous grace-period + // state.Disconnect), and the resulting election move is + // expected, not a flap. ProbeOnce, by contrast, must keep + // the primary on the previous owner whenever that owner + // is still a candidate — this is what the reconnect- + // during-probe and dual-disconnect guards preserve. + isProberOp := op == 2 + + switch op { + case 0: // ClientDisconnect + idx := idxGen.Draw(rt, fmt.Sprintf("disc_idx_%d", step)) + c := clients[idx] + + // Refuse to disconnect the last connected node. The + // prober's HANodes gate requires ≥2 online candidates + // per prefix; with only one node left, no probe runs + // and the prober-driven invariants (anti-flap, + // reconnect-defer) become unfalsifiable. Keeping at + // least two candidates is enough to exercise every + // failure shape we care about. + connectedCount := 0 + + for _, cc := range clients { + if cc.connected { + connectedCount++ + } + } + + if c.connected && connectedCount > 2 { + c.tc.Disconnect(tb) + c.connected = false + c.freshSinceReconnect = false + } + + case 1: // ClientReconnect + idx := idxGen.Draw(rt, fmt.Sprintf("rec_idx_%d", step)) + c := clients[idx] + + if !c.connected { + c.tc.Reconnect(tb) + + // Wait for the new poll session to register with + // the batcher so the prober's IsConnected gate + // reflects the reconnect on the next ProberTick. + // WaitForUpdate adds ~1s per call; polling + // IsConnected directly converges in <100ms. + require.Eventually( + tb, + func() bool { + return srv.App.MapBatcher(). + IsConnected(c.id) + }, + 10*time.Second, + 5*time.Millisecond, + "reconnect: batcher should see %s online", + c.name, + ) + + c.connected = true + c.freshSinceReconnect = true + + // Re-push hostinfo so the new session's initial + // map sets RoutableIPs as expected. SetApprovedRoutes + // is idempotent on the same set. + haReadvertise(tb, srv, c) + } + + case 2: // ProberTick — drive one synchronous probe cycle. + // Capture freshness BEFORE running the probe; the + // prober itself flips a node from "fresh" to "stable" + // when it sees the same SessionEpoch twice, and the + // "fresh sessions must defer" rule is expressed + // against the pre-probe state. + freshThisCycle := make(map[types.NodeID]bool, len(clients)) + for _, c := range clients { + freshThisCycle[c.id] = c.freshSinceReconnect + } + + ctx, cancel := context.WithTimeout( + context.Background(), + 5*time.Second, + ) + + // Use App.Change as the dispatcher — the prober batches + // its results through BatchSetNodeHealth and emits a + // single PolicyChange. The real wiring is what we want + // to test. + prober.ProbeOnce(ctx, srv.App.Change) + cancel() + + // Session-stability rule: the first probe against a + // freshly-reconnected node must defer instead of + // installing an Unhealthy bit. In this servertest the + // TestClient is backed by a real controlclient.Direct + // that DOES respond to PingRequests over Noise, so + // the tight timing window — probe dispatched against + // an old session whose ping never returns — rarely + // opens. The rule still runs as a sanity gate: if it + // ever does fire, the freshly-reconnected node will + // surface as unhealthy after a single probe, and the + // test catches it. + for _, c := range clients { + if !freshThisCycle[c.id] || !c.connected { + continue + } + + if !srv.State().IsNodeHealthy(c.id) { + rt.Fatalf( + "node %d (%s) was marked unhealthy by "+ + "the first probe after reconnect; "+ + "session-stability guard should have "+ + "deferred", + c.id, c.name, + ) + } + + c.freshSinceReconnect = false + } + + case 3: // WaitForSnapshot — re-check invariants without + // applying a new op. NodeStore writes are synchronous + // inside UpdateNode/UpdateNodes, so there is nothing + // to wait on; the no-op step still verifies that two + // consecutive reads (one snapshot, the same snapshot) + // stay consistent. This is invariant 3: stable + // conditions = stable primary. We skip the anti-flap + // comparison so a primary that legitimately changed + // in the prior op is not re-flagged here. + checkHAInvariants(rt, srv, clients, prev, true) + + continue + } + + checkHAInvariants(rt, srv, clients, prev, !isProberOp) + } + }) +} diff --git a/hscontrol/state/node_store.go b/hscontrol/state/node_store.go index 18dd78d8..47f9f605 100644 --- a/hscontrol/state/node_store.go +++ b/hscontrol/state/node_store.go @@ -633,14 +633,14 @@ func snapshotFromNodes( } // electPrimaryRoutes picks the primary advertiser for each non-exit +// prefix. Inputs are restricted to online nodes that advertise the // prefix. The previous primary is preserved when it is still online // and healthy (anti-flap); otherwise the lowest-NodeID healthy // advertiser wins. When every advertiser is unhealthy the previous -// primary is preserved if still a candidate, falling back to the -// lowest-NodeID candidate so peers see *some* primary instead of -// none. Anti-flap in the all-unhealthy case matters under cable-pull -// where IsOnline lags reality and a naive lowest-ID fallback churns -// primaries to a node that is itself unreachable (issue #3203). +// primary is preserved only if still a candidate — falling back to +// any other candidate would point peers at a node the prober has +// already declared unreachable, so leaving the prefix unmapped is +// preferred until a probe cycle finds one that responds. func electPrimaryRoutes( nodes map[types.NodeID]types.Node, prev map[netip.Prefix]types.NodeID, diff --git a/hscontrol/state/primaries_property_test.go b/hscontrol/state/primaries_property_test.go index a64664d1..6507e235 100644 --- a/hscontrol/state/primaries_property_test.go +++ b/hscontrol/state/primaries_property_test.go @@ -2,6 +2,7 @@ package state import ( "fmt" + "maps" "net/netip" "slices" "testing" @@ -23,6 +24,19 @@ import ( // the same answer from a separate, deliberately direct implementation. type primariesModel struct { connected map[types.NodeID]bool + // announced mirrors Hostinfo.RoutableIPs — what the client says + // it can route. The election does not look at this directly; it + // is used to recompute the effective set whenever ApprovedRoutes + // changes without a Hostinfo update. + announced map[types.NodeID][]netip.Prefix + // approved mirrors node.ApprovedRoutes — what the admin policy + // allows. SetApprovedRoutes touches this without touching + // announced; ConnectAdvertise / ApprovedRoutesChange touch both. + approved map[types.NodeID][]netip.Prefix + // prefixes is the effective per-node route set — + // AllApprovedRoutes in the implementation, i.e. (announced ∩ + // approved) excluding exit routes. This is what the election + // sees, and what the model uses in advertisersByPrefix. prefixes map[types.NodeID][]netip.Prefix unhealthy map[types.NodeID]bool @@ -36,12 +50,42 @@ type primariesModel struct { func newPrimariesModel() *primariesModel { return &primariesModel{ connected: map[types.NodeID]bool{}, + announced: map[types.NodeID][]netip.Prefix{}, + approved: map[types.NodeID][]netip.Prefix{}, prefixes: map[types.NodeID][]netip.Prefix{}, unhealthy: map[types.NodeID]bool{}, primary: map[netip.Prefix]types.NodeID{}, } } +// recomputeEffective sets m.prefixes[id] to the intersection of +// m.announced[id] and m.approved[id]. Empty intersections clear the +// node from m.prefixes entirely. Matches Node.AllApprovedRoutes / +// SubnetRoutes semantics (exit routes excluded — none of the test +// prefixes hit that branch). +func (m *primariesModel) recomputeEffective(id types.NodeID) { + ann := m.announced[id] + app := m.approved[id] + + if len(ann) == 0 || len(app) == 0 { + delete(m.prefixes, id) + return + } + + eff := make([]netip.Prefix, 0, len(ann)) + for _, p := range ann { + if slices.Contains(app, p) { + eff = append(eff, p) + } + } + + if len(eff) == 0 { + delete(m.prefixes, id) + } else { + m.prefixes[id] = eff + } +} + // advertisersByPrefix returns the connected nodes that announce each // prefix, sorted by NodeID (matches computePrimaries' iteration). func (m *primariesModel) advertisersByPrefix() map[netip.Prefix][]types.NodeID { @@ -99,10 +143,8 @@ func (m *primariesModel) updatePrimaries() { // All-unhealthy fallback: preserve the previous primary if it // is still a candidate, otherwise leave the prefix unmapped. - // electPrimaryRoutes was changed to drop the candidates[0] - // fallback so the Phase-5 (simultaneous dual-disconnect) - // regression cannot pick an already-unhealthy node as - // primary; the model has to track the same behaviour. + // Choosing any candidate would point peers at a node already + // declared unreachable; the model mirrors that policy. if !found && len(nodes) >= 1 { if cur, ok := m.primary[p]; ok && slices.Contains(nodes, cur) { selected = cur @@ -155,8 +197,16 @@ func samePrefixSet(a, b []netip.Prefix) bool { } // checkPrimariesProperties asserts every rule we expect of the snapshot's -// primaries map given the model. -func checkPrimariesProperties(rt *rapid.T, ns *NodeStore, m *primariesModel, nodeIDs []types.NodeID) { +// primaries map given the model. prevSnapshotPrimaries is the snapshot's +// PrimaryRoutes() reading taken before the just-applied op, used to +// catch flap regressions that move primary off a still-eligible owner. +func checkPrimariesProperties( + rt *rapid.T, + ns *NodeStore, + m *primariesModel, + nodeIDs []types.NodeID, + prevSnapshotPrimaries map[netip.Prefix]types.NodeID, +) { rt.Helper() expectedByNode := map[types.NodeID][]netip.Prefix{} @@ -184,9 +234,9 @@ func checkPrimariesProperties(rt *rapid.T, ns *NodeStore, m *primariesModel, nod } // Every prefix that has at least one connected advertiser must - // have a primary in the snapshot. Issue #3203 manifests as a - // prefix silently losing its primary after a disconnect/reconnect - // cycle. + // have a primary in the snapshot: a prefix silently losing its + // primary after a disconnect/reconnect cycle would leave peers + // without a route. for _, p := range m.allPrefixes() { want, expectExists := m.primary[p] if !expectExists { @@ -208,6 +258,105 @@ func checkPrimariesProperties(rt *rapid.T, ns *NodeStore, m *primariesModel, nod ) } } + + // Structural invariants on the live snapshot, independent of the + // model. These catch shapes the model alone cannot — e.g. an owner + // that is offline but still has the prefix attributed to it. + snapshotPrimaries := ns.PrimaryRoutes() + + // A primary that owns ≥1 prefix in `routes` must also light up + // `isPrimaryRoute` (PrimaryRoutesForNode returns nil unless the + // id is in that map). The inverse check — PrimaryRoutesForNode + // returning the right prefixes — is already covered above; this + // catches the reverse direction. + ownersInRoutes := map[types.NodeID]bool{} + for _, owner := range snapshotPrimaries { + ownersInRoutes[owner] = true + } + + for owner := range ownersInRoutes { + if got := ns.PrimaryRoutesForNode(owner); len(got) == 0 { + rt.Fatalf( + "node %d owns a prefix in routes but PrimaryRoutesForNode is empty", + owner, + ) + } + } + + // Per-owner structural checks: every primary must currently be + // online and must currently advertise the prefix it owns. + advertisersByPrefix := m.advertisersByPrefix() + + for prefix, owner := range snapshotPrimaries { + nv, ok := ns.GetNode(owner) + if !ok || !nv.Valid() { + rt.Fatalf( + "prefix %s primary %d not present in NodeStore", + prefix, owner, + ) + } + + // Primary is online: an offline node cannot move packets, so + // election must never leave one as the snapshot's primary. + online, known := nv.IsOnline().GetOk() + if !known || !online { + rt.Fatalf( + "prefix %s primary %d is not online", + prefix, owner, + ) + } + + // Primary advertises the prefix: AllApprovedRoutes (subnet ∪ + // exit) must contain it. A primary that no longer advertises + // is a stale assignment the snapshot rebuild failed to clear. + approved := nv.AllApprovedRoutes() + if !slices.Contains(approved, prefix) { + rt.Fatalf( + "prefix %s primary %d does not advertise it (approved=%v)", + prefix, owner, approved, + ) + } + + // Healthy preference: if any candidate for this prefix is + // healthy, the elected primary must also be healthy. Leaving + // the prefix unmapped is fine; pointing at an unhealthy + // candidate while a healthy one was available is not. + candidates := advertisersByPrefix[prefix] + anyHealthy := false + + for _, c := range candidates { + if !m.unhealthy[c] { + anyHealthy = true + break + } + } + + if anyHealthy && m.unhealthy[owner] { + rt.Fatalf( + "prefix %s primary %d is unhealthy but %v had a healthy candidate", + prefix, owner, candidates, + ) + } + + // Anti-flap: if the previous snapshot already had a primary + // for this prefix AND that primary is still a healthy + // candidate after the op, the election must keep it. Flapping + // the primary for an unrelated reason violates the contract + // the integration tests rely on (HA failover only moves on + // loss of candidacy or health). + if prev, hadPrev := prevSnapshotPrimaries[prefix]; hadPrev { + stillCandidate := slices.Contains(candidates, prev) + stillHealthy := !m.unhealthy[prev] + + if stillCandidate && stillHealthy && owner != prev { + rt.Fatalf( + "prefix %s flapped primary %d -> %d "+ + "while previous primary was still a healthy candidate", + prefix, prev, owner, + ) + } + } + } } // nodeForRapid builds a minimal types.Node for use in property @@ -229,16 +378,41 @@ func nodeForRapid(id types.NodeID) types.Node { } } +// snapshotPrimariesCopy returns a defensive copy of the snapshot's +// prefix→primary map so the caller can compare against a later +// snapshot without aliasing the live map. +func snapshotPrimariesCopy(ns *NodeStore) map[netip.Prefix]types.NodeID { + live := ns.PrimaryRoutes() + + out := make(map[netip.Prefix]types.NodeID, len(live)) + maps.Copy(out, live) + + return out +} + // TestPrimaryRoutesProperty drives NodeStore with a randomised // sequence of high-level operations and checks that the snapshot's // primaries map matches a reference model after every step. // -// Background: issue #3203 reports that HA tracking enters a stuck -// state after a sequence of disconnect/reconnect events. The narrow -// integration and servertest reproductions written for the bug do -// not fail on upstream/main, so this property test broadens the -// search by letting rapid generate sequences we have not enumerated -// by hand. +// Op set (covers the dual-disconnect, batched-probe, and +// all-unhealthy-fallback shapes that election has to handle): +// +// - ConnectAdvertise: online + advertise prefs, clear Unhealthy +// - Disconnect: offline; ApprovedRoutes persists +// - ProbeUnhealthy: set Unhealthy +// - ProbeHealthy: clear Unhealthy +// - ApprovedRoutesChange: change advertised prefs without touching health +// - BatchProbeResults: apply a batch of health flips through +// UpdateNodes so the election runs once for the cycle, matching +// State.BatchSetNodeHealth +// - SimultaneousDisconnect: mark multiple nodes offline atomically +// via UpdateNodes (the dual-cable-pull shape) +// - SetApprovedRoutes: change ApprovedRoutes while leaving +// Hostinfo.RoutableIPs alone — exercises the asymmetry between +// announced and approved +// - OfflineExpiry: IsOnline=false WITHOUT clearing Unhealthy +// (matches the Disconnect path's actual semantics versus +// ConnectAdvertise's full reset) func TestPrimaryRoutesProperty(t *testing.T) { rapid.Check(t, func(rt *rapid.T) { const numNodes = 4 @@ -270,14 +444,29 @@ func TestPrimaryRoutesProperty(t *testing.T) { 0, len(prefixes), func(p netip.Prefix) string { return p.String() }, ) + // nodeSubsetGen draws 1..N distinct node IDs for the batched + // ops. SliceOfNDistinct's lower bound is inclusive, so 1 is + // the smallest sensible batch — a zero-size batch is a no-op + // the rapid harness should not waste shrinking cycles on. + nodeSubsetGen := rapid.SliceOfNDistinct( + nodeGen, + 1, numNodes, + func(id types.NodeID) types.NodeID { return id }, + ) - opCount := rapid.IntRange(5, 60).Draw(rt, "opCount") + opCount := rapid.IntRange(5, 200).Draw(rt, "opCount") for step := range opCount { - op := rapid.IntRange(0, 4).Draw(rt, fmt.Sprintf("op_%d", step)) - id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step)) + op := rapid.IntRange(0, 8).Draw(rt, fmt.Sprintf("op_%d", step)) + + // Snapshot primaries before applying the op so the + // anti-flap invariant has a stable reference. Reading + // after the model has already changed would compare a + // stale snapshot to a moved model. + prevPrimaries := snapshotPrimariesCopy(ns) switch op { case 0: // ConnectAdvertise — Connect path clears Unhealthy. + id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step)) prefs := prefixSubsetGen.Draw(rt, fmt.Sprintf("prefs_%d", step)) ns.UpdateNode(id, func(n *types.Node) { @@ -287,35 +476,41 @@ func TestPrimaryRoutesProperty(t *testing.T) { n.ApprovedRoutes = prefs }) + m.announced[id] = prefs + m.approved[id] = prefs + m.recomputeEffective(id) + if len(prefs) == 0 { delete(m.connected, id) - delete(m.prefixes, id) } else { m.connected[id] = true - m.prefixes[id] = prefs } delete(m.unhealthy, id) case 1: // Disconnect — IsOnline=false; ApprovedRoutes persists. + id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step)) ns.UpdateNode(id, func(n *types.Node) { n.IsOnline = new(false) }) delete(m.connected, id) case 2: // ProbeUnhealthy — HA prober marks node bad. + id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step)) ns.UpdateNode(id, func(n *types.Node) { n.Unhealthy = true }) m.unhealthy[id] = true case 3: // ProbeHealthy — HA prober marks node good. + id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step)) ns.UpdateNode(id, func(n *types.Node) { n.Unhealthy = false }) delete(m.unhealthy, id) case 4: // ApprovedRoutesChange — change advertised prefs without touching health. + id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step)) prefs := prefixSubsetGen.Draw(rt, fmt.Sprintf("prefs_%d", step)) ns.UpdateNode(id, func(n *types.Node) { @@ -324,18 +519,107 @@ func TestPrimaryRoutesProperty(t *testing.T) { n.ApprovedRoutes = prefs }) + m.announced[id] = prefs + m.approved[id] = prefs + m.recomputeEffective(id) + if len(prefs) == 0 { delete(m.connected, id) - delete(m.prefixes, id) } else { m.connected[id] = true - m.prefixes[id] = prefs } + + case 5: // BatchProbeResults — atomic health flips per cycle. + // Mirrors State.BatchSetNodeHealth: per-node + // (id, unhealthy) pairs applied through UpdateNodes + // so the election runs once on the post-batch state. + // Per-call publication would let an intermediate + // "one unhealthy, one healthy" snapshot re-elect off + // the still-healthy node before the second flip + // landed. + ids := nodeSubsetGen.Draw(rt, fmt.Sprintf("batch_ids_%d", step)) + + results := make(map[types.NodeID]bool, len(ids)) + for i, id := range ids { + unhealthy := rapid.Bool().Draw(rt, fmt.Sprintf("batch_h_%d_%d", step, i)) + results[id] = unhealthy + } + + fns := make(map[types.NodeID]UpdateNodeFunc, len(results)) + for id, unhealthy := range results { + fns[id] = func(n *types.Node) { + n.Unhealthy = unhealthy + } + } + + ns.UpdateNodes(fns) + + for id, unhealthy := range results { + if unhealthy { + m.unhealthy[id] = true + } else { + delete(m.unhealthy, id) + } + } + + case 6: // SimultaneousDisconnect — multiple offline in one batch. + // Dual-cable-pull shape: two HA routers' poll + // sessions both close in the same NodeStore tick. + // Per-call Disconnect could leave the snapshot + // momentarily pointing at an offline owner; the + // batched form forces a single rebuild. + ids := nodeSubsetGen.Draw(rt, fmt.Sprintf("disc_ids_%d", step)) + + fns := make(map[types.NodeID]UpdateNodeFunc, len(ids)) + for _, id := range ids { + fns[id] = func(n *types.Node) { + n.IsOnline = new(false) + } + } + + ns.UpdateNodes(fns) + + for _, id := range ids { + delete(m.connected, id) + } + + case 7: // SetApprovedRoutes — change ApprovedRoutes only. + // SetApprovedRoutes in production updates only + // node.ApprovedRoutes; Hostinfo.RoutableIPs (what + // the client announced) is set by the next + // MapRequest. SubnetRoutes intersects the two, so + // dropping ApprovedRoutes mid-flight shrinks the + // advertised set immediately while announcement + // alone never extends it. + id := nodeGen.Draw(rt, fmt.Sprintf("setapp_id_%d", step)) + prefs := prefixSubsetGen.Draw(rt, fmt.Sprintf("setapp_prefs_%d", step)) + + ns.UpdateNode(id, func(n *types.Node) { + n.ApprovedRoutes = prefs + }) + + m.approved[id] = prefs + m.recomputeEffective(id) + + case 8: // OfflineExpiry — IsOnline=false, KEEP Unhealthy. + // The Disconnect path does not clear Unhealthy + // (only ConnectAdvertise does on the way back + // up). A probe that marks unhealthy and a grace- + // period disconnect that lands later leaves the + // node with a stale Unhealthy bit; the test + // exercises that shape. + id := nodeGen.Draw(rt, fmt.Sprintf("expire_id_%d", step)) + ns.UpdateNode(id, func(n *types.Node) { + n.IsOnline = new(false) + }) + + delete(m.connected, id) + // m.unhealthy[id] is intentionally NOT cleared. } m.updatePrimaries() - checkPrimariesProperties(rt, ns, m, nodeIDs) + checkPrimariesProperties(rt, ns, m, nodeIDs, prevPrimaries) } }) } diff --git a/hscontrol/state/primaries_test.go b/hscontrol/state/primaries_test.go index c854f6da..1ad13d15 100644 --- a/hscontrol/state/primaries_test.go +++ b/hscontrol/state/primaries_test.go @@ -218,12 +218,12 @@ func TestPrimaries_AllUnhealthyKeepsAPrimary(t *testing.T) { } func TestPrimaries_AllUnhealthyPreservesPrevious(t *testing.T) { - // Issue #3203: once a failover has moved primary to a higher-ID - // node, a subsequent all-unhealthy state must NOT churn primary - // back to the lowest-ID candidate. Under cable-pull semantics - // both nodes can linger as IsOnline=true (half-open TCP) and - // both go Unhealthy — naive `candidates[0]` would flap the - // primary to a node that is itself unreachable. + // Once a failover has moved primary to a higher-ID node, a + // subsequent all-unhealthy state must NOT churn primary back to + // the lowest-ID candidate. Under cable-pull semantics both nodes + // can linger as IsOnline=true (half-open TCP) and both go + // Unhealthy — naive `candidates[0]` would flap the primary to a + // node that is itself unreachable. prefix := mp("10.0.0.0/24") f := newPrimariesFixture(t, 1, 2) f.advertise(1, prefix) @@ -247,12 +247,11 @@ func TestPrimaries_ExitRouteNotElected(t *testing.T) { f.requireNoPrimary(exitV4) } -func TestPrimaries_RegressionIssue3203_BothOfflineThenOneReturns(t *testing.T) { - // Issue #3203: with two HA advertisers, dropping both then - // bringing one back used to leave the prefix without any - // primary. After the refactor the snapshot recomputes primaries - // on every NodeStore write, so the returning advertiser must - // be elected. +func TestPrimaries_BothOfflineThenOneReturns(t *testing.T) { + // With two HA advertisers, dropping both then bringing one back + // used to leave the prefix without any primary. The snapshot + // recomputes primaries on every NodeStore write, so the + // returning advertiser must be elected. prefix := mp("10.0.0.0/24") f := newPrimariesFixture(t, 1, 2) f.advertise(1, prefix) diff --git a/integration/route_test.go b/integration/route_test.go index 911e5c84..aa48da12 100644 --- a/integration/route_test.go +++ b/integration/route_test.go @@ -4066,9 +4066,9 @@ func TestHASubnetRouterFailoverBothOffline(t *testing.T) { // connection until kernel keepalives time out (often >60 s). When // the cable returns, two server-side longpoll sessions can overlap. // -// Issue #3203 reports the bug after cable pulls; this variant blocks all -// traffic between the router container and headscale via iptables and then -// removes the block to mimic that behaviour. +// This variant blocks all traffic between the router container and +// headscale via iptables and then removes the block to mimic the +// cable-pull behaviour. func TestHASubnetRouterFailoverBothOfflineCablePull(t *testing.T) { IntegrationSkip(t)