diff --git a/hscontrol/servertest/ha_property_test.go b/hscontrol/servertest/ha_property_test.go
new file mode 100644
index 00000000..712a1cf0
--- /dev/null
+++ b/hscontrol/servertest/ha_property_test.go
@@ -0,0 +1,642 @@
+package servertest_test
+
+import (
+	"context"
+	"fmt"
+	"net/netip"
+	"slices"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/juanfont/headscale/hscontrol/servertest"
+	"github.com/juanfont/headscale/hscontrol/state"
+	"github.com/juanfont/headscale/hscontrol/types"
+	"github.com/juanfont/headscale/hscontrol/util"
+	"github.com/stretchr/testify/require"
+	"pgregory.net/rapid"
+	"tailscale.com/tailcfg"
+)
+
+// haPropClient bundles a TestClient with the node ID and route it
+// advertises. The property test mutates connection state through
+// these handles and inspects the resulting NodeStore snapshot.
+type haPropClient struct {
+	tc    *servertest.TestClient
+	id    types.NodeID
+	name  string
+	route netip.Prefix
+	// connected mirrors the prober's view (mapBatcher.IsConnected).
+	// Tracked alongside the runtime read so anti-flap invariants can
+	// reason about the candidate set the prober actually evaluates.
+	connected bool
+	// freshSinceReconnect is true between a successful Reconnect and
+	// the prober's second observation of the new SessionEpoch — the
+	// window in which the session-stability guard must defer instead
+	// of installing an Unhealthy bit. Reset by the first ProberTick
+	// that runs against the new session; cleared on Disconnect.
+	freshSinceReconnect bool
+}
+
+// checkTB embeds *testing.T to satisfy testing.TB (which requires an
+// unexported method only the testing package can provide) while
+// collecting Cleanup functions in a local LIFO stack. runCleanups()
+// runs them when the rapid check body exits so each check's resources
+// are released immediately, not at the outer test's teardown — which
+// would otherwise accumulate hundreds of servers and tens of thousands
+// of goroutines across a 300-check run.
+//
+// Fatal/Fatalf still call through to *testing.T and fail the outer
+// test. Inside a rapid check use rt.Fatalf instead so rapid can shrink
+// the failing op sequence.
+type checkTB struct {
+	*testing.T
+
+	mu       sync.Mutex
+	cleanups []func()
+}
+
+func (c *checkTB) Cleanup(fn func()) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.cleanups = append(c.cleanups, fn)
+}
+
+func (c *checkTB) runCleanups() {
+	c.mu.Lock()
+	cs := c.cleanups
+	c.cleanups = nil
+	c.mu.Unlock()
+
+	for i := len(cs) - 1; i >= 0; i-- {
+		cs[i]()
+	}
+}
+
+// haReadvertise pushes hostinfo + approved routes again. Called after
+// every Reconnect so the new poll session re-publishes the prefix. The
+// controlclient.Direct carries the SetHostinfo state across
+// re-registration, but pushing again removes a race where the test
+// inspects PrimaryRoutes before the initial map of the new session
+// landed.
+func haReadvertise(
+	tb testing.TB,
+	srv *servertest.TestServer,
+	c *haPropClient,
+) {
+	tb.Helper()
+
+	c.tc.Direct().SetHostinfo(&tailcfg.Hostinfo{
+		BackendLogID: "servertest-" + c.name,
+		Hostname:     c.name,
+		RoutableIPs:  []netip.Prefix{c.route},
+	})
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	_ = c.tc.Direct().SendUpdate(ctx)
+
+	_, ch, err := srv.State().SetApprovedRoutes(c.id, []netip.Prefix{c.route})
+	require.NoError(tb, err)
+	srv.App.Change(ch)
+}
+
+// readPrimaries builds the prefix→owner map by inverting
+// State.GetNodePrimaryRoutes for every client in the fleet. The State
+// API does not expose the raw snapshot map directly; iterating the
+// known clients reconstructs it without poking at NodeStore internals.
+// Any inconsistency between routes and isPrimaryRoute surfaces as a
+// duplicate ownership claim and trips invariant 4.
+func readPrimaries(
+	rt *rapid.T,
+	srv *servertest.TestServer,
+	clients []*haPropClient,
+) map[netip.Prefix]types.NodeID {
+	rt.Helper()
+
+	out := make(map[netip.Prefix]types.NodeID)
+
+	for _, c := range clients {
+		for _, p := range srv.State().GetNodePrimaryRoutes(c.id) {
+			if prev, ok := out[p]; ok {
+				rt.Fatalf(
+					"prefix %s has two owners: %d and %d "+
+						"(GetNodePrimaryRoutes claimed both)",
+					p, prev, c.id,
+				)
+			}
+
+			out[p] = c.id
+		}
+	}
+
+	return out
+}
+
+// checkHAInvariants walks every prefix → primary mapping in the live
+// snapshot and asserts the six properties documented in the test
+// header. prevPrimaries is the snapshot taken before the just-applied
+// op so anti-flap can compare moves.
+//
+//nolint:gocyclo // invariant checker over several independent properties
+func checkHAInvariants(
+	rt *rapid.T,
+	srv *servertest.TestServer,
+	clients []*haPropClient,
+	prevPrimaries map[netip.Prefix]types.NodeID,
+	skipAntiFlap bool,
+) {
+	rt.Helper()
+
+	st := srv.State()
+	primaries := readPrimaries(rt, srv, clients)
+
+	// Collect every prefix advertised by the fleet so the all-down
+	// honesty check can iterate them, even prefixes with zero current
+	// advertisers.
+	prefixSet := make(map[netip.Prefix]struct{})
+	for _, c := range clients {
+		prefixSet[c.route] = struct{}{}
+	}
+
+	// Build the candidate set per prefix from the NodeStore: a node is a
+	// candidate when it is IsOnline=true and AllApprovedRoutes contains
+	// the prefix. This is exactly the input to electPrimaryRoutes; the
+	// prober's mapBatcher.IsConnected view is a separate gate the
+	// prober consults before dispatching a probe, not part of the
+	// election input.
+	advertisersByPrefix := make(map[netip.Prefix][]types.NodeID)
+
+	for _, c := range clients {
+		nv, ok := st.GetNodeByID(c.id)
+		if !ok || !nv.Valid() {
+			continue
+		}
+
+		online, known := nv.IsOnline().GetOk()
+		if !known || !online {
+			continue
+		}
+
+		for _, p := range nv.AllApprovedRoutes() {
+			if p == c.route {
+				advertisersByPrefix[p] = append(advertisersByPrefix[p], c.id)
+			}
+		}
+	}
+
+	// Invariant 4: isPrimaryRoute must be consistent with routes.
+	for prefix, owner := range primaries {
+		got := srv.State().GetNodePrimaryRoutes(owner)
+		if !slices.Contains(got, prefix) {
+			rt.Fatalf(
+				"prefix %s primary=%d but GetNodePrimaryRoutes(%d)=%v missing it",
+				prefix, owner, owner, got,
+			)
+		}
+	}
+
+	for prefix, owner := range primaries {
+		nv, ok := st.GetNodeByID(owner)
+		if !ok || !nv.Valid() {
+			rt.Fatalf(
+				"prefix %s primary %d missing from NodeStore",
+				prefix, owner,
+			)
+		}
+
+		// Invariant 1: primary IsOnline in the NodeStore view.
+		// state.Disconnect's 10s grace can leave a recently-disconnected
+		// node with IsOnline=true; the structural rule still requires
+		// the bit before the snapshot may keep it as primary.
+		online, known := nv.IsOnline().GetOk()
+		if !known || !online {
+			rt.Fatalf(
+				"prefix %s primary %d is not online (IsOnline=%v known=%v)",
+				prefix, owner, online, known,
+			)
+		}
+
+		// Invariant 2: primary advertises the prefix and is among the
+		// snapshot's candidate set.
+		approved := nv.AllApprovedRoutes()
+		if !slices.Contains(approved, prefix) {
+			rt.Fatalf(
+				"prefix %s primary %d does not advertise it (approved=%v)",
+				prefix, owner, approved,
+			)
+		}
+
+		if !slices.Contains(advertisersByPrefix[prefix], owner) {
+			rt.Fatalf(
+				"prefix %s primary %d not in advertiser set %v",
+				prefix, owner, advertisersByPrefix[prefix],
+			)
+		}
+
+		// Invariant 5 (anti-flap): if the previous snapshot already had
+		// a primary for this prefix AND that primary is still a healthy,
+		// online, advertising candidate, the new snapshot must keep it.
+		// Skipped on settle reads where we re-check without an op in
+		// between — the prober may run in a deferred batch and the
+		// election may legitimately move once on its own timeline.
+		if !skipAntiFlap {
+			if prev, hadPrev := prevPrimaries[prefix]; hadPrev && prev != owner {
+				prevNV, exists := st.GetNodeByID(prev)
+				if exists && prevNV.Valid() {
+					prevOnline, prevKnown := prevNV.IsOnline().GetOk()
+					prevApproved := prevNV.AllApprovedRoutes()
+
+					stillCandidate := prevKnown && prevOnline &&
+						slices.Contains(prevApproved, prefix)
+					stillHealthy := st.IsNodeHealthy(prev)
+
+					if stillCandidate && stillHealthy {
+						rt.Fatalf(
+							"prefix %s flapped primary %d -> %d "+
+								"while previous primary was still a healthy candidate",
+							prefix, prev, owner,
+						)
+					}
+				}
+			}
+		}
+	}
+
+	// Invariant 6 (all-down honesty): if every candidate of a prefix is
+	// either offline OR unhealthy, the snapshot must either drop the
+	// primary OR keep one that is still a candidate from the same set
+	// (the all-unhealthy preserve-prev rule). A primary pointed at a
+	// non-candidate after every candidate has gone dark would point
+	// peers at a node the prober has already declared unreachable.
+	for prefix := range prefixSet {
+		owner, hasPrimary := primaries[prefix]
+		if !hasPrimary {
+			continue
+		}
+
+		candidates := advertisersByPrefix[prefix]
+
+		// Empty candidates with a primary present trips invariant 2
+		// above; reaching this branch with len(candidates) == 0 means
+		// the snapshot is internally inconsistent.
+		if len(candidates) == 0 {
+			rt.Fatalf(
+				"prefix %s has primary %d but no advertiser in the snapshot",
+				prefix, owner,
+			)
+		}
+
+		anyHealthy := slices.ContainsFunc(candidates, st.IsNodeHealthy)
+
+		// Healthy preference: a primary should not be unhealthy when a
+		// healthy candidate exists. Asserting this through the real
+		// prober → State → NodeStore seam catches any divergence the
+		// unit-level model would miss.
+		if anyHealthy && !st.IsNodeHealthy(owner) {
+			rt.Fatalf(
+				"prefix %s primary %d unhealthy but %v has a healthy candidate",
+				prefix, owner, candidates,
+			)
+		}
+
+		// All-unhealthy guards: with every candidate unhealthy, the
+		// election has exactly two correct choices:
+		//   - preserve prev primary when it is still a candidate
+		//   - leave the prefix unmapped (no candidate fallback)
+		//
+		// Any other outcome points peers at a node already declared
+		// unreachable.
+		if !anyHealthy {
+			prevOwner, hadPrev := prevPrimaries[prefix]
+			prevStillCandidate := hadPrev &&
+				slices.Contains(candidates, prevOwner)
+
+			if prevStillCandidate && owner != prevOwner {
+				rt.Fatalf(
+					"prefix %s all-unhealthy election picked %d, "+
+						"but prev primary %d is still a candidate — "+
+						"preserve-prev rule violated",
+					prefix, owner, prevOwner,
+				)
+			}
+
+			if !prevStillCandidate && (!hadPrev || owner != prevOwner) {
+				rt.Fatalf(
+					"prefix %s all-unhealthy fallback elected %d "+
+						"but prev primary %v is gone; election "+
+						"must leave the prefix unmapped",
+					prefix, owner, prevOwner,
+				)
+			}
+		}
+	}
+}
+
+// snapshotPrimaries returns a defensive copy of the live primary
+// route map so the caller can compare to a later snapshot without
+// aliasing. Inverts GetNodePrimaryRoutes since State does not expose
+// the raw snapshot map.
+func snapshotPrimaries(
+	rt *rapid.T,
+	srv *servertest.TestServer,
+	clients []*haPropClient,
+) map[netip.Prefix]types.NodeID {
+	return readPrimaries(rt, srv, clients)
+}
+
+// TestHAProberProperty drives a real Headscale TestServer with a small
+// fleet of HA-route-advertising clients through a randomised sequence
+// of connect / disconnect / reconnect / prober-tick operations and
+// asserts that the live PrimaryRoutes() snapshot honours every HA
+// invariant after each step.
+//
+// Coverage versus the unit-level NodeStore property test:
+//
+//   - This test exercises the FULL seam: real prober → real PingNode
+//     dispatch → real responseCh handling → State.BatchSetNodeHealth
+//     → NodeStore election → mapper batcher updates. The unit test
+//     bypasses the prober entirely.
+//
+//   - Disconnect/Reconnect goes through the real poll-session
+//     lifecycle: mapBatcher.RemoveNode flips IsConnected, the 10s
+//     grace period defers state.Disconnect, and state.Connect bumps
+//     SessionEpoch on rejoin. The prober defers fresh-session probes
+//     so a reconnect mid-cycle does not install a stale Unhealthy.
+//
+//   - The batched BatchSetNodeHealth is observable through the
+//     all-unhealthy preserve-prev invariant: per-call writes would
+//     publish an intermediate "one unhealthy, one healthy" snapshot,
+//     leaving primary on a non-prev node after both flips land.
+//
+//   - The all-unhealthy fallback that leaves prefixes unmapped is
+//     covered by the honesty invariant: with every candidate
+//     unhealthy AND prev gone from the candidate set, the prefix
+//     must stay unmapped instead of pinned to any candidate.
+//
+// Caveat: TestClient is a real controlclient.Direct that responds to
+// PingRequest over Noise, so the timeout path of the prober rarely
+// fires. The session-stability guard is asserted
+// (freshSinceReconnect tracking + healthy assertion after the first
+// probe of a new session) but the tight race —
+// probe-against-old-session-while-reconnecting — needs a dedicated
+// reconnect test to trigger deterministically.
+//
+// Ops drawn by rapid:
+//
+//   - ClientDisconnect(i)     — cancel poll session, flips IsConnected
+//   - ClientReconnect(i)      — re-register and start new poll session
+//   - ProberTick()            — invoke prober.ProbeOnce synchronously
+//   - WaitForSnapshot()       — re-check invariants without a new op
+//
+// Initial connect happens once during setup so the fleet always has
+// the same baseline. Drawing ClientConnect in the op loop would
+// require setting up auth keys and waiting for peer convergence
+// inside the rapid body, which would dominate the per-check budget
+// for little extra coverage.
+func TestHAProberProperty(t *testing.T) {
+	// Per-check wall-cost is ~15-25s (real Noise handshake on each
+	// reconnect), so the default 100-check rapid budget blows past
+	// the 10-minute go test timeout used by the Tests workflow.
+	// Skip on CI by default so the everyday sweep stays fast; runs
+	// locally so seed shrinking works without an opt-in flag.
+	if util.IsCI() {
+		t.Skip("skipping HA prober property test in CI; runs locally")
+	}
+
+	if testing.Short() {
+		t.Skip("skipping property test in short mode")
+	}
+
+	rapid.Check(t, func(rt *rapid.T) {
+		// Fresh server per rapid check: rapid shrinks by replaying ops
+		// from a clean state, so we cannot reuse a server across runs.
+		// Cleanups registered against checkTB run at the end of THIS
+		// check so server resources are released before the next one
+		// starts.
+		tb := &checkTB{T: t}
+		defer tb.runCleanups()
+
+		srv := servertest.NewServer(tb)
+		user := srv.CreateUser(tb, "ha-prop")
+
+		// Three HA candidates is the smallest set that exercises every
+		// election shape (primary, secondary, tertiary) while keeping
+		// per-check setup under ~10 seconds. Higher numbers blow out
+		// the wall-clock budget without adding new failure modes — the
+		// dual-disconnect and reconnect-during-probe regressions all
+		// reproduce at N=3.
+		const numClients = 3
+
+		route := netip.MustParsePrefix("10.200.0.0/24")
+
+		clients := make([]*haPropClient, 0, numClients)
+
+		for i := range numClients {
+			name := fmt.Sprintf("ha-prop-r%d", i+1)
+			tc := servertest.NewClient(tb, srv, name, servertest.WithUser(user))
+
+			// Wait for at least the initial netmap so the registration
+			// committed and findNodeID is guaranteed to see the node.
+			tc.WaitForUpdate(tb, 10*time.Second)
+
+			id := findNodeID(tb, srv, name)
+
+			c := &haPropClient{
+				tc:        tc,
+				id:        id,
+				name:      name,
+				route:     route,
+				connected: true,
+			}
+			clients = append(clients, c)
+
+			haReadvertise(tb, srv, c)
+		}
+
+		// Prober uses a short timeout so each ProberTick op drains
+		// the cycle in well under a second. TestClient is backed by a
+		// real controlclient.Direct that DOES respond to PingRequest
+		// over Noise (see TestPingNode), so most probes record a
+		// successful response. The probe-timeout path still fires for
+		// nodes whose poll session is mid-bounce — exactly the seam
+		// the session-stability guard was built for — but timing it
+		// reliably from outside the prober is hard; the property test
+		// focuses on the steady-state election invariants instead.
+		prober := state.NewHAHealthProber(
+			srv.State(),
+			types.HARouteConfig{
+				ProbeInterval: 30 * time.Second,
+				ProbeTimeout:  50 * time.Millisecond,
+			},
+			srv.URL,
+			srv.App.MapBatcher().IsConnected,
+		)
+
+		// Sanity: every client should now be an advertiser of route,
+		// and the snapshot must have an HA primary already.
+		require.Eventually(tb, func() bool {
+			for _, c := range clients {
+				if slices.Contains(
+					srv.State().GetNodePrimaryRoutes(c.id),
+					route,
+				) {
+					return true
+				}
+			}
+
+			return false
+		}, 10*time.Second, 50*time.Millisecond,
+			"setup: route should have a primary before drawing ops")
+
+		idxGen := rapid.IntRange(0, numClients-1)
+		opGen := rapid.IntRange(0, 3)
+		opCount := rapid.IntRange(15, 35).Draw(rt, "opCount")
+
+		for step := range opCount {
+			op := opGen.Draw(rt, fmt.Sprintf("op_%d", step))
+
+			prev := snapshotPrimaries(rt, srv, clients)
+
+			// Anti-flap is meaningful only across operations that do
+			// not themselves mutate the candidate set or per-node
+			// health. Connect/Disconnect both mutate IsOnline (via
+			// state.Connect, or via the asynchronous grace-period
+			// state.Disconnect), and the resulting election move is
+			// expected, not a flap. ProbeOnce, by contrast, must keep
+			// the primary on the previous owner whenever that owner
+			// is still a candidate — this is what the reconnect-
+			// during-probe and dual-disconnect guards preserve.
+			isProberOp := op == 2
+
+			switch op {
+			case 0: // ClientDisconnect
+				idx := idxGen.Draw(rt, fmt.Sprintf("disc_idx_%d", step))
+				c := clients[idx]
+
+				// Refuse to disconnect the last connected node. The
+				// prober's HANodes gate requires ≥2 online candidates
+				// per prefix; with only one node left, no probe runs
+				// and the prober-driven invariants (anti-flap,
+				// reconnect-defer) become unfalsifiable. Keeping at
+				// least two candidates is enough to exercise every
+				// failure shape we care about.
+				connectedCount := 0
+
+				for _, cc := range clients {
+					if cc.connected {
+						connectedCount++
+					}
+				}
+
+				if c.connected && connectedCount > 2 {
+					c.tc.Disconnect(tb)
+					c.connected = false
+					c.freshSinceReconnect = false
+				}
+
+			case 1: // ClientReconnect
+				idx := idxGen.Draw(rt, fmt.Sprintf("rec_idx_%d", step))
+				c := clients[idx]
+
+				if !c.connected {
+					c.tc.Reconnect(tb)
+
+					// Wait for the new poll session to register with
+					// the batcher so the prober's IsConnected gate
+					// reflects the reconnect on the next ProberTick.
+					// WaitForUpdate adds ~1s per call; polling
+					// IsConnected directly converges in <100ms.
+					require.Eventually(
+						tb,
+						func() bool {
+							return srv.App.MapBatcher().
+								IsConnected(c.id)
+						},
+						10*time.Second,
+						5*time.Millisecond,
+						"reconnect: batcher should see %s online",
+						c.name,
+					)
+
+					c.connected = true
+					c.freshSinceReconnect = true
+
+					// Re-push hostinfo so the new session's initial
+					// map sets RoutableIPs as expected. SetApprovedRoutes
+					// is idempotent on the same set.
+					haReadvertise(tb, srv, c)
+				}
+
+			case 2: // ProberTick — drive one synchronous probe cycle.
+				// Capture freshness BEFORE running the probe; the
+				// prober itself flips a node from "fresh" to "stable"
+				// when it sees the same SessionEpoch twice, and the
+				// "fresh sessions must defer" rule is expressed
+				// against the pre-probe state.
+				freshThisCycle := make(map[types.NodeID]bool, len(clients))
+				for _, c := range clients {
+					freshThisCycle[c.id] = c.freshSinceReconnect
+				}
+
+				ctx, cancel := context.WithTimeout(
+					context.Background(),
+					5*time.Second,
+				)
+
+				// Use App.Change as the dispatcher — the prober batches
+				// its results through BatchSetNodeHealth and emits a
+				// single PolicyChange. The real wiring is what we want
+				// to test.
+				prober.ProbeOnce(ctx, srv.App.Change)
+				cancel()
+
+				// Session-stability rule: the first probe against a
+				// freshly-reconnected node must defer instead of
+				// installing an Unhealthy bit. In this servertest the
+				// TestClient is backed by a real controlclient.Direct
+				// that DOES respond to PingRequests over Noise, so
+				// the tight timing window — probe dispatched against
+				// an old session whose ping never returns — rarely
+				// opens. The rule still runs as a sanity gate: if it
+				// ever does fire, the freshly-reconnected node will
+				// surface as unhealthy after a single probe, and the
+				// test catches it.
+				for _, c := range clients {
+					if !freshThisCycle[c.id] || !c.connected {
+						continue
+					}
+
+					if !srv.State().IsNodeHealthy(c.id) {
+						rt.Fatalf(
+							"node %d (%s) was marked unhealthy by "+
+								"the first probe after reconnect; "+
+								"session-stability guard should have "+
+								"deferred",
+							c.id, c.name,
+						)
+					}
+
+					c.freshSinceReconnect = false
+				}
+
+			case 3: // WaitForSnapshot — re-check invariants without
+				// applying a new op. NodeStore writes are synchronous
+				// inside UpdateNode/UpdateNodes, so there is nothing
+				// to wait on; the no-op step still verifies that two
+				// consecutive reads (one snapshot, the same snapshot)
+				// stay consistent. This is invariant 3: stable
+				// conditions = stable primary. We skip the anti-flap
+				// comparison so a primary that legitimately changed
+				// in the prior op is not re-flagged here.
+				checkHAInvariants(rt, srv, clients, prev, true)
+
+				continue
+			}
+
+			checkHAInvariants(rt, srv, clients, prev, !isProberOp)
+		}
+	})
+}
diff --git a/hscontrol/state/node_store.go b/hscontrol/state/node_store.go
index 18dd78d8..47f9f605 100644
--- a/hscontrol/state/node_store.go
+++ b/hscontrol/state/node_store.go
@@ -633,14 +633,14 @@ func snapshotFromNodes(
 }
 
 // electPrimaryRoutes picks the primary advertiser for each non-exit
+// prefix. Inputs are restricted to online nodes that advertise the
 // prefix. The previous primary is preserved when it is still online
 // and healthy (anti-flap); otherwise the lowest-NodeID healthy
 // advertiser wins. When every advertiser is unhealthy the previous
-// primary is preserved if still a candidate, falling back to the
-// lowest-NodeID candidate so peers see *some* primary instead of
-// none. Anti-flap in the all-unhealthy case matters under cable-pull
-// where IsOnline lags reality and a naive lowest-ID fallback churns
-// primaries to a node that is itself unreachable (issue #3203).
+// primary is preserved only if still a candidate — falling back to
+// any other candidate would point peers at a node the prober has
+// already declared unreachable, so leaving the prefix unmapped is
+// preferred until a probe cycle finds one that responds.
 func electPrimaryRoutes(
 	nodes map[types.NodeID]types.Node,
 	prev map[netip.Prefix]types.NodeID,
diff --git a/hscontrol/state/primaries_property_test.go b/hscontrol/state/primaries_property_test.go
index a64664d1..6507e235 100644
--- a/hscontrol/state/primaries_property_test.go
+++ b/hscontrol/state/primaries_property_test.go
@@ -2,6 +2,7 @@ package state
 
 import (
 	"fmt"
+	"maps"
 	"net/netip"
 	"slices"
 	"testing"
@@ -23,6 +24,19 @@ import (
 // the same answer from a separate, deliberately direct implementation.
 type primariesModel struct {
 	connected map[types.NodeID]bool
+	// announced mirrors Hostinfo.RoutableIPs — what the client says
+	// it can route. The election does not look at this directly; it
+	// is used to recompute the effective set whenever ApprovedRoutes
+	// changes without a Hostinfo update.
+	announced map[types.NodeID][]netip.Prefix
+	// approved mirrors node.ApprovedRoutes — what the admin policy
+	// allows. SetApprovedRoutes touches this without touching
+	// announced; ConnectAdvertise / ApprovedRoutesChange touch both.
+	approved map[types.NodeID][]netip.Prefix
+	// prefixes is the effective per-node route set —
+	// AllApprovedRoutes in the implementation, i.e. (announced ∩
+	// approved) excluding exit routes. This is what the election
+	// sees, and what the model uses in advertisersByPrefix.
 	prefixes  map[types.NodeID][]netip.Prefix
 	unhealthy map[types.NodeID]bool
 
@@ -36,12 +50,42 @@ type primariesModel struct {
 func newPrimariesModel() *primariesModel {
 	return &primariesModel{
 		connected: map[types.NodeID]bool{},
+		announced: map[types.NodeID][]netip.Prefix{},
+		approved:  map[types.NodeID][]netip.Prefix{},
 		prefixes:  map[types.NodeID][]netip.Prefix{},
 		unhealthy: map[types.NodeID]bool{},
 		primary:   map[netip.Prefix]types.NodeID{},
 	}
 }
 
+// recomputeEffective sets m.prefixes[id] to the intersection of
+// m.announced[id] and m.approved[id]. Empty intersections clear the
+// node from m.prefixes entirely. Matches Node.AllApprovedRoutes /
+// SubnetRoutes semantics (exit routes excluded — none of the test
+// prefixes hit that branch).
+func (m *primariesModel) recomputeEffective(id types.NodeID) {
+	ann := m.announced[id]
+	app := m.approved[id]
+
+	if len(ann) == 0 || len(app) == 0 {
+		delete(m.prefixes, id)
+		return
+	}
+
+	eff := make([]netip.Prefix, 0, len(ann))
+	for _, p := range ann {
+		if slices.Contains(app, p) {
+			eff = append(eff, p)
+		}
+	}
+
+	if len(eff) == 0 {
+		delete(m.prefixes, id)
+	} else {
+		m.prefixes[id] = eff
+	}
+}
+
 // advertisersByPrefix returns the connected nodes that announce each
 // prefix, sorted by NodeID (matches computePrimaries' iteration).
 func (m *primariesModel) advertisersByPrefix() map[netip.Prefix][]types.NodeID {
@@ -99,10 +143,8 @@ func (m *primariesModel) updatePrimaries() {
 
 		// All-unhealthy fallback: preserve the previous primary if it
 		// is still a candidate, otherwise leave the prefix unmapped.
-		// electPrimaryRoutes was changed to drop the candidates[0]
-		// fallback so the Phase-5 (simultaneous dual-disconnect)
-		// regression cannot pick an already-unhealthy node as
-		// primary; the model has to track the same behaviour.
+		// Choosing any candidate would point peers at a node already
+		// declared unreachable; the model mirrors that policy.
 		if !found && len(nodes) >= 1 {
 			if cur, ok := m.primary[p]; ok && slices.Contains(nodes, cur) {
 				selected = cur
@@ -155,8 +197,16 @@ func samePrefixSet(a, b []netip.Prefix) bool {
 }
 
 // checkPrimariesProperties asserts every rule we expect of the snapshot's
-// primaries map given the model.
-func checkPrimariesProperties(rt *rapid.T, ns *NodeStore, m *primariesModel, nodeIDs []types.NodeID) {
+// primaries map given the model. prevSnapshotPrimaries is the snapshot's
+// PrimaryRoutes() reading taken before the just-applied op, used to
+// catch flap regressions that move primary off a still-eligible owner.
+func checkPrimariesProperties(
+	rt *rapid.T,
+	ns *NodeStore,
+	m *primariesModel,
+	nodeIDs []types.NodeID,
+	prevSnapshotPrimaries map[netip.Prefix]types.NodeID,
+) {
 	rt.Helper()
 
 	expectedByNode := map[types.NodeID][]netip.Prefix{}
@@ -184,9 +234,9 @@ func checkPrimariesProperties(rt *rapid.T, ns *NodeStore, m *primariesModel, nod
 	}
 
 	// Every prefix that has at least one connected advertiser must
-	// have a primary in the snapshot. Issue #3203 manifests as a
-	// prefix silently losing its primary after a disconnect/reconnect
-	// cycle.
+	// have a primary in the snapshot: a prefix silently losing its
+	// primary after a disconnect/reconnect cycle would leave peers
+	// without a route.
 	for _, p := range m.allPrefixes() {
 		want, expectExists := m.primary[p]
 		if !expectExists {
@@ -208,6 +258,105 @@ func checkPrimariesProperties(rt *rapid.T, ns *NodeStore, m *primariesModel, nod
 			)
 		}
 	}
+
+	// Structural invariants on the live snapshot, independent of the
+	// model. These catch shapes the model alone cannot — e.g. an owner
+	// that is offline but still has the prefix attributed to it.
+	snapshotPrimaries := ns.PrimaryRoutes()
+
+	// A primary that owns ≥1 prefix in `routes` must also light up
+	// `isPrimaryRoute` (PrimaryRoutesForNode returns nil unless the
+	// id is in that map). The inverse check — PrimaryRoutesForNode
+	// returning the right prefixes — is already covered above; this
+	// catches the reverse direction.
+	ownersInRoutes := map[types.NodeID]bool{}
+	for _, owner := range snapshotPrimaries {
+		ownersInRoutes[owner] = true
+	}
+
+	for owner := range ownersInRoutes {
+		if got := ns.PrimaryRoutesForNode(owner); len(got) == 0 {
+			rt.Fatalf(
+				"node %d owns a prefix in routes but PrimaryRoutesForNode is empty",
+				owner,
+			)
+		}
+	}
+
+	// Per-owner structural checks: every primary must currently be
+	// online and must currently advertise the prefix it owns.
+	advertisersByPrefix := m.advertisersByPrefix()
+
+	for prefix, owner := range snapshotPrimaries {
+		nv, ok := ns.GetNode(owner)
+		if !ok || !nv.Valid() {
+			rt.Fatalf(
+				"prefix %s primary %d not present in NodeStore",
+				prefix, owner,
+			)
+		}
+
+		// Primary is online: an offline node cannot move packets, so
+		// election must never leave one as the snapshot's primary.
+		online, known := nv.IsOnline().GetOk()
+		if !known || !online {
+			rt.Fatalf(
+				"prefix %s primary %d is not online",
+				prefix, owner,
+			)
+		}
+
+		// Primary advertises the prefix: AllApprovedRoutes (subnet ∪
+		// exit) must contain it. A primary that no longer advertises
+		// is a stale assignment the snapshot rebuild failed to clear.
+		approved := nv.AllApprovedRoutes()
+		if !slices.Contains(approved, prefix) {
+			rt.Fatalf(
+				"prefix %s primary %d does not advertise it (approved=%v)",
+				prefix, owner, approved,
+			)
+		}
+
+		// Healthy preference: if any candidate for this prefix is
+		// healthy, the elected primary must also be healthy. Leaving
+		// the prefix unmapped is fine; pointing at an unhealthy
+		// candidate while a healthy one was available is not.
+		candidates := advertisersByPrefix[prefix]
+		anyHealthy := false
+
+		for _, c := range candidates {
+			if !m.unhealthy[c] {
+				anyHealthy = true
+				break
+			}
+		}
+
+		if anyHealthy && m.unhealthy[owner] {
+			rt.Fatalf(
+				"prefix %s primary %d is unhealthy but %v had a healthy candidate",
+				prefix, owner, candidates,
+			)
+		}
+
+		// Anti-flap: if the previous snapshot already had a primary
+		// for this prefix AND that primary is still a healthy
+		// candidate after the op, the election must keep it. Flapping
+		// the primary for an unrelated reason violates the contract
+		// the integration tests rely on (HA failover only moves on
+		// loss of candidacy or health).
+		if prev, hadPrev := prevSnapshotPrimaries[prefix]; hadPrev {
+			stillCandidate := slices.Contains(candidates, prev)
+			stillHealthy := !m.unhealthy[prev]
+
+			if stillCandidate && stillHealthy && owner != prev {
+				rt.Fatalf(
+					"prefix %s flapped primary %d -> %d "+
+						"while previous primary was still a healthy candidate",
+					prefix, prev, owner,
+				)
+			}
+		}
+	}
 }
 
 // nodeForRapid builds a minimal types.Node for use in property
@@ -229,16 +378,41 @@ func nodeForRapid(id types.NodeID) types.Node {
 	}
 }
 
+// snapshotPrimariesCopy returns a defensive copy of the snapshot's
+// prefix→primary map so the caller can compare against a later
+// snapshot without aliasing the live map.
+func snapshotPrimariesCopy(ns *NodeStore) map[netip.Prefix]types.NodeID {
+	live := ns.PrimaryRoutes()
+
+	out := make(map[netip.Prefix]types.NodeID, len(live))
+	maps.Copy(out, live)
+
+	return out
+}
+
 // TestPrimaryRoutesProperty drives NodeStore with a randomised
 // sequence of high-level operations and checks that the snapshot's
 // primaries map matches a reference model after every step.
 //
-// Background: issue #3203 reports that HA tracking enters a stuck
-// state after a sequence of disconnect/reconnect events. The narrow
-// integration and servertest reproductions written for the bug do
-// not fail on upstream/main, so this property test broadens the
-// search by letting rapid generate sequences we have not enumerated
-// by hand.
+// Op set (covers the dual-disconnect, batched-probe, and
+// all-unhealthy-fallback shapes that election has to handle):
+//
+//   - ConnectAdvertise: online + advertise prefs, clear Unhealthy
+//   - Disconnect: offline; ApprovedRoutes persists
+//   - ProbeUnhealthy: set Unhealthy
+//   - ProbeHealthy: clear Unhealthy
+//   - ApprovedRoutesChange: change advertised prefs without touching health
+//   - BatchProbeResults: apply a batch of health flips through
+//     UpdateNodes so the election runs once for the cycle, matching
+//     State.BatchSetNodeHealth
+//   - SimultaneousDisconnect: mark multiple nodes offline atomically
+//     via UpdateNodes (the dual-cable-pull shape)
+//   - SetApprovedRoutes: change ApprovedRoutes while leaving
+//     Hostinfo.RoutableIPs alone — exercises the asymmetry between
+//     announced and approved
+//   - OfflineExpiry: IsOnline=false WITHOUT clearing Unhealthy
+//     (matches the Disconnect path's actual semantics versus
+//     ConnectAdvertise's full reset)
 func TestPrimaryRoutesProperty(t *testing.T) {
 	rapid.Check(t, func(rt *rapid.T) {
 		const numNodes = 4
@@ -270,14 +444,29 @@ func TestPrimaryRoutesProperty(t *testing.T) {
 			0, len(prefixes),
 			func(p netip.Prefix) string { return p.String() },
 		)
+		// nodeSubsetGen draws 1..N distinct node IDs for the batched
+		// ops. SliceOfNDistinct's lower bound is inclusive, so 1 is
+		// the smallest sensible batch — a zero-size batch is a no-op
+		// the rapid harness should not waste shrinking cycles on.
+		nodeSubsetGen := rapid.SliceOfNDistinct(
+			nodeGen,
+			1, numNodes,
+			func(id types.NodeID) types.NodeID { return id },
+		)
 
-		opCount := rapid.IntRange(5, 60).Draw(rt, "opCount")
+		opCount := rapid.IntRange(5, 200).Draw(rt, "opCount")
 		for step := range opCount {
-			op := rapid.IntRange(0, 4).Draw(rt, fmt.Sprintf("op_%d", step))
-			id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step))
+			op := rapid.IntRange(0, 8).Draw(rt, fmt.Sprintf("op_%d", step))
+
+			// Snapshot primaries before applying the op so the
+			// anti-flap invariant has a stable reference. Reading
+			// after the model has already changed would compare a
+			// stale snapshot to a moved model.
+			prevPrimaries := snapshotPrimariesCopy(ns)
 
 			switch op {
 			case 0: // ConnectAdvertise — Connect path clears Unhealthy.
+				id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step))
 				prefs := prefixSubsetGen.Draw(rt, fmt.Sprintf("prefs_%d", step))
 
 				ns.UpdateNode(id, func(n *types.Node) {
@@ -287,35 +476,41 @@ func TestPrimaryRoutesProperty(t *testing.T) {
 					n.ApprovedRoutes = prefs
 				})
 
+				m.announced[id] = prefs
+				m.approved[id] = prefs
+				m.recomputeEffective(id)
+
 				if len(prefs) == 0 {
 					delete(m.connected, id)
-					delete(m.prefixes, id)
 				} else {
 					m.connected[id] = true
-					m.prefixes[id] = prefs
 				}
 
 				delete(m.unhealthy, id)
 
 			case 1: // Disconnect — IsOnline=false; ApprovedRoutes persists.
+				id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step))
 				ns.UpdateNode(id, func(n *types.Node) {
 					n.IsOnline = new(false)
 				})
 				delete(m.connected, id)
 
 			case 2: // ProbeUnhealthy — HA prober marks node bad.
+				id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step))
 				ns.UpdateNode(id, func(n *types.Node) {
 					n.Unhealthy = true
 				})
 				m.unhealthy[id] = true
 
 			case 3: // ProbeHealthy — HA prober marks node good.
+				id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step))
 				ns.UpdateNode(id, func(n *types.Node) {
 					n.Unhealthy = false
 				})
 				delete(m.unhealthy, id)
 
 			case 4: // ApprovedRoutesChange — change advertised prefs without touching health.
+				id := nodeGen.Draw(rt, fmt.Sprintf("id_%d", step))
 				prefs := prefixSubsetGen.Draw(rt, fmt.Sprintf("prefs_%d", step))
 
 				ns.UpdateNode(id, func(n *types.Node) {
@@ -324,18 +519,107 @@ func TestPrimaryRoutesProperty(t *testing.T) {
 					n.ApprovedRoutes = prefs
 				})
 
+				m.announced[id] = prefs
+				m.approved[id] = prefs
+				m.recomputeEffective(id)
+
 				if len(prefs) == 0 {
 					delete(m.connected, id)
-					delete(m.prefixes, id)
 				} else {
 					m.connected[id] = true
-					m.prefixes[id] = prefs
 				}
+
+			case 5: // BatchProbeResults — atomic health flips per cycle.
+				// Mirrors State.BatchSetNodeHealth: per-node
+				// (id, unhealthy) pairs applied through UpdateNodes
+				// so the election runs once on the post-batch state.
+				// Per-call publication would let an intermediate
+				// "one unhealthy, one healthy" snapshot re-elect off
+				// the still-healthy node before the second flip
+				// landed.
+				ids := nodeSubsetGen.Draw(rt, fmt.Sprintf("batch_ids_%d", step))
+
+				results := make(map[types.NodeID]bool, len(ids))
+				for i, id := range ids {
+					unhealthy := rapid.Bool().Draw(rt, fmt.Sprintf("batch_h_%d_%d", step, i))
+					results[id] = unhealthy
+				}
+
+				fns := make(map[types.NodeID]UpdateNodeFunc, len(results))
+				for id, unhealthy := range results {
+					fns[id] = func(n *types.Node) {
+						n.Unhealthy = unhealthy
+					}
+				}
+
+				ns.UpdateNodes(fns)
+
+				for id, unhealthy := range results {
+					if unhealthy {
+						m.unhealthy[id] = true
+					} else {
+						delete(m.unhealthy, id)
+					}
+				}
+
+			case 6: // SimultaneousDisconnect — multiple offline in one batch.
+				// Dual-cable-pull shape: two HA routers' poll
+				// sessions both close in the same NodeStore tick.
+				// Per-call Disconnect could leave the snapshot
+				// momentarily pointing at an offline owner; the
+				// batched form forces a single rebuild.
+				ids := nodeSubsetGen.Draw(rt, fmt.Sprintf("disc_ids_%d", step))
+
+				fns := make(map[types.NodeID]UpdateNodeFunc, len(ids))
+				for _, id := range ids {
+					fns[id] = func(n *types.Node) {
+						n.IsOnline = new(false)
+					}
+				}
+
+				ns.UpdateNodes(fns)
+
+				for _, id := range ids {
+					delete(m.connected, id)
+				}
+
+			case 7: // SetApprovedRoutes — change ApprovedRoutes only.
+				// SetApprovedRoutes in production updates only
+				// node.ApprovedRoutes; Hostinfo.RoutableIPs (what
+				// the client announced) is set by the next
+				// MapRequest. SubnetRoutes intersects the two, so
+				// dropping ApprovedRoutes mid-flight shrinks the
+				// advertised set immediately while announcement
+				// alone never extends it.
+				id := nodeGen.Draw(rt, fmt.Sprintf("setapp_id_%d", step))
+				prefs := prefixSubsetGen.Draw(rt, fmt.Sprintf("setapp_prefs_%d", step))
+
+				ns.UpdateNode(id, func(n *types.Node) {
+					n.ApprovedRoutes = prefs
+				})
+
+				m.approved[id] = prefs
+				m.recomputeEffective(id)
+
+			case 8: // OfflineExpiry — IsOnline=false, KEEP Unhealthy.
+				// The Disconnect path does not clear Unhealthy
+				// (only ConnectAdvertise does on the way back
+				// up). A probe that marks unhealthy and a grace-
+				// period disconnect that lands later leaves the
+				// node with a stale Unhealthy bit; the test
+				// exercises that shape.
+				id := nodeGen.Draw(rt, fmt.Sprintf("expire_id_%d", step))
+				ns.UpdateNode(id, func(n *types.Node) {
+					n.IsOnline = new(false)
+				})
+
+				delete(m.connected, id)
+				// m.unhealthy[id] is intentionally NOT cleared.
 			}
 
 			m.updatePrimaries()
 
-			checkPrimariesProperties(rt, ns, m, nodeIDs)
+			checkPrimariesProperties(rt, ns, m, nodeIDs, prevPrimaries)
 		}
 	})
 }
diff --git a/hscontrol/state/primaries_test.go b/hscontrol/state/primaries_test.go
index c854f6da..1ad13d15 100644
--- a/hscontrol/state/primaries_test.go
+++ b/hscontrol/state/primaries_test.go
@@ -218,12 +218,12 @@ func TestPrimaries_AllUnhealthyKeepsAPrimary(t *testing.T) {
 }
 
 func TestPrimaries_AllUnhealthyPreservesPrevious(t *testing.T) {
-	// Issue #3203: once a failover has moved primary to a higher-ID
-	// node, a subsequent all-unhealthy state must NOT churn primary
-	// back to the lowest-ID candidate. Under cable-pull semantics
-	// both nodes can linger as IsOnline=true (half-open TCP) and
-	// both go Unhealthy — naive `candidates[0]` would flap the
-	// primary to a node that is itself unreachable.
+	// Once a failover has moved primary to a higher-ID node, a
+	// subsequent all-unhealthy state must NOT churn primary back to
+	// the lowest-ID candidate. Under cable-pull semantics both nodes
+	// can linger as IsOnline=true (half-open TCP) and both go
+	// Unhealthy — naive `candidates[0]` would flap the primary to a
+	// node that is itself unreachable.
 	prefix := mp("10.0.0.0/24")
 	f := newPrimariesFixture(t, 1, 2)
 	f.advertise(1, prefix)
@@ -247,12 +247,11 @@ func TestPrimaries_ExitRouteNotElected(t *testing.T) {
 	f.requireNoPrimary(exitV4)
 }
 
-func TestPrimaries_RegressionIssue3203_BothOfflineThenOneReturns(t *testing.T) {
-	// Issue #3203: with two HA advertisers, dropping both then
-	// bringing one back used to leave the prefix without any
-	// primary. After the refactor the snapshot recomputes primaries
-	// on every NodeStore write, so the returning advertiser must
-	// be elected.
+func TestPrimaries_BothOfflineThenOneReturns(t *testing.T) {
+	// With two HA advertisers, dropping both then bringing one back
+	// used to leave the prefix without any primary. The snapshot
+	// recomputes primaries on every NodeStore write, so the
+	// returning advertiser must be elected.
 	prefix := mp("10.0.0.0/24")
 	f := newPrimariesFixture(t, 1, 2)
 	f.advertise(1, prefix)
diff --git a/integration/route_test.go b/integration/route_test.go
index 911e5c84..aa48da12 100644
--- a/integration/route_test.go
+++ b/integration/route_test.go
@@ -4066,9 +4066,9 @@ func TestHASubnetRouterFailoverBothOffline(t *testing.T) {
 //     connection until kernel keepalives time out (often >60 s). When
 //     the cable returns, two server-side longpoll sessions can overlap.
 //
-// Issue #3203 reports the bug after cable pulls; this variant blocks all
-// traffic between the router container and headscale via iptables and then
-// removes the block to mimic that behaviour.
+// This variant blocks all traffic between the router container and
+// headscale via iptables and then removes the block to mimic the
+// cable-pull behaviour.
 func TestHASubnetRouterFailoverBothOfflineCablePull(t *testing.T) {
 	IntegrationSkip(t)