mirror of
https://github.com/juanfont/headscale.git
synced 2026-05-23 18:48:42 +09:00
state: leave prefix unmapped when all primary candidates unhealthy
electPrimaryRoutes' all-unhealthy fallback picked candidates[0] when the previous primary was no longer a candidate. The Phase-5 simultaneous dual-disconnect path in TestHASubnetRouterFailoverDockerDisconnect hits this asymmetrically: a batched probe cycle marks both routers unhealthy with prev=r2 preserved, then the grace-period Disconnect for r2 drops it from candidates. With prev gone and the remaining r1 still carrying its Unhealthy bit, the fallback pointed peers at the cable-pulled r1 — flapping primary to an unreachable node and tripping requirePrimaryStable. Leave the prefix unmapped when prev is gone and every candidate is unhealthy. Peers see no advertiser instead of an unreachable one, which is honest: the next probe cycle re-evaluates and picks whichever node responds. The property-test model that mirrored the old behaviour is updated to match.
This commit is contained in:
@@ -692,14 +692,16 @@ func electPrimaryRoutes(
|
||||
}
|
||||
}
|
||||
|
||||
// All-unhealthy fallback: preserve the previous primary only
|
||||
// when it is still a candidate. Falling back to any candidate
|
||||
// would point peers at a node the prober has already declared
|
||||
// unreachable; leaving the prefix unmapped is honest until a
|
||||
// probe cycle picks one that responds.
|
||||
if !found && len(candidates) >= 1 {
|
||||
if cur, ok := prev[prefix]; ok && slices.Contains(candidates, cur) {
|
||||
selected = cur
|
||||
} else {
|
||||
selected = candidates[0]
|
||||
found = true
|
||||
}
|
||||
|
||||
found = true
|
||||
}
|
||||
|
||||
if found {
|
||||
|
||||
@@ -97,18 +97,23 @@ func (m *primariesModel) updatePrimaries() {
|
||||
}
|
||||
}
|
||||
|
||||
// All-unhealthy fallback: preserve the previous primary if it
|
||||
// is still a candidate, otherwise leave the prefix unmapped.
|
||||
// electPrimaryRoutes was changed to drop the candidates[0]
|
||||
// fallback so the Phase-5 (simultaneous dual-disconnect)
|
||||
// regression cannot pick an already-unhealthy node as
|
||||
// primary; the model has to track the same behaviour.
|
||||
if !found && len(nodes) >= 1 {
|
||||
if cur, ok := m.primary[p]; ok && slices.Contains(nodes, cur) {
|
||||
selected = cur
|
||||
} else {
|
||||
selected = nodes[0]
|
||||
found = true
|
||||
}
|
||||
|
||||
found = true
|
||||
}
|
||||
|
||||
if found {
|
||||
m.primary[p] = selected
|
||||
} else {
|
||||
delete(m.primary, p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user