integration: add peer relay lifecycle test for cap/relay grants

Add ConnectToNetwork to the TailscaleClient interface for multi-network test scenarios and implement peer relay ping support. Use these to test that cap/relay grants correctly enable peer-to-peer relay connections between tagged nodes.

Updates #2180
Kristoffer Dalby
2026-03-23 08:22:41 +00:00
parent 8573ff9158
commit 9b1a6b6c05
3 changed files with 521 additions and 1 deletion


@@ -0,0 +1,513 @@
package integration

import (
	"net/netip"
	"strings"
	"testing"
	"time"

	policyv2 "github.com/juanfont/headscale/hscontrol/policy/v2"
	"github.com/juanfont/headscale/hscontrol/types"
	"github.com/juanfont/headscale/integration/hsic"
	"github.com/juanfont/headscale/integration/tsic"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"tailscale.com/tailcfg"
	"tailscale.com/wgengine/filter"
)

// hasCapMatchInPacketFilter checks if any Match entry in the packet
// filter contains a CapMatch with the given capability name.
func hasCapMatchInPacketFilter(pf []filter.Match, peerCap tailcfg.PeerCapability) bool {
	for _, m := range pf {
		for _, cm := range m.Caps {
			if cm.Cap == peerCap {
				return true
			}
		}
	}
	return false
}

// hasCapMatchForIP checks if any CapMatch with the given capability
// has a Dst prefix that contains the given IP. This validates that
// the cap is directed at the correct node, not just present.
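//
// Example (hypothetical values): if the compiled filter contains a
// CapMatch for tailcfg.PeerCapabilityRelay with Dst 100.64.0.1/32, then
// hasCapMatchForIP(pf, tailcfg.PeerCapabilityRelay,
// netip.MustParseAddr("100.64.0.1")) returns true, and false for any
// other address.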
func hasCapMatchForIP(pf []filter.Match, peerCap tailcfg.PeerCapability, ip netip.Addr) bool {
	for _, m := range pf {
		for _, cm := range m.Caps {
			if cm.Cap == peerCap && cm.Dst.Contains(ip) {
				return true
			}
		}
	}
	return false
}

// parsePeerRelay parses a PeerRelay string of the form "ip:port:vni:N"
// and returns the address and VNI. Returns zero values on parse failure.
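//
// Example (values hypothetical):
//
//	ap, vni, ok := parsePeerRelay("172.18.0.4:58738:vni:1")
//	// ap == 172.18.0.4:58738, vni == "1", ok == true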
func parsePeerRelay(pr string) (netip.AddrPort, string, bool) {
	// Format: "172.18.0.4:58738:vni:1"
	// Split into: host part "172.18.0.4:58738" and vni part "vni:1"
	addrStr, vni, ok := strings.Cut(pr, ":vni:")
	if !ok {
		return netip.AddrPort{}, "", false
	}
	ap, err := netip.ParseAddrPort(addrStr)
	if err != nil {
		return netip.AddrPort{}, "", false
	}
	return ap, vni, true
}

// TestGrantCapRelay validates the full peer relay lifecycle:
//  1. No direct connection between isolated clients
//  2. Cap grants compile correctly (relay + relay-target in packet filters),
//     with strict directionality and negative checks
//  3. Peer relay is used instead of DERP (PeerRelay non-empty, valid format)
//  4. Relay goes down -> fallback to DERP (PeerRelay empty, Relay non-empty,
//     DERP ping works)
//  5. Relay comes back up -> peer relay resumes (PeerRelay non-empty again)
func TestGrantCapRelay(t *testing.T) {
	IntegrationSkip(t)

	assertTimeout := 120 * time.Second

	spec := ScenarioSpec{
		NodesPerUser: 0,
		Users:        []string{"relay", "clienta", "clientb"},
		Networks: map[string][]string{
			"usernet1": {"clienta"},
			"usernet2": {"clientb"},
			"usernet3": {"relay"},
		},
		Versions: []string{"head"},
	}

	scenario, err := NewScenario(spec)
	require.NoErrorf(t, err, "failed to create scenario: %s", err)
	defer scenario.ShutdownAssertNoPanics(t)
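
	// For reference, these Go structs correspond roughly to a HuJSON
	// grants policy along these lines (a sketch; exact syntax follows
	// headscale's policy v2 format):
	//
	//	"grants": [
	//		// Grant 1: any-to-any IP connectivity between the tags.
	//		{"src": [...], "dst": [...], "ip": ["*"]},
	//		// Grant 2: clients may use the relay node as a peer relay.
	//		{"src": ["tag:client-a", "tag:client-b"],
	//		 "dst": ["tag:relay"],
	//		 "app": {"tailscale.com/cap/relay": [{}]}},
	//	]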

	pol := &policyv2.Policy{
		TagOwners: policyv2.TagOwners{
			policyv2.Tag("tag:relay"):    policyv2.Owners{usernameOwner("relay@")},
			policyv2.Tag("tag:client-a"): policyv2.Owners{usernameOwner("clienta@")},
			policyv2.Tag("tag:client-b"): policyv2.Owners{usernameOwner("clientb@")},
		},
		Grants: []policyv2.Grant{
			// Grant 1: Basic IP connectivity between all tagged nodes.
			{
				Sources: policyv2.Aliases{
					tagp("tag:relay"), tagp("tag:client-a"), tagp("tag:client-b"),
				},
				Destinations: policyv2.Aliases{
					tagp("tag:relay"), tagp("tag:client-a"), tagp("tag:client-b"),
				},
				InternetProtocols: []policyv2.ProtocolPort{
					{Protocol: "*", Ports: []tailcfg.PortRange{tailcfg.PortRangeAny}},
				},
			},
			// Grant 2: Relay cap - clients can use relay node for UDP relaying.
			// This generates cap/relay on the relay's filter and cap/relay-target
			// (companion) on the clients' filters.
			{
				Sources:      policyv2.Aliases{tagp("tag:client-a"), tagp("tag:client-b")},
				Destinations: policyv2.Aliases{tagp("tag:relay")},
				App: tailcfg.PeerCapMap{
					tailcfg.PeerCapabilityRelay: {tailcfg.RawMessage("{}")},
				},
			},
		},
	}

	headscale, err := scenario.Headscale(
		hsic.WithTestName("grant-cap-relay"),
		hsic.WithACLPolicy(pol),
		hsic.WithPolicyMode(types.PolicyModeDB),
	)
	requireNoErrGetHeadscale(t, err)

	usernet1, err := scenario.Network("usernet1")
	require.NoError(t, err)
	usernet2, err := scenario.Network("usernet2")
	require.NoError(t, err)
	usernet3, err := scenario.Network("usernet3")
	require.NoError(t, err)

	// Create users on headscale server.
	_, err = scenario.CreateUser("relay")
	require.NoError(t, err)
	_, err = scenario.CreateUser("clienta")
	require.NoError(t, err)
	_, err = scenario.CreateUser("clientb")
	require.NoError(t, err)

	userMap, err := headscale.MapUsers()
	require.NoError(t, err)

	// --- Create Relay R on usernet3, dual-homed to usernet1+usernet2 ---
	relayR, err := scenario.CreateTailscaleNode("head",
		tsic.WithNetwork(usernet3),
	)
	require.NoError(t, err)
	defer func() { _, _, _ = relayR.Shutdown() }()

	pakRelay, err := scenario.CreatePreAuthKeyWithTags(
		userMap["relay"].GetId(), false, false, []string{"tag:relay"},
	)
	require.NoError(t, err)

	err = relayR.Login(headscale.GetEndpoint(), pakRelay.GetKey())
	require.NoError(t, err)
	err = relayR.WaitForRunning(30 * time.Second)
	require.NoError(t, err)

	// Dual-home after registration to avoid duplicate node key generation
	// from Docker network interface changes during tailscaled startup.
	err = relayR.ConnectToNetwork(usernet1)
	require.NoError(t, err)
	err = relayR.ConnectToNetwork(usernet2)
	require.NoError(t, err)

	// Enable the relay server on the relay node. Without this, the
	// relayserver extension loads but RelayServerPort is nil and the
	// server never starts listening for allocation requests.
	// Port 0 = random unused port.
	_, _, err = relayR.Execute([]string{
		"tailscale", "set", "--relay-server-port=0",
	})
	require.NoError(t, err)

	// --- Create Client A on usernet1 only ---
	clientA, err := scenario.CreateTailscaleNode("head",
		tsic.WithNetwork(usernet1),
	)
	require.NoError(t, err)
	defer func() { _, _, _ = clientA.Shutdown() }()

	pakClientA, err := scenario.CreatePreAuthKeyWithTags(
		userMap["clienta"].GetId(), false, false, []string{"tag:client-a"},
	)
	require.NoError(t, err)

	err = clientA.Login(headscale.GetEndpoint(), pakClientA.GetKey())
	require.NoError(t, err)
	err = clientA.WaitForRunning(30 * time.Second)
	require.NoError(t, err)

	// --- Create Client B on usernet2 only ---
	clientB, err := scenario.CreateTailscaleNode("head",
		tsic.WithNetwork(usernet2),
	)
	require.NoError(t, err)
	defer func() { _, _, _ = clientB.Shutdown() }()

	pakClientB, err := scenario.CreatePreAuthKeyWithTags(
		userMap["clientb"].GetId(), false, false, []string{"tag:client-b"},
	)
	require.NoError(t, err)

	err = clientB.Login(headscale.GetEndpoint(), pakClientB.GetKey())
	require.NoError(t, err)
	err = clientB.WaitForRunning(30 * time.Second)
	require.NoError(t, err)

	// ===== Phase 1: Validate isolation and peer visibility =====
	t.Log("Phase 1: Validate network isolation and peer visibility")

	allNodes := []TailscaleClient{relayR, clientA, clientB}
	for _, node := range allNodes {
		err = node.WaitForPeers(len(allNodes)-1, 60*time.Second, 1*time.Second)
		require.NoErrorf(t, err, "node %s failed to see all peers", node.Hostname())
	}

	// Restart all nodes to ensure fresh wireguard config. When nodes
	// register sequentially, early peers may arrive without DERP info
	// and get permanently skipped in wireguard config.
	for _, node := range allNodes {
		require.NoError(t, node.Restart())
		require.NoError(t, node.WaitForRunning(30*time.Second))
	}
	for _, node := range allNodes {
		err = node.WaitForPeers(len(allNodes)-1, 60*time.Second, 1*time.Second)
		require.NoErrorf(t, err, "node %s failed to see all peers after restart", node.Hostname())
	}

	// Capture keys and IPs for assertions.
	clientBKey := clientB.MustStatus().Self.PublicKey
	clientAKey := clientA.MustStatus().Self.PublicKey
	relayIPv4 := relayR.MustIPv4()
	clientAIPv4 := clientA.MustIPv4()
	clientBIPv4 := clientB.MustIPv4()

	// Verify no direct path between A and B.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		status, err := clientA.Status()
		assert.NoError(c, err)
		peerB := status.Peer[clientBKey]
		assert.NotNil(c, peerB, "A should see B as a peer")
		if peerB != nil {
			assert.Empty(c, peerB.CurAddr, "A->B should have no direct path")
		}
	}, assertTimeout, 500*time.Millisecond, "A should have no direct path to B")

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		status, err := clientB.Status()
		assert.NoError(c, err)
		peerA := status.Peer[clientAKey]
		assert.NotNil(c, peerA, "B should see A as a peer")
		if peerA != nil {
			assert.Empty(c, peerA.CurAddr, "B->A should have no direct path")
		}
	}, assertTimeout, 500*time.Millisecond, "B should have no direct path to A")

	// ===== Phase 2: Validate cap grants in packet filters =====
	t.Log("Phase 2: Validate cap grants in packet filters")

	// --- Positive checks: correct caps on correct nodes ---
	// Relay R should have cap/relay targeting the relay's own IP.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		pf, err := relayR.PacketFilter()
		assert.NoError(c, err)
		assert.True(c, hasCapMatchForIP(pf, tailcfg.PeerCapabilityRelay, relayIPv4),
			"Relay R should have cap/relay with Dst matching relay's IP %s", relayIPv4)
	}, assertTimeout, 500*time.Millisecond, "R should have cap/relay targeting its own IP")

	// Client A should have cap/relay-target targeting client A's IP.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		pf, err := clientA.PacketFilter()
		assert.NoError(c, err)
		assert.True(c, hasCapMatchForIP(pf, tailcfg.PeerCapabilityRelayTarget, clientAIPv4),
			"Client A should have cap/relay-target with Dst matching A's IP %s", clientAIPv4)
	}, assertTimeout, 500*time.Millisecond, "A should have cap/relay-target targeting its own IP")

	// Client B should have cap/relay-target targeting client B's IP.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		pf, err := clientB.PacketFilter()
		assert.NoError(c, err)
		assert.True(c, hasCapMatchForIP(pf, tailcfg.PeerCapabilityRelayTarget, clientBIPv4),
			"Client B should have cap/relay-target with Dst matching B's IP %s", clientBIPv4)
	}, assertTimeout, 500*time.Millisecond, "B should have cap/relay-target targeting its own IP")

	// --- Negative checks: wrong caps must NOT be present ---
	// Relay R should NOT have cap/relay-target (it's a relay server, not a target).
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		pf, err := relayR.PacketFilter()
		assert.NoError(c, err)
		assert.False(c, hasCapMatchInPacketFilter(pf, tailcfg.PeerCapabilityRelayTarget),
			"Relay R should NOT have cap/relay-target")
	}, 10*time.Second, 500*time.Millisecond, "R should not have cap/relay-target")

	// Client A should NOT have cap/relay (it's a client, not a relay server).
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		pf, err := clientA.PacketFilter()
		assert.NoError(c, err)
		assert.False(c, hasCapMatchInPacketFilter(pf, tailcfg.PeerCapabilityRelay),
			"Client A should NOT have cap/relay")
	}, 10*time.Second, 500*time.Millisecond, "A should not have cap/relay")

	// Client B should NOT have cap/relay (it's a client, not a relay server).
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		pf, err := clientB.PacketFilter()
		assert.NoError(c, err)
		assert.False(c, hasCapMatchInPacketFilter(pf, tailcfg.PeerCapabilityRelay),
			"Client B should NOT have cap/relay")
	}, 10*time.Second, 500*time.Millisecond, "B should not have cap/relay")

	// ===== Phase 3: Validate peer relay active (not DERP) =====
	t.Log("Phase 3: Validate peer relay active (not DERP)")

	// Verify PeerRelay is set with valid format and correct relay IPs.
	// Relay endpoint allocation is triggered by traffic between peers,
	// so we send pings in the check loop to initiate relay discovery.
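	// A working peer relay path should surface in status roughly as
	// (hypothetical values):
	//
	//	PeerRelay: "172.18.0.4:58738:vni:1"  CurAddr: ""
	//
	// i.e. PeerRelay carries the relay endpoint plus VNI while CurAddr
	// stays empty, since there is no direct path between A and B.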
	var peerRelayAtoB, peerRelayBtoA string
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		// Fire a ping to trigger relay path discovery (ignore output).
		clientA.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientBIPv4.String()}) //nolint:errcheck

		status, err := clientA.Status()
		assert.NoError(c, err)
		peerB := status.Peer[clientBKey]
		assert.NotNil(c, peerB, "A should see B as a peer")
		if peerB != nil {
			assert.NotEmpty(c, peerB.PeerRelay,
				"A->B should use peer relay, not DERP")
			assert.Empty(c, peerB.CurAddr,
				"A->B should not have direct connection")
			if peerB.PeerRelay != "" {
				peerRelayAtoB = peerB.PeerRelay
				// Validate PeerRelay format: ip:port:vni:N
				ap, vni, ok := parsePeerRelay(peerB.PeerRelay)
				assert.True(c, ok,
					"PeerRelay %q should be parseable as ip:port:vni:N", peerB.PeerRelay)
				if ok {
					assert.NotZero(c, ap.Port(),
						"PeerRelay port should be non-zero")
					assert.NotEmpty(c, vni,
						"PeerRelay VNI should be non-empty")
				}
			}
			t.Logf("Phase 3 - A->B: PeerRelay=%q Relay=%q CurAddr=%q Active=%v",
				peerB.PeerRelay, peerB.Relay, peerB.CurAddr, peerB.Active)
		}
	}, assertTimeout, 2*time.Second, "A should show peer relay to B")

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		clientB.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientAIPv4.String()}) //nolint:errcheck

		status, err := clientB.Status()
		assert.NoError(c, err)
		peerA := status.Peer[clientAKey]
		assert.NotNil(c, peerA, "B should see A as a peer")
		if peerA != nil {
			assert.NotEmpty(c, peerA.PeerRelay,
				"B->A should use peer relay, not DERP")
			if peerA.PeerRelay != "" {
				peerRelayBtoA = peerA.PeerRelay
				ap, vni, ok := parsePeerRelay(peerA.PeerRelay)
				assert.True(c, ok,
					"PeerRelay %q should be parseable as ip:port:vni:N", peerA.PeerRelay)
				if ok {
					assert.NotZero(c, ap.Port(),
						"PeerRelay port should be non-zero")
					assert.NotEmpty(c, vni,
						"PeerRelay VNI should be non-empty")
				}
			}
			t.Logf("Phase 3 - B->A: PeerRelay=%q Relay=%q CurAddr=%q Active=%v",
				peerA.PeerRelay, peerA.Relay, peerA.CurAddr, peerA.Active)
		}
	}, assertTimeout, 2*time.Second, "B should show peer relay to A")

	// Cross-validate: both directions should use the same VNI
	// (same relay allocation) but different IPs (dual-homed relay).
	if peerRelayAtoB != "" && peerRelayBtoA != "" {
		apA, vniA, okA := parsePeerRelay(peerRelayAtoB)
		apB, vniB, okB := parsePeerRelay(peerRelayBtoA)
		if okA && okB {
			assert.Equal(t, vniA, vniB,
				"A->B and B->A should share the same VNI (same relay allocation)")
			assert.Equal(t, apA.Port(), apB.Port(),
				"A->B and B->A should use the same relay port")
			assert.NotEqual(t, apA.Addr(), apB.Addr(),
				"A->B and B->A relay IPs should differ (dual-homed relay)")
		}
	}

	// ===== Phase 4: Bring relay down -> DERP fallback =====
	t.Log("Phase 4: Bring relay down, expect DERP fallback")
	require.NoError(t, relayR.Down())

	// Verify PeerRelay is gone and DERP is used.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		status, err := clientA.Status()
		assert.NoError(c, err)
		peerB := status.Peer[clientBKey]
		assert.NotNil(c, peerB, "A should still see B as a peer")
		if peerB != nil {
			assert.Empty(c, peerB.PeerRelay,
				"A->B peer relay should be gone")
			assert.NotEmpty(c, peerB.Relay,
				"A->B should fall back to DERP")
			t.Logf("Phase 4 - A->B: PeerRelay=%q Relay=%q CurAddr=%q Active=%v",
				peerB.PeerRelay, peerB.Relay, peerB.CurAddr, peerB.Active)
		}
	}, assertTimeout, 500*time.Millisecond, "A should fall back to DERP for B")

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		status, err := clientB.Status()
		assert.NoError(c, err)
		peerA := status.Peer[clientAKey]
		assert.NotNil(c, peerA, "B should still see A as a peer")
		if peerA != nil {
			assert.Empty(c, peerA.PeerRelay,
				"B->A peer relay should be gone")
			t.Logf("Phase 4 - B->A: PeerRelay=%q Relay=%q CurAddr=%q Active=%v",
				peerA.PeerRelay, peerA.Relay, peerA.CurAddr, peerA.Active)
		}
	}, assertTimeout, 500*time.Millisecond, "B should fall back to DERP for A")

	// Verify data plane works via DERP after relay is down.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		err := clientA.Ping(
			clientBIPv4.String(),
			tsic.WithPingUntilDirect(false),
			tsic.WithPingTimeout(2*time.Second),
			tsic.WithPingCount(1),
		)
		assert.NoError(c, err)
	}, assertTimeout, 1*time.Second, "A should reach B via DERP after relay down")

	// ===== Phase 5: Bring relay back up -> peer relay resumes =====
	t.Log("Phase 5: Bring relay back up, expect peer relay to resume")
	require.NoError(t, relayR.Up())
	err = relayR.WaitForRunning(30 * time.Second)
	require.NoError(t, err)

	// Verify peer relay resumes. Ping to trigger relay re-discovery.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		clientA.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientBIPv4.String()}) //nolint:errcheck

		status, err := clientA.Status()
		assert.NoError(c, err)
		peerB := status.Peer[clientBKey]
		assert.NotNil(c, peerB, "A should see B as a peer")
		if peerB != nil {
			assert.NotEmpty(c, peerB.PeerRelay,
				"A->B peer relay should resume after R comes back")
			t.Logf("Phase 5 - A->B: PeerRelay=%q Relay=%q CurAddr=%q Active=%v",
				peerB.PeerRelay, peerB.Relay, peerB.CurAddr, peerB.Active)
		}
	}, assertTimeout, 2*time.Second, "A should resume peer relay to B")

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		clientB.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientAIPv4.String()}) //nolint:errcheck

		status, err := clientB.Status()
		assert.NoError(c, err)
		peerA := status.Peer[clientAKey]
		assert.NotNil(c, peerA, "B should see A as a peer")
		if peerA != nil {
			assert.NotEmpty(c, peerA.PeerRelay,
				"B->A peer relay should resume after R comes back")
			t.Logf("Phase 5 - B->A: PeerRelay=%q Relay=%q CurAddr=%q Active=%v",
				peerA.PeerRelay, peerA.Relay, peerA.CurAddr, peerA.Active)
		}
	}, assertTimeout, 2*time.Second, "B should resume peer relay to A")
}


@@ -10,6 +10,7 @@ import (
 	"github.com/juanfont/headscale/hscontrol/util"
 	"github.com/juanfont/headscale/integration/dockertestutil"
 	"github.com/juanfont/headscale/integration/tsic"
+	"github.com/ory/dockertest/v3"
 	"tailscale.com/ipn/ipnstate"
 	"tailscale.com/net/netcheck"
 	"tailscale.com/types/key"
@@ -56,6 +57,7 @@ type TailscaleClient interface {
 	MustID() types.NodeID
 	ReadFile(path string) ([]byte, error)
 	PacketFilter() ([]filter.Match, error)
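+	// ConnectToNetwork attaches the client container to an additional
+	// Docker network, enabling dual-homed/multi-network test topologies.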
+	ConnectToNetwork(network *dockertest.Network) error
 	// FailingPeersAsString returns a formatted-ish multi-line-string of peers in the client
 	// and a bool indicating if the clients online count and peer count is equal.


@@ -1403,7 +1403,7 @@ func (t *TailscaleInContainer) Ping(hostnameOrIP string, opts ...PingOption) err
 	}
 	if !args.direct {
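+		// Peer relay paths report "via relay" in the ping output, so a
+		// non-direct path may be either DERP or peer relay.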
-		if strings.Contains(result, "via DERP") {
+		if strings.Contains(result, "via DERP") || strings.Contains(result, "via relay") {
 			return nil
 		} else {
 			return errTailscalePingNotDERP
@@ -1625,6 +1625,11 @@ func (t *TailscaleInContainer) GetNodePrivateKey() (*key.NodePrivate, error) {
 	return &p.Persist.PrivateNodeKey, nil
 }

+// ConnectToNetwork connects the Tailscale container to an additional Docker network.
+func (t *TailscaleInContainer) ConnectToNetwork(network *dockertest.Network) error {
+	return t.container.ConnectToNetwork(network)
+}
+
 // PacketFilter returns the current packet filter rules from the client's network map.
 // This is useful for verifying that policy changes have propagated to the client.
 func (t *TailscaleInContainer) PacketFilter() ([]filter.Match, error) {