From 9b1a6b6c05e3752b14f0ba4cfe6ecd73e6df9825 Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Mon, 23 Mar 2026 08:22:41 +0000 Subject: [PATCH] integration: add cap/relay grant peer relay lifecycle test Add ConnectToNetwork to the TailscaleClient interface for multi-network test scenarios and implement peer relay ping support. Use these to test that cap/relay grants correctly enable peer-to-peer relay connections between tagged nodes. Updates #2180 --- integration/grant_cap_test.go | 513 ++++++++++++++++++++++++++++++++++ integration/tailscale.go | 2 + integration/tsic/tsic.go | 7 +- 3 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 integration/grant_cap_test.go diff --git a/integration/grant_cap_test.go b/integration/grant_cap_test.go new file mode 100644 index 00000000..3fedbce1 --- /dev/null +++ b/integration/grant_cap_test.go @@ -0,0 +1,513 @@ +package integration + +import ( + "net/netip" + "strings" + "testing" + "time" + + policyv2 "github.com/juanfont/headscale/hscontrol/policy/v2" + "github.com/juanfont/headscale/hscontrol/types" + "github.com/juanfont/headscale/integration/hsic" + "github.com/juanfont/headscale/integration/tsic" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "tailscale.com/tailcfg" + "tailscale.com/wgengine/filter" +) + +// hasCapMatchInPacketFilter checks if any Match entry in the packet +// filter contains a CapMatch with the given capability name. +func hasCapMatchInPacketFilter(pf []filter.Match, peerCap tailcfg.PeerCapability) bool { + for _, m := range pf { + for _, cm := range m.Caps { + if cm.Cap == peerCap { + return true + } + } + } + + return false +} + +// hasCapMatchForIP checks if any CapMatch with the given capability +// has a Dst prefix that contains the given IP. This validates that +// the cap is directed at the correct node, not just present. +func hasCapMatchForIP(pf []filter.Match, peerCap tailcfg.PeerCapability, ip netip.Addr) bool { + for _, m := range pf { + for _, cm := range m.Caps { + if cm.Cap == peerCap && cm.Dst.Contains(ip) { + return true + } + } + } + + return false +} + +// parsePeerRelay parses a PeerRelay string of the form "ip:port:vni:N" +// and returns the address and VNI. Returns zero values on parse failure. +func parsePeerRelay(pr string) (netip.AddrPort, string, bool) { + // Format: "172.18.0.4:58738:vni:1" + // Split into: host part "172.18.0.4:58738" and vni part "vni:1" + addrStr, vni, ok := strings.Cut(pr, ":vni:") + if !ok { + return netip.AddrPort{}, "", false + } + + ap, err := netip.ParseAddrPort(addrStr) + if err != nil { + return netip.AddrPort{}, "", false + } + + return ap, vni, true +} + +// TestGrantCapRelay validates the full peer relay lifecycle: +// 1. No direct connection between isolated clients +// 2. Cap grants compile correctly (relay + relay-target in packet filters) +// with strict directionality and negative checks +// 3. Peer relay is used instead of DERP (PeerRelay non-empty, valid format) +// 4. Relay goes down -> fallback to DERP (PeerRelay empty, Relay non-empty, +// DERP ping works) +// 5. 
Relay comes back up -> peer relay resumes (PeerRelay non-empty again) +func TestGrantCapRelay(t *testing.T) { + IntegrationSkip(t) + + assertTimeout := 120 * time.Second + + spec := ScenarioSpec{ + NodesPerUser: 0, + Users: []string{"relay", "clienta", "clientb"}, + Networks: map[string][]string{ + "usernet1": {"clienta"}, + "usernet2": {"clientb"}, + "usernet3": {"relay"}, + }, + Versions: []string{"head"}, + } + + scenario, err := NewScenario(spec) + + require.NoErrorf(t, err, "failed to create scenario: %s", err) + defer scenario.ShutdownAssertNoPanics(t) + + pol := &policyv2.Policy{ + TagOwners: policyv2.TagOwners{ + policyv2.Tag("tag:relay"): policyv2.Owners{usernameOwner("relay@")}, + policyv2.Tag("tag:client-a"): policyv2.Owners{usernameOwner("clienta@")}, + policyv2.Tag("tag:client-b"): policyv2.Owners{usernameOwner("clientb@")}, + }, + Grants: []policyv2.Grant{ + // Grant 1: Basic IP connectivity between all tagged nodes. + { + Sources: policyv2.Aliases{ + tagp("tag:relay"), tagp("tag:client-a"), tagp("tag:client-b"), + }, + Destinations: policyv2.Aliases{ + tagp("tag:relay"), tagp("tag:client-a"), tagp("tag:client-b"), + }, + InternetProtocols: []policyv2.ProtocolPort{ + {Protocol: "*", Ports: []tailcfg.PortRange{tailcfg.PortRangeAny}}, + }, + }, + // Grant 2: Relay cap - clients can use relay node for UDP relaying. + // This generates cap/relay on the relay's filter and cap/relay-target + // (companion) on the clients' filters. + { + Sources: policyv2.Aliases{tagp("tag:client-a"), tagp("tag:client-b")}, + Destinations: policyv2.Aliases{tagp("tag:relay")}, + App: tailcfg.PeerCapMap{ + tailcfg.PeerCapabilityRelay: {tailcfg.RawMessage("{}")}, + }, + }, + }, + } + + headscale, err := scenario.Headscale( + hsic.WithTestName("grant-cap-relay"), + hsic.WithACLPolicy(pol), + hsic.WithPolicyMode(types.PolicyModeDB), + ) + requireNoErrGetHeadscale(t, err) + + usernet1, err := scenario.Network("usernet1") + require.NoError(t, err) + usernet2, err := scenario.Network("usernet2") + require.NoError(t, err) + usernet3, err := scenario.Network("usernet3") + require.NoError(t, err) + + // Create users on headscale server. + _, err = scenario.CreateUser("relay") + require.NoError(t, err) + _, err = scenario.CreateUser("clienta") + require.NoError(t, err) + _, err = scenario.CreateUser("clientb") + require.NoError(t, err) + + userMap, err := headscale.MapUsers() + require.NoError(t, err) + + // --- Create Relay R on usernet3, dual-homed to usernet1+usernet2 --- + relayR, err := scenario.CreateTailscaleNode("head", + tsic.WithNetwork(usernet3), + ) + require.NoError(t, err) + + defer func() { _, _, _ = relayR.Shutdown() }() + + pakRelay, err := scenario.CreatePreAuthKeyWithTags( + userMap["relay"].GetId(), false, false, []string{"tag:relay"}, + ) + require.NoError(t, err) + err = relayR.Login(headscale.GetEndpoint(), pakRelay.GetKey()) + require.NoError(t, err) + err = relayR.WaitForRunning(30 * time.Second) + require.NoError(t, err) + + // Dual-home after registration to avoid duplicate node key generation + // from Docker network interface changes during tailscaled startup. + err = relayR.ConnectToNetwork(usernet1) + require.NoError(t, err) + err = relayR.ConnectToNetwork(usernet2) + require.NoError(t, err) + + // Enable the relay server on the relay node. Without this, the + // relayserver extension loads but RelayServerPort is nil and the + // server never starts listening for allocation requests. + // Port 0 = random unused port. 
+ _, _, err = relayR.Execute([]string{ + "tailscale", "set", "--relay-server-port=0", + }) + require.NoError(t, err) + + // --- Create Client A on usernet1 only --- + clientA, err := scenario.CreateTailscaleNode("head", + tsic.WithNetwork(usernet1), + ) + require.NoError(t, err) + + defer func() { _, _, _ = clientA.Shutdown() }() + + pakClientA, err := scenario.CreatePreAuthKeyWithTags( + userMap["clienta"].GetId(), false, false, []string{"tag:client-a"}, + ) + require.NoError(t, err) + err = clientA.Login(headscale.GetEndpoint(), pakClientA.GetKey()) + require.NoError(t, err) + err = clientA.WaitForRunning(30 * time.Second) + require.NoError(t, err) + + // --- Create Client B on usernet2 only --- + clientB, err := scenario.CreateTailscaleNode("head", + tsic.WithNetwork(usernet2), + ) + require.NoError(t, err) + + defer func() { _, _, _ = clientB.Shutdown() }() + + pakClientB, err := scenario.CreatePreAuthKeyWithTags( + userMap["clientb"].GetId(), false, false, []string{"tag:client-b"}, + ) + require.NoError(t, err) + err = clientB.Login(headscale.GetEndpoint(), pakClientB.GetKey()) + require.NoError(t, err) + err = clientB.WaitForRunning(30 * time.Second) + require.NoError(t, err) + + // ===== Phase 1: Validate isolation and peer visibility ===== + t.Log("Phase 1: Validate network isolation and peer visibility") + + allNodes := []TailscaleClient{relayR, clientA, clientB} + for _, node := range allNodes { + err = node.WaitForPeers(len(allNodes)-1, 60*time.Second, 1*time.Second) + require.NoErrorf(t, err, "node %s failed to see all peers", node.Hostname()) + } + + // Restart all nodes to ensure fresh wireguard config. When nodes + // register sequentially, early peers may arrive without DERP info + // and get permanently skipped in wireguard config. + for _, node := range allNodes { + require.NoError(t, node.Restart()) + require.NoError(t, node.WaitForRunning(30*time.Second)) + } + + for _, node := range allNodes { + err = node.WaitForPeers(len(allNodes)-1, 60*time.Second, 1*time.Second) + require.NoErrorf(t, err, "node %s failed to see all peers after restart", node.Hostname()) + } + + // Capture keys and IPs for assertions. + clientBKey := clientB.MustStatus().Self.PublicKey + clientAKey := clientA.MustStatus().Self.PublicKey + relayIPv4 := relayR.MustIPv4() + clientAIPv4 := clientA.MustIPv4() + clientBIPv4 := clientB.MustIPv4() + + // Verify no direct path between A and B. + assert.EventuallyWithT(t, func(c *assert.CollectT) { + status, err := clientA.Status() + assert.NoError(c, err) + + peerB := status.Peer[clientBKey] + assert.NotNil(c, peerB, "A should see B as a peer") + + if peerB != nil { + assert.Empty(c, peerB.CurAddr, "A->B should have no direct path") + } + }, assertTimeout, 500*time.Millisecond, "A should have no direct path to B") + + assert.EventuallyWithT(t, func(c *assert.CollectT) { + status, err := clientB.Status() + assert.NoError(c, err) + + peerA := status.Peer[clientAKey] + assert.NotNil(c, peerA, "B should see A as a peer") + + if peerA != nil { + assert.Empty(c, peerA.CurAddr, "B->A should have no direct path") + } + }, assertTimeout, 500*time.Millisecond, "B should have no direct path to A") + + // ===== Phase 2: Validate cap grants in packet filters ===== + t.Log("Phase 2: Validate cap grants in packet filters") + + // --- Positive checks: correct caps on correct nodes --- + + // Relay R should have cap/relay targeting the relay's own IP. 
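+	// hasCapMatchForIP (defined above) also checks the CapMatch Dst prefix,
+	// so a relay cap aimed at a different node would not satisfy this assertion.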
+ assert.EventuallyWithT(t, func(c *assert.CollectT) { + pf, err := relayR.PacketFilter() + assert.NoError(c, err) + assert.True(c, hasCapMatchForIP(pf, tailcfg.PeerCapabilityRelay, relayIPv4), + "Relay R should have cap/relay with Dst matching relay's IP %s", relayIPv4) + }, assertTimeout, 500*time.Millisecond, "R should have cap/relay targeting its own IP") + + // Client A should have cap/relay-target targeting client A's IP. + assert.EventuallyWithT(t, func(c *assert.CollectT) { + pf, err := clientA.PacketFilter() + assert.NoError(c, err) + assert.True(c, hasCapMatchForIP(pf, tailcfg.PeerCapabilityRelayTarget, clientAIPv4), + "Client A should have cap/relay-target with Dst matching A's IP %s", clientAIPv4) + }, assertTimeout, 500*time.Millisecond, "A should have cap/relay-target targeting its own IP") + + // Client B should have cap/relay-target targeting client B's IP. + assert.EventuallyWithT(t, func(c *assert.CollectT) { + pf, err := clientB.PacketFilter() + assert.NoError(c, err) + assert.True(c, hasCapMatchForIP(pf, tailcfg.PeerCapabilityRelayTarget, clientBIPv4), + "Client B should have cap/relay-target with Dst matching B's IP %s", clientBIPv4) + }, assertTimeout, 500*time.Millisecond, "B should have cap/relay-target targeting its own IP") + + // --- Negative checks: wrong caps must NOT be present --- + + // Relay R should NOT have cap/relay-target (it's a relay server, not a target). + assert.EventuallyWithT(t, func(c *assert.CollectT) { + pf, err := relayR.PacketFilter() + assert.NoError(c, err) + assert.False(c, hasCapMatchInPacketFilter(pf, tailcfg.PeerCapabilityRelayTarget), + "Relay R should NOT have cap/relay-target") + }, 10*time.Second, 500*time.Millisecond, "R should not have cap/relay-target") + + // Client A should NOT have cap/relay (it's a client, not a relay server). + assert.EventuallyWithT(t, func(c *assert.CollectT) { + pf, err := clientA.PacketFilter() + assert.NoError(c, err) + assert.False(c, hasCapMatchInPacketFilter(pf, tailcfg.PeerCapabilityRelay), + "Client A should NOT have cap/relay") + }, 10*time.Second, 500*time.Millisecond, "A should not have cap/relay") + + // Client B should NOT have cap/relay (it's a client, not a relay server). + assert.EventuallyWithT(t, func(c *assert.CollectT) { + pf, err := clientB.PacketFilter() + assert.NoError(c, err) + assert.False(c, hasCapMatchInPacketFilter(pf, tailcfg.PeerCapabilityRelay), + "Client B should NOT have cap/relay") + }, 10*time.Second, 500*time.Millisecond, "B should not have cap/relay") + + // ===== Phase 3: Validate peer relay active (not DERP) ===== + t.Log("Phase 3: Validate peer relay active (not DERP)") + + // Verify PeerRelay is set with valid format and correct relay IPs. + // Relay endpoint allocation is triggered by traffic between peers, + // so we send pings in the check loop to initiate relay discovery. + var peerRelayAtoB, peerRelayBtoA string + + assert.EventuallyWithT(t, func(c *assert.CollectT) { + // Fire a ping to trigger relay path discovery (ignore output). 
+ clientA.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientBIPv4.String()}) //nolint:errcheck + + status, err := clientA.Status() + assert.NoError(c, err) + + peerB := status.Peer[clientBKey] + assert.NotNil(c, peerB, "A should see B as a peer") + + if peerB != nil { + assert.NotEmpty(c, peerB.PeerRelay, + "A->B should use peer relay, not DERP") + assert.Empty(c, peerB.CurAddr, + "A->B should not have direct connection") + + if peerB.PeerRelay != "" { + peerRelayAtoB = peerB.PeerRelay + + // Validate PeerRelay format: ip:port:vni:N + ap, vni, ok := parsePeerRelay(peerB.PeerRelay) + assert.True(c, ok, + "PeerRelay %q should be parseable as ip:port:vni:N", peerB.PeerRelay) + + if ok { + assert.NotZero(c, ap.Port(), + "PeerRelay port should be non-zero") + assert.NotEmpty(c, vni, + "PeerRelay VNI should be non-empty") + } + } + + t.Logf("Phase 3 - A->B: PeerRelay=%q Relay=%q CurAddr=%q Active=%v", + peerB.PeerRelay, peerB.Relay, peerB.CurAddr, peerB.Active) + } + }, assertTimeout, 2*time.Second, "A should show peer relay to B") + + assert.EventuallyWithT(t, func(c *assert.CollectT) { + clientB.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientAIPv4.String()}) //nolint:errcheck + + status, err := clientB.Status() + assert.NoError(c, err) + + peerA := status.Peer[clientAKey] + assert.NotNil(c, peerA, "B should see A as a peer") + + if peerA != nil { + assert.NotEmpty(c, peerA.PeerRelay, + "B->A should use peer relay, not DERP") + + if peerA.PeerRelay != "" { + peerRelayBtoA = peerA.PeerRelay + + ap, vni, ok := parsePeerRelay(peerA.PeerRelay) + assert.True(c, ok, + "PeerRelay %q should be parseable as ip:port:vni:N", peerA.PeerRelay) + + if ok { + assert.NotZero(c, ap.Port(), + "PeerRelay port should be non-zero") + assert.NotEmpty(c, vni, + "PeerRelay VNI should be non-empty") + } + } + + t.Logf("Phase 3 - B->A: PeerRelay=%q Relay=%q CurAddr=%q Active=%v", + peerA.PeerRelay, peerA.Relay, peerA.CurAddr, peerA.Active) + } + }, assertTimeout, 2*time.Second, "B should show peer relay to A") + + // Cross-validate: both directions should use the same VNI + // (same relay allocation) but different IPs (dual-homed relay). + if peerRelayAtoB != "" && peerRelayBtoA != "" { + apA, vniA, okA := parsePeerRelay(peerRelayAtoB) + + apB, vniB, okB := parsePeerRelay(peerRelayBtoA) + if okA && okB { + assert.Equal(t, vniA, vniB, + "A->B and B->A should share the same VNI (same relay allocation)") + assert.Equal(t, apA.Port(), apB.Port(), + "A->B and B->A should use the same relay port") + assert.NotEqual(t, apA.Addr(), apB.Addr(), + "A->B and B->A relay IPs should differ (dual-homed relay)") + } + } + + // ===== Phase 4: Bring relay down -> DERP fallback ===== + t.Log("Phase 4: Bring relay down, expect DERP fallback") + + require.NoError(t, relayR.Down()) + + // Verify PeerRelay is gone and DERP is used. 
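+	// (In ipnstate, Relay is the peer's DERP relay, while PeerRelay is the
+	// UDP peer-relay endpoint; the two fields are independent.)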
+ assert.EventuallyWithT(t, func(c *assert.CollectT) { + status, err := clientA.Status() + assert.NoError(c, err) + + peerB := status.Peer[clientBKey] + assert.NotNil(c, peerB, "A should still see B as a peer") + + if peerB != nil { + assert.Empty(c, peerB.PeerRelay, + "A->B peer relay should be gone") + assert.NotEmpty(c, peerB.Relay, + "A->B should fall back to DERP") + t.Logf("Phase 4 - A->B: PeerRelay=%q Relay=%q CurAddr=%q Active=%v", + peerB.PeerRelay, peerB.Relay, peerB.CurAddr, peerB.Active) + } + }, assertTimeout, 500*time.Millisecond, "A should fall back to DERP for B") + + assert.EventuallyWithT(t, func(c *assert.CollectT) { + status, err := clientB.Status() + assert.NoError(c, err) + + peerA := status.Peer[clientAKey] + assert.NotNil(c, peerA, "B should still see A as a peer") + + if peerA != nil { + assert.Empty(c, peerA.PeerRelay, + "B->A peer relay should be gone") + t.Logf("Phase 4 - B->A: PeerRelay=%q Relay=%q CurAddr=%q Active=%v", + peerA.PeerRelay, peerA.Relay, peerA.CurAddr, peerA.Active) + } + }, assertTimeout, 500*time.Millisecond, "B should fall back to DERP for A") + + // Verify data plane works via DERP after relay is down. + assert.EventuallyWithT(t, func(c *assert.CollectT) { + err := clientA.Ping( + clientBIPv4.String(), + tsic.WithPingUntilDirect(false), + tsic.WithPingTimeout(2*time.Second), + tsic.WithPingCount(1), + ) + assert.NoError(c, err) + }, assertTimeout, 1*time.Second, "A should reach B via DERP after relay down") + + // ===== Phase 5: Bring relay back up -> peer relay resumes ===== + t.Log("Phase 5: Bring relay back up, expect peer relay to resume") + + require.NoError(t, relayR.Up()) + + err = relayR.WaitForRunning(30 * time.Second) + require.NoError(t, err) + + // Verify peer relay resumes. Ping to trigger relay re-discovery. 
+ assert.EventuallyWithT(t, func(c *assert.CollectT) { + clientA.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientBIPv4.String()}) //nolint:errcheck + + status, err := clientA.Status() + assert.NoError(c, err) + + peerB := status.Peer[clientBKey] + assert.NotNil(c, peerB, "A should see B as a peer") + + if peerB != nil { + assert.NotEmpty(c, peerB.PeerRelay, + "A->B peer relay should resume after R comes back") + t.Logf("Phase 5 - A->B: PeerRelay=%q Relay=%q CurAddr=%q Active=%v", + peerB.PeerRelay, peerB.Relay, peerB.CurAddr, peerB.Active) + } + }, assertTimeout, 2*time.Second, "A should resume peer relay to B") + + assert.EventuallyWithT(t, func(c *assert.CollectT) { + clientB.Execute([]string{"tailscale", "ping", "--c=1", "--timeout=1s", clientAIPv4.String()}) //nolint:errcheck + + status, err := clientB.Status() + assert.NoError(c, err) + + peerA := status.Peer[clientAKey] + assert.NotNil(c, peerA, "B should see A as a peer") + + if peerA != nil { + assert.NotEmpty(c, peerA.PeerRelay, + "B->A peer relay should resume after R comes back") + t.Logf("Phase 5 - B->A: PeerRelay=%q Relay=%q CurAddr=%q Active=%v", + peerA.PeerRelay, peerA.Relay, peerA.CurAddr, peerA.Active) + } + }, assertTimeout, 2*time.Second, "B should resume peer relay to A") +} diff --git a/integration/tailscale.go b/integration/tailscale.go index f397133e..4c9a761b 100644 --- a/integration/tailscale.go +++ b/integration/tailscale.go @@ -10,6 +10,7 @@ import ( "github.com/juanfont/headscale/hscontrol/util" "github.com/juanfont/headscale/integration/dockertestutil" "github.com/juanfont/headscale/integration/tsic" + "github.com/ory/dockertest/v3" "tailscale.com/ipn/ipnstate" "tailscale.com/net/netcheck" "tailscale.com/types/key" @@ -56,6 +57,7 @@ type TailscaleClient interface { MustID() types.NodeID ReadFile(path string) ([]byte, error) PacketFilter() ([]filter.Match, error) + ConnectToNetwork(network *dockertest.Network) error // FailingPeersAsString returns a formatted-ish multi-line-string of peers in the client // and a bool indicating if the clients online count and peer count is equal. diff --git a/integration/tsic/tsic.go b/integration/tsic/tsic.go index 29f276e2..0a75e813 100644 --- a/integration/tsic/tsic.go +++ b/integration/tsic/tsic.go @@ -1403,7 +1403,7 @@ func (t *TailscaleInContainer) Ping(hostnameOrIP string, opts ...PingOption) err } if !args.direct { - if strings.Contains(result, "via DERP") { + if strings.Contains(result, "via DERP") || strings.Contains(result, "via relay") { return nil } else { return errTailscalePingNotDERP @@ -1625,6 +1625,11 @@ func (t *TailscaleInContainer) GetNodePrivateKey() (*key.NodePrivate, error) { return &p.Persist.PrivateNodeKey, nil } +// ConnectToNetwork connects the Tailscale container to an additional Docker network. +func (t *TailscaleInContainer) ConnectToNetwork(network *dockertest.Network) error { + return t.container.ConnectToNetwork(network) +} + // PacketFilter returns the current packet filter rules from the client's network map. // This is useful for verifying that policy changes have propagated to the client. func (t *TailscaleInContainer) PacketFilter() ([]filter.Match, error) {
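
The snippet below is a rough, illustrative sketch (not part of the patch) of how the two new pieces of test plumbing compose in other multi-network scenarios: ConnectToNetwork attaches an already-registered node to a second Docker network, and Ping with WithPingUntilDirect(false) now also succeeds on a "via relay" reply, not only "via DERP". The helper name dualHomeAndPing and the "usernet2" network name are assumptions for illustration; the remaining calls are the APIs introduced or exercised above.

package integration

import (
	"testing"
	"time"

	"github.com/juanfont/headscale/integration/tsic"
	"github.com/stretchr/testify/require"
)

// dualHomeAndPing is a hypothetical helper: it dual-homes an already-registered
// node onto a second network and then checks reachability to targetIP while
// accepting relayed paths (peer relay or DERP).
func dualHomeAndPing(t *testing.T, scenario *Scenario, node TailscaleClient, targetIP string) {
	t.Helper()

	// Assumed network name from the surrounding ScenarioSpec.
	second, err := scenario.Network("usernet2")
	require.NoError(t, err)

	// Attach after registration, mirroring TestGrantCapRelay, so the extra
	// interface does not disturb node key generation during startup.
	require.NoError(t, node.ConnectToNetwork(second))

	// WithPingUntilDirect(false) accepts "via DERP" and, with this patch,
	// "via relay" replies; a direct path is not required.
	require.NoError(t, node.Ping(
		targetIP,
		tsic.WithPingUntilDirect(false),
		tsic.WithPingTimeout(2*time.Second),
		tsic.WithPingCount(1),
	))
}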