package integration

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"net/netip"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cenkalti/backoff/v5"
	"github.com/google/go-cmp/cmp"
	v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
	policyv2 "github.com/juanfont/headscale/hscontrol/policy/v2"
	"github.com/juanfont/headscale/hscontrol/types"
	"github.com/juanfont/headscale/hscontrol/util"
	"github.com/juanfont/headscale/integration/integrationutil"
	"github.com/juanfont/headscale/integration/tsic"
	"github.com/oauth2-proxy/mockoidc"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/maps"
	"golang.org/x/exp/slices"
	"tailscale.com/tailcfg"
	"tailscale.com/types/ptr"
)

const (
	// derpPingTimeout defines the timeout for individual DERP ping operations.
	// Used in DERP connectivity tests to verify relay server communication.
	derpPingTimeout = 2 * time.Second

	// derpPingCount defines the number of ping attempts for DERP connectivity tests.
	// A higher count provides a better reliability assessment of DERP connectivity.
	derpPingCount = 10

	// TimestampFormat is the standard timestamp format used across all integration tests.
	// The format "2006-01-02T15-04-05.999999999" provides high-precision timestamps
	// suitable for debugging and log correlation in integration tests.
	TimestampFormat = "2006-01-02T15-04-05.999999999"

	// TimestampFormatRunID is used for generating unique run identifiers.
	// The format "20060102-150405" provides a compact date-time for file/directory names.
	TimestampFormatRunID = "20060102-150405"
)

// NodeSystemStatus represents the status of a node across different systems.
type NodeSystemStatus struct {
	Batcher          bool
	BatcherConnCount int
	MapResponses     bool
	NodeStore        bool
}

// requireNotNil validates that an object is not nil and fails the test if it is.
// This helper provides consistent error messaging for nil checks in integration tests.
func requireNotNil(t *testing.T, object interface{}) {
	t.Helper()
	require.NotNil(t, object)
}

// requireNoErrHeadscaleEnv validates that headscale environment creation succeeded.
// Provides specific error context for headscale environment setup failures.
func requireNoErrHeadscaleEnv(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to create headscale environment")
}

// requireNoErrGetHeadscale validates that headscale server retrieval succeeded.
// Provides specific error context for headscale server access failures.
func requireNoErrGetHeadscale(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to get headscale")
}

// requireNoErrListClients validates that client listing operations succeeded.
// Provides specific error context for client enumeration failures.
func requireNoErrListClients(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to list clients")
}

// requireNoErrListClientIPs validates that client IP retrieval succeeded.
// Provides specific error context for client IP address enumeration failures.
func requireNoErrListClientIPs(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to get client IPs")
}

// requireNoErrSync validates that client synchronization operations succeeded.
// Provides specific error context for client sync failures across the network.
func requireNoErrSync(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to have all clients sync up")
}
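// exampleRunID is an illustrative sketch (not referenced by the tests in this file)
// showing the intended use of TimestampFormatRunID from the constants above:
// producing a compact, sortable identifier for per-run artifact directories and logs.
// The "run-" prefix is an assumption for illustration.
func exampleRunID() string {
	return "run-" + time.Now().Format(TimestampFormatRunID)
}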
// requireNoErrListFQDN validates that FQDN listing operations succeeded.
// Provides specific error context for DNS name enumeration failures.
func requireNoErrListFQDN(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to list FQDNs")
}

// requireNoErrLogout validates that tailscale node logout operations succeeded.
// Provides specific error context for client logout failures.
func requireNoErrLogout(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to log out tailscale nodes")
}

// collectExpectedNodeIDs extracts node IDs from a list of TailscaleClients for validation purposes.
func collectExpectedNodeIDs(t *testing.T, clients []TailscaleClient) []types.NodeID {
	t.Helper()

	expectedNodes := make([]types.NodeID, 0, len(clients))
	for _, client := range clients {
		status := client.MustStatus()
		nodeID, err := strconv.ParseUint(string(status.Self.ID), 10, 64)
		require.NoError(t, err)
		expectedNodes = append(expectedNodes, types.NodeID(nodeID))
	}

	return expectedNodes
}

// validateInitialConnection performs comprehensive validation after initial client login.
// Validates that all nodes are online and have proper NetInfo/DERP configuration,
// essential for ensuring successful initial connection state in relogin tests.
func validateInitialConnection(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()
	requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after initial login", 120*time.Second)
	requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after initial login", 3*time.Minute)
}

// validateLogoutComplete performs comprehensive validation after client logout.
// Ensures all nodes are properly offline across all headscale systems,
// critical for validating clean logout state in relogin tests.
func validateLogoutComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()
	requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should be offline after logout", 120*time.Second)
}
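// exampleReloginFlow is an illustrative sketch (not wired into any test) of how the
// validate* helpers above are meant to be sequenced in a relogin test: capture the
// expected node IDs once, then assert full connectivity, clean logout, and restored
// connectivity at each phase. The actual logout/login steps are elided.
func exampleReloginFlow(t *testing.T, headscale ControlServer, clients []TailscaleClient) {
	t.Helper()

	expectedNodes := collectExpectedNodeIDs(t, clients)

	validateInitialConnection(t, headscale, expectedNodes)

	// ... log every client out ...
	validateLogoutComplete(t, headscale, expectedNodes)

	// ... log every client back in ...
	validateReloginComplete(t, headscale, expectedNodes)
}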
// validateReloginComplete performs comprehensive validation after client relogin.
// Validates that all nodes are back online with proper NetInfo/DERP configuration,
// ensuring successful relogin state restoration in integration tests.
func validateReloginComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()
	requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after relogin", 120*time.Second)
	requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after relogin", 3*time.Minute)
}

// requireAllClientsOnline verifies that all expected nodes are in the specified
// online/offline state across all headscale systems (batcher, map responses, nodestore).
func requireAllClientsOnline(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, expectedOnline bool, message string, timeout time.Duration) {
	t.Helper()

	startTime := time.Now()

	stateStr := "offline"
	if expectedOnline {
		stateStr = "online"
	}

	t.Logf("requireAllClientsOnline: Starting %s validation for %d nodes at %s - %s", stateStr, len(expectedNodes), startTime.Format(TimestampFormat), message)

	if expectedOnline {
		// For online validation, use the existing logic with the full timeout.
		requireAllClientsOnlineWithSingleTimeout(t, headscale, expectedNodes, expectedOnline, message, timeout)
	} else {
		// For offline validation, use a staged approach with component-specific timeouts.
		requireAllClientsOfflineStaged(t, headscale, expectedNodes, message, timeout)
	}

	endTime := time.Now()
	t.Logf("requireAllClientsOnline: Completed %s validation for %d nodes at %s - Duration: %s - %s", stateStr, len(expectedNodes), endTime.Format(TimestampFormat), endTime.Sub(startTime), message)
}

// requireAllClientsOnlineWithSingleTimeout is the original validation logic for the online state.
func requireAllClientsOnlineWithSingleTimeout(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, expectedOnline bool, message string, timeout time.Duration) {
	t.Helper()

	var prevReport string
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		// Get batcher state.
		debugInfo, err := headscale.DebugBatcher()
		assert.NoError(c, err, "Failed to get batcher debug info")
		if err != nil {
			return
		}

		// Get map responses.
		mapResponses, err := headscale.GetAllMapReponses()
		assert.NoError(c, err, "Failed to get map responses")
		if err != nil {
			return
		}

		// Get nodestore state.
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		// Validate that all expected nodes are present in nodeStore.
		for _, nodeID := range expectedNodes {
			_, exists := nodeStore[nodeID]
			assert.True(c, exists, "Expected node %d not found in nodeStore", nodeID)
		}

		// Check that we have map responses for expected nodes.
		mapResponseCount := len(mapResponses)
		expectedCount := len(expectedNodes)
		assert.GreaterOrEqual(c, mapResponseCount, expectedCount, "MapResponses insufficient - expected at least %d responses, got %d", expectedCount, mapResponseCount)

		// Build status map for each node.
		nodeStatus := make(map[types.NodeID]NodeSystemStatus)

		// Initialize all expected nodes.
		for _, nodeID := range expectedNodes {
			nodeStatus[nodeID] = NodeSystemStatus{}
		}

		// Check batcher state for expected nodes.
		for _, nodeID := range expectedNodes {
			nodeIDStr := fmt.Sprintf("%d", nodeID)
			if nodeInfo, exists := debugInfo.ConnectedNodes[nodeIDStr]; exists {
				if status, exists := nodeStatus[nodeID]; exists {
					status.Batcher = nodeInfo.Connected
					status.BatcherConnCount = nodeInfo.ActiveConnections
					nodeStatus[nodeID] = status
				}
			} else {
				// Node not found in batcher, mark as disconnected.
				if status, exists := nodeStatus[nodeID]; exists {
					status.Batcher = false
					status.BatcherConnCount = 0
					nodeStatus[nodeID] = status
				}
			}
		}

		// Check map responses using buildExpectedOnlineMap.
		onlineFromMaps := make(map[types.NodeID]bool)
		onlineMap := integrationutil.BuildExpectedOnlineMap(mapResponses)

		// For single-node scenarios, we can't validate peer visibility since there are no peers.
		if len(expectedNodes) == 1 {
			// For a single node, just check that we have map responses for the node.
			for nodeID := range nodeStatus {
				if _, exists := onlineMap[nodeID]; exists {
					onlineFromMaps[nodeID] = true
				} else {
					onlineFromMaps[nodeID] = false
				}
			}
		} else {
			// Multi-node scenario: check peer visibility.
			for nodeID := range nodeStatus {
				// Initialize as offline - will be set to true only if visible in all relevant peer maps.
				onlineFromMaps[nodeID] = false

				// Count how many peer maps should show this node.
				expectedPeerMaps := 0
				foundOnlinePeerMaps := 0

				for id, peerMap := range onlineMap {
					if id == nodeID {
						continue // Skip self-references
					}

					expectedPeerMaps++

					if online, exists := peerMap[nodeID]; exists && online {
						foundOnlinePeerMaps++
					}
				}

				// Node is considered online if it appears online in all peer maps
				// (or if there are no peer maps to check).
				if expectedPeerMaps == 0 || foundOnlinePeerMaps == expectedPeerMaps {
					onlineFromMaps[nodeID] = true
				}
			}
		}

		assert.Lenf(c, onlineFromMaps, expectedCount, "MapResponses missing nodes in status check")

		// Update status with map response data.
		for nodeID, online := range onlineFromMaps {
			if status, exists := nodeStatus[nodeID]; exists {
				status.MapResponses = online
				nodeStatus[nodeID] = status
			}
		}

		// Check nodestore state for expected nodes.
		for _, nodeID := range expectedNodes {
			if node, exists := nodeStore[nodeID]; exists {
				if status, exists := nodeStatus[nodeID]; exists {
					// Check if node is online in nodestore.
					status.NodeStore = node.IsOnline != nil && *node.IsOnline
					nodeStatus[nodeID] = status
				}
			}
		}

		// Verify all systems show nodes in the expected state and report failures.
		allMatch := true
		var failureReport strings.Builder

		ids := types.NodeIDs(maps.Keys(nodeStatus))
		slices.Sort(ids)

		for _, nodeID := range ids {
			status := nodeStatus[nodeID]
			systemsMatch := (status.Batcher == expectedOnline) && (status.MapResponses == expectedOnline) && (status.NodeStore == expectedOnline)

			if !systemsMatch {
				allMatch = false

				stateStr := "offline"
				if expectedOnline {
					stateStr = "online"
				}

				failureReport.WriteString(fmt.Sprintf("node:%d is not fully %s (timestamp: %s):\n", nodeID, stateStr, time.Now().Format(TimestampFormat)))
				failureReport.WriteString(fmt.Sprintf(" - batcher: %t (expected: %t)\n", status.Batcher, expectedOnline))
				failureReport.WriteString(fmt.Sprintf(" - conn count: %d\n", status.BatcherConnCount))
				failureReport.WriteString(fmt.Sprintf(" - mapresponses: %t (expected: %t, down with at least one peer)\n", status.MapResponses, expectedOnline))
				failureReport.WriteString(fmt.Sprintf(" - nodestore: %t (expected: %t)\n", status.NodeStore, expectedOnline))
			}
		}

		if !allMatch {
			if diff := cmp.Diff(prevReport, failureReport.String()); diff != "" {
				t.Logf("Node state validation report changed at %s:", time.Now().Format(TimestampFormat))
				t.Logf("Previous report:\n%s", prevReport)
				t.Logf("Current report:\n%s", failureReport.String())
				t.Logf("Report diff:\n%s", diff)
				prevReport = failureReport.String()
			}

			failureReport.WriteString(fmt.Sprintf("validation_timestamp: %s\n", time.Now().Format(TimestampFormat)))
			// Note: timeout_remaining is not available in this context.

			assert.Fail(c, failureReport.String())
		}

		stateStr := "offline"
		if expectedOnline {
			stateStr = "online"
		}

		assert.True(c, allMatch, fmt.Sprintf("Not all %d nodes are %s across all systems (batcher, mapresponses, nodestore)", len(expectedNodes), stateStr))
	}, timeout, 2*time.Second, message)
}

// requireAllClientsOfflineStaged validates the offline state with staged timeouts for different components.
func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, message string, totalTimeout time.Duration) {
	t.Helper()

	// Stage 1: Verify batcher disconnection (should be immediate).
	t.Logf("Stage 1: Verifying batcher disconnection for %d nodes", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		debugInfo, err := headscale.DebugBatcher()
		assert.NoError(c, err, "Failed to get batcher debug info")
		if err != nil {
			return
		}

		allBatcherOffline := true
		for _, nodeID := range expectedNodes {
			nodeIDStr := fmt.Sprintf("%d", nodeID)
			if nodeInfo, exists := debugInfo.ConnectedNodes[nodeIDStr]; exists && nodeInfo.Connected {
				allBatcherOffline = false
				assert.False(c, nodeInfo.Connected, "Node %d should not be connected in batcher", nodeID)
			}
		}
		assert.True(c, allBatcherOffline, "All nodes should be disconnected from batcher")
	}, 15*time.Second, 1*time.Second, "batcher disconnection validation")

	// Stage 2: Verify nodestore offline status (up to 20 seconds due to disconnect detection delay).
	t.Logf("Stage 2: Verifying nodestore offline status for %d nodes (allowing for 10s disconnect detection delay)", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		allNodeStoreOffline := true
		for _, nodeID := range expectedNodes {
			if node, exists := nodeStore[nodeID]; exists {
				isOnline := node.IsOnline != nil && *node.IsOnline
				if isOnline {
					allNodeStoreOffline = false
					assert.False(c, isOnline, "Node %d should be offline in nodestore", nodeID)
				}
			}
		}
		assert.True(c, allNodeStoreOffline, "All nodes should be offline in nodestore")
	}, 20*time.Second, 1*time.Second, "nodestore offline validation")

	// Stage 3: Verify map response propagation (longest delay due to peer update timing).
	t.Logf("Stage 3: Verifying map response propagation for %d nodes (allowing for peer map update delays)", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		mapResponses, err := headscale.GetAllMapReponses()
		assert.NoError(c, err, "Failed to get map responses")
		if err != nil {
			return
		}

		onlineMap := integrationutil.BuildExpectedOnlineMap(mapResponses)
		allMapResponsesOffline := true

		if len(expectedNodes) == 1 {
			// Single node: check if it appears in map responses.
			for nodeID := range onlineMap {
				if slices.Contains(expectedNodes, nodeID) {
					allMapResponsesOffline = false
					assert.False(c, true, "Node %d should not appear in map responses", nodeID)
				}
			}
		} else {
			// Multi-node: check peer visibility.
			for _, nodeID := range expectedNodes {
				for id, peerMap := range onlineMap {
					if id == nodeID {
						continue // Skip self-references
					}

					if online, exists := peerMap[nodeID]; exists && online {
						allMapResponsesOffline = false
						assert.False(c, online, "Node %d should not be visible in node %d's map response", nodeID, id)
					}
				}
			}
		}
		assert.True(c, allMapResponsesOffline, "All nodes should be absent from peer map responses")
	}, 60*time.Second, 2*time.Second, "map response propagation validation")

	t.Logf("All stages completed: nodes are fully offline across all systems")
}
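// exampleDumpNodeStoreState is an illustrative sketch of poking the same debug
// endpoint the staged validation above relies on, useful when a test wants to log
// intermediate state rather than assert on it. The helper name and log format are
// assumptions for illustration.
func exampleDumpNodeStoreState(t *testing.T, headscale ControlServer, nodeID types.NodeID) {
	t.Helper()

	nodeStore, err := headscale.DebugNodeStore()
	require.NoError(t, err, "failed to get nodestore debug info")

	node, exists := nodeStore[nodeID]
	require.True(t, exists, "node %d not present in nodestore", nodeID)

	online := node.IsOnline != nil && *node.IsOnline
	t.Logf("node %d (%s): online=%t at %s", nodeID, node.Hostname, online, time.Now().Format(TimestampFormat))
}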
// requireAllClientsNetInfoAndDERP validates that all nodes have NetInfo in the database
// and a valid DERP server based on the NetInfo. This function follows the pattern of
// requireAllClientsOnline by using hsic.DebugNodeStore to get the database state.
func requireAllClientsNetInfoAndDERP(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, message string, timeout time.Duration) {
	t.Helper()

	startTime := time.Now()
	t.Logf("requireAllClientsNetInfoAndDERP: Starting NetInfo/DERP validation for %d nodes at %s - %s", len(expectedNodes), startTime.Format(TimestampFormat), message)

	require.EventuallyWithT(t, func(c *assert.CollectT) {
		// Get nodestore state.
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		// Validate that all expected nodes are present in nodeStore.
		for _, nodeID := range expectedNodes {
			_, exists := nodeStore[nodeID]
			assert.True(c, exists, "Expected node %d not found in nodeStore during NetInfo validation", nodeID)
		}

		// Check each expected node.
		for _, nodeID := range expectedNodes {
			node, exists := nodeStore[nodeID]
			assert.True(c, exists, "Node %d not found in nodestore during NetInfo validation", nodeID)
			if !exists {
				continue
			}

			// Validate that the node has Hostinfo.
			assert.NotNil(c, node.Hostinfo, "Node %d (%s) should have Hostinfo for NetInfo validation", nodeID, node.Hostname)
			if node.Hostinfo == nil {
				t.Logf("Node %d (%s) missing Hostinfo at %s", nodeID, node.Hostname, time.Now().Format(TimestampFormat))
				continue
			}

			// Validate that the node has NetInfo.
			assert.NotNil(c, node.Hostinfo.NetInfo, "Node %d (%s) should have NetInfo in Hostinfo for DERP connectivity", nodeID, node.Hostname)
			if node.Hostinfo.NetInfo == nil {
				t.Logf("Node %d (%s) missing NetInfo at %s", nodeID, node.Hostname, time.Now().Format(TimestampFormat))
				continue
			}

			// Validate that the node has a valid DERP server (PreferredDERP should be > 0).
			preferredDERP := node.Hostinfo.NetInfo.PreferredDERP
			assert.Greater(c, preferredDERP, 0, "Node %d (%s) should have a valid DERP server (PreferredDERP > 0) for relay connectivity, got %d", nodeID, node.Hostname, preferredDERP)

			t.Logf("Node %d (%s) has valid NetInfo with DERP server %d at %s", nodeID, node.Hostname, preferredDERP, time.Now().Format(TimestampFormat))
		}
	}, timeout, 5*time.Second, message)

	endTime := time.Now()
	duration := endTime.Sub(startTime)
	t.Logf("requireAllClientsNetInfoAndDERP: Completed NetInfo/DERP validation for %d nodes at %s - Duration: %v - %s", len(expectedNodes), endTime.Format(TimestampFormat), duration, message)
}

// assertLastSeenSet validates that a node has a non-nil LastSeen timestamp.
// Critical for ensuring node activity tracking is functioning properly.
func assertLastSeenSet(t *testing.T, node *v1.Node) {
	assert.NotNil(t, node)
	assert.NotNil(t, node.GetLastSeen())
}

// assertLastSeenSetWithCollect is the CollectT variant of assertLastSeenSet,
// for use inside assert.EventuallyWithT.
func assertLastSeenSetWithCollect(c *assert.CollectT, node *v1.Node) {
	assert.NotNil(c, node)
	assert.NotNil(c, node.GetLastSeen())
}

// assertTailscaleNodesLogout verifies that all provided Tailscale clients
// are in the logged-out state (NeedsLogin).
func assertTailscaleNodesLogout(t assert.TestingT, clients []TailscaleClient) {
	if h, ok := t.(interface{ Helper() }); ok {
		h.Helper()
	}

	for _, client := range clients {
		status, err := client.Status()
		assert.NoError(t, err, "failed to get status for client %s", client.Hostname())
		assert.Equal(t, "NeedsLogin", status.BackendState, "client %s should be logged out", client.Hostname())
	}
}
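// exampleAssertLogoutEventually is an illustrative sketch showing why
// assertTailscaleNodesLogout takes assert.TestingT rather than *testing.T: it can be
// polled inside assert.EventuallyWithT while clients finish logging out. The
// 60-second timeout is an assumption, not a value taken from existing tests.
func exampleAssertLogoutEventually(t *testing.T, clients []TailscaleClient) {
	t.Helper()

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		assertTailscaleNodesLogout(c, clients)
	}, 60*time.Second, 2*time.Second, "waiting for all clients to reach NeedsLogin")
}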
// pingAllHelper performs ping tests between all clients and addresses, returning the success count.
// This is used to validate network connectivity in integration tests.
// Returns the total number of successful ping operations.
func pingAllHelper(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) int {
	t.Helper()
	success := 0

	for _, client := range clients {
		for _, addr := range addrs {
			err := client.Ping(addr, opts...)
			if err != nil {
				t.Errorf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
			} else {
				success++
			}
		}
	}

	return success
}

// pingDerpAllHelper performs DERP-based ping tests between all clients and addresses.
// This specifically tests connectivity through DERP relay servers, which is important
// for validating NAT traversal and relay functionality. Returns the success count.
func pingDerpAllHelper(t *testing.T, clients []TailscaleClient, addrs []string) int {
	t.Helper()
	success := 0

	for _, client := range clients {
		for _, addr := range addrs {
			if isSelfClient(client, addr) {
				continue
			}

			err := client.Ping(
				addr,
				tsic.WithPingTimeout(derpPingTimeout),
				tsic.WithPingCount(derpPingCount),
				tsic.WithPingUntilDirect(false),
			)
			if err != nil {
				t.Logf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
			} else {
				success++
			}
		}
	}

	return success
}

// isSelfClient determines if the given address belongs to the client itself.
// Used to avoid self-ping operations in connectivity tests by checking
// hostname and IP address matches.
func isSelfClient(client TailscaleClient, addr string) bool {
	if addr == client.Hostname() {
		return true
	}

	ips, err := client.IPs()
	if err != nil {
		return false
	}

	for _, ip := range ips {
		if ip.String() == addr {
			return true
		}
	}

	return false
}

// assertClientsState validates the status and netmap of a list of clients for general connectivity.
// Runs parallel validation of status, netcheck, and netmap for all clients to ensure
// they have proper network configuration for all-to-all connectivity tests.
func assertClientsState(t *testing.T, clients []TailscaleClient) {
	t.Helper()

	var wg sync.WaitGroup

	for _, client := range clients {
		wg.Add(1)
		c := client // Avoid loop pointer
		go func() {
			defer wg.Done()
			assertValidStatus(t, c)
			assertValidNetcheck(t, c)
			assertValidNetmap(t, c)
		}()
	}

	t.Logf("waiting for client state checks to finish")
	wg.Wait()
}
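// examplePingAll is an illustrative sketch of how pingAllHelper is typically consumed:
// ping every given IP from every client and compare the success count against the full
// cross product. FQDNs can be passed as addresses in the same way.
func examplePingAll(t *testing.T, clients []TailscaleClient, ips []netip.Addr) {
	t.Helper()

	addrs := make([]string, 0, len(ips))
	for _, ip := range ips {
		addrs = append(addrs, ip.String())
	}

	success := pingAllHelper(t, clients, addrs)
	assert.Equal(t, len(clients)*len(addrs), success, "expected every ping to succeed")
}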
// assertValidNetmap validates that a client's netmap has all required fields for proper operation.
// Checks the self node and all peers for essential networking data including hostinfo, addresses,
// endpoints, and DERP configuration. Skips validation for Tailscale versions below 1.56.
// This test is not suitable for ACL/partial connection tests.
func assertValidNetmap(t *testing.T, client TailscaleClient) {
	t.Helper()

	if !util.TailscaleVersionNewerOrEqual("1.56", client.Version()) {
		t.Logf("%q has version %q, skipping netmap check...", client.Hostname(), client.Version())

		return
	}

	t.Logf("Checking netmap of %q", client.Hostname())

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		netmap, err := client.Netmap()
		assert.NoError(c, err, "getting netmap for %q", client.Hostname())
		if err != nil {
			return
		}

		assert.Truef(c, netmap.SelfNode.Hostinfo().Valid(), "%q does not have Hostinfo", client.Hostname())
		if hi := netmap.SelfNode.Hostinfo(); hi.Valid() {
			assert.LessOrEqual(c, 1, netmap.SelfNode.Hostinfo().Services().Len(), "%q does not have enough services, got: %v", client.Hostname(), netmap.SelfNode.Hostinfo().Services())
		}

		assert.NotEmptyf(c, netmap.SelfNode.AllowedIPs(), "%q does not have any allowed IPs", client.Hostname())
		assert.NotEmptyf(c, netmap.SelfNode.Addresses(), "%q does not have any addresses", client.Hostname())

		assert.Truef(c, netmap.SelfNode.Online().Get(), "%q is not online", client.Hostname())

		assert.Falsef(c, netmap.SelfNode.Key().IsZero(), "%q does not have a valid NodeKey", client.Hostname())
		assert.Falsef(c, netmap.SelfNode.Machine().IsZero(), "%q does not have a valid MachineKey", client.Hostname())
		assert.Falsef(c, netmap.SelfNode.DiscoKey().IsZero(), "%q does not have a valid DiscoKey", client.Hostname())

		for _, peer := range netmap.Peers {
			assert.NotEqualf(c, "127.3.3.40:0", peer.LegacyDERPString(), "peer (%s) has no home DERP in %q's netmap, got: %s", peer.ComputedName(), client.Hostname(), peer.LegacyDERPString())
			assert.NotEqualf(c, 0, peer.HomeDERP(), "peer (%s) has no home DERP in %q's netmap, got: %d", peer.ComputedName(), client.Hostname(), peer.HomeDERP())

			assert.Truef(c, peer.Hostinfo().Valid(), "peer (%s) of %q does not have Hostinfo", peer.ComputedName(), client.Hostname())
			if hi := peer.Hostinfo(); hi.Valid() {
				assert.LessOrEqualf(c, 3, peer.Hostinfo().Services().Len(), "peer (%s) of %q does not have enough services, got: %v", peer.ComputedName(), client.Hostname(), peer.Hostinfo().Services())

				// NetInfo is not always set.
				// assert.Truef(c, hi.NetInfo().Valid(), "peer (%s) of %q does not have NetInfo", peer.ComputedName(), client.Hostname())
				if ni := hi.NetInfo(); ni.Valid() {
					assert.NotEqualf(c, 0, ni.PreferredDERP(), "peer (%s) has no home DERP in %q's netmap, got: %d", peer.ComputedName(), client.Hostname(), ni.PreferredDERP())
				}
			}

			assert.NotEmptyf(c, peer.Endpoints(), "peer (%s) of %q does not have any endpoints", peer.ComputedName(), client.Hostname())
			assert.NotEmptyf(c, peer.AllowedIPs(), "peer (%s) of %q does not have any allowed IPs", peer.ComputedName(), client.Hostname())
			assert.NotEmptyf(c, peer.Addresses(), "peer (%s) of %q does not have any addresses", peer.ComputedName(), client.Hostname())

			assert.Truef(c, peer.Online().Get(), "peer (%s) of %q is not online", peer.ComputedName(), client.Hostname())

			assert.Falsef(c, peer.Key().IsZero(), "peer (%s) of %q does not have a valid NodeKey", peer.ComputedName(), client.Hostname())
			assert.Falsef(c, peer.Machine().IsZero(), "peer (%s) of %q does not have a valid MachineKey", peer.ComputedName(), client.Hostname())
			assert.Falsef(c, peer.DiscoKey().IsZero(), "peer (%s) of %q does not have a valid DiscoKey", peer.ComputedName(), client.Hostname())
		}
	}, 10*time.Second, 200*time.Millisecond, "Waiting for valid netmap for %q", client.Hostname())
}
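// exampleWaitForPeers is an illustrative sketch of a precondition that pairs well with
// assertValidNetmap: wait until a client's netmap actually contains the expected number
// of peers before asserting on per-peer fields. The 30-second timeout is an assumption.
func exampleWaitForPeers(t *testing.T, client TailscaleClient, wantPeers int) {
	t.Helper()

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		netmap, err := client.Netmap()
		assert.NoError(c, err, "getting netmap for %q", client.Hostname())
		if err != nil {
			return
		}

		assert.Len(c, netmap.Peers, wantPeers, "%q does not see all peers yet", client.Hostname())
	}, 30*time.Second, 1*time.Second, "waiting for %q to see %d peers", client.Hostname(), wantPeers)
}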
// assertValidStatus validates that a client's status has all required fields for proper operation.
// Checks self and peer status for essential data including hostinfo, tailscale IPs, endpoints,
// and network map presence. This test is not suitable for ACL/partial connection tests.
func assertValidStatus(t *testing.T, client TailscaleClient) {
	t.Helper()
	status, err := client.Status(true)
	if err != nil {
		t.Fatalf("getting status for %q: %s", client.Hostname(), err)
	}

	assert.NotEmptyf(t, status.Self.HostName, "%q does not have HostName set, likely missing Hostinfo", client.Hostname())
	assert.NotEmptyf(t, status.Self.OS, "%q does not have OS set, likely missing Hostinfo", client.Hostname())
	assert.NotEmptyf(t, status.Self.Relay, "%q does not have a relay, likely missing Hostinfo/Netinfo", client.Hostname())

	assert.NotEmptyf(t, status.Self.TailscaleIPs, "%q does not have Tailscale IPs", client.Hostname())

	// This does not seem to appear until version 1.56.
	if status.Self.AllowedIPs != nil {
		assert.NotEmptyf(t, status.Self.AllowedIPs, "%q does not have any allowed IPs", client.Hostname())
	}

	assert.NotEmptyf(t, status.Self.Addrs, "%q does not have any endpoints", client.Hostname())

	assert.Truef(t, status.Self.Online, "%q is not online", client.Hostname())

	assert.Truef(t, status.Self.InNetworkMap, "%q is not in network map", client.Hostname())

	// This isn't really relevant for Self as it won't be in its own socket/wireguard.
	// assert.Truef(t, status.Self.InMagicSock, "%q is not tracked by magicsock", client.Hostname())
	// assert.Truef(t, status.Self.InEngine, "%q is not in wireguard engine", client.Hostname())

	for _, peer := range status.Peer {
		assert.NotEmptyf(t, peer.HostName, "peer (%s) of %q does not have HostName set, likely missing Hostinfo", peer.DNSName, client.Hostname())
		assert.NotEmptyf(t, peer.OS, "peer (%s) of %q does not have OS set, likely missing Hostinfo", peer.DNSName, client.Hostname())
		assert.NotEmptyf(t, peer.Relay, "peer (%s) of %q does not have a relay, likely missing Hostinfo/Netinfo", peer.DNSName, client.Hostname())

		assert.NotEmptyf(t, peer.TailscaleIPs, "peer (%s) of %q does not have Tailscale IPs", peer.DNSName, client.Hostname())

		// This does not seem to appear until version 1.56.
		if peer.AllowedIPs != nil {
			assert.NotEmptyf(t, peer.AllowedIPs, "peer (%s) of %q does not have any allowed IPs", peer.DNSName, client.Hostname())
		}

		// Addrs does not seem to appear in the status from peers.
		// assert.NotEmptyf(t, peer.Addrs, "peer (%s) of %q does not have any endpoints", peer.DNSName, client.Hostname())

		assert.Truef(t, peer.Online, "peer (%s) of %q is not online", peer.DNSName, client.Hostname())

		assert.Truef(t, peer.InNetworkMap, "peer (%s) of %q is not in network map", peer.DNSName, client.Hostname())
		assert.Truef(t, peer.InMagicSock, "peer (%s) of %q is not tracked by magicsock", peer.DNSName, client.Hostname())

		// TODO(kradalby): InEngine is only true when a proper tunnel is set up,
		// there might be some interesting stuff to test here in the future.
		// assert.Truef(t, peer.InEngine, "peer (%s) of %q is not in wireguard engine", peer.DNSName, client.Hostname())
	}
}

// assertValidNetcheck validates that a client has a proper DERP relay configured.
// Ensures the client has discovered and selected a DERP server for relay functionality,
// which is essential for NAT traversal and connectivity in restricted networks.
func assertValidNetcheck(t *testing.T, client TailscaleClient) {
	t.Helper()
	report, err := client.Netcheck()
	if err != nil {
		t.Fatalf("getting netcheck report for %q: %s", client.Hostname(), err)
	}

	assert.NotEqualf(t, 0, report.PreferredDERP, "%q does not have a DERP relay", client.Hostname())
}

// assertCommandOutputContains executes a command with exponential backoff retry until the output
// contains the expected string or the timeout is reached (10 seconds).
// This implements eventual consistency patterns and should be used instead of time.Sleep
// before executing commands that depend on network state propagation.
//
// Timeout: 10 seconds with exponential backoff.
// Use cases: DNS resolution, route propagation, policy updates.
func assertCommandOutputContains(t *testing.T, c TailscaleClient, command []string, contains string) {
	t.Helper()

	_, err := backoff.Retry(t.Context(), func() (struct{}, error) {
		stdout, stderr, err := c.Execute(command)
		if err != nil {
			return struct{}{}, fmt.Errorf("executing command, stdout: %q stderr: %q, err: %w", stdout, stderr, err)
		}

		if !strings.Contains(stdout, contains) {
			return struct{}{}, fmt.Errorf("executing command, expected string %q not found in %q", contains, stdout)
		}

		return struct{}{}, nil
	}, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(10*time.Second))

	assert.NoError(t, err)
}

// dockertestMaxWait returns the maximum wait time for Docker-based test operations.
// Uses longer timeouts in CI environments to account for slower resource allocation
// and higher system load during automated testing.
func dockertestMaxWait() time.Duration {
	wait := 300 * time.Second //nolint

	if util.IsCI() {
		wait = 600 * time.Second //nolint
	}

	return wait
}

// didClientUseWebsocketForDERP analyzes client logs to determine if WebSocket was used for DERP.
// Searches for WebSocket connection indicators in client logs to validate
// the DERP relay communication method when debugging connectivity issues.
func didClientUseWebsocketForDERP(t *testing.T, client TailscaleClient) bool {
	t.Helper()

	buf := &bytes.Buffer{}
	err := client.WriteLogs(buf, buf)
	if err != nil {
		t.Fatalf("failed to fetch client logs: %s: %s", client.Hostname(), err)
	}

	count, err := countMatchingLines(buf, func(line string) bool {
		return strings.Contains(line, "websocket: connected to ")
	})
	if err != nil {
		t.Fatalf("failed to process client logs: %s: %s", client.Hostname(), err)
	}

	return count > 0
}

// countMatchingLines counts lines in a reader that match the given predicate function.
// Uses optimized buffering for log analysis and provides flexible line-by-line
// filtering for log parsing and pattern matching in integration tests.
func countMatchingLines(in io.Reader, predicate func(string) bool) (int, error) {
	count := 0
	scanner := bufio.NewScanner(in)
	{
		const logBufferInitialSize = 1024 << 10 // preallocate 1 MiB
		buff := make([]byte, logBufferInitialSize)
		scanner.Buffer(buff, len(buff))
		scanner.Split(bufio.ScanLines)
	}

	for scanner.Scan() {
		if predicate(scanner.Text()) {
			count++
		}
	}

	return count, scanner.Err()
}

// wildcard returns a wildcard alias (*) for use in policy v2 configurations.
// Provides a convenient helper for creating permissive policy rules.
func wildcard() policyv2.Alias {
	return policyv2.Wildcard
}
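// exampleWaitForPeerInStatus is an illustrative sketch of assertCommandOutputContains
// used as an eventual-consistency check: instead of sleeping before asserting on
// propagated state, the command is retried until the expected substring appears.
// The command and the idea of matching on a peer's FQDN are assumptions for illustration.
func exampleWaitForPeerInStatus(t *testing.T, client TailscaleClient, peerFQDN string) {
	t.Helper()

	assertCommandOutputContains(t, client, []string{"tailscale", "status"}, peerFQDN)
}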
// usernamep returns a pointer to a Username as an Alias for policy v2 configurations.
// Used in ACL rules to reference specific users in network access policies.
func usernamep(name string) policyv2.Alias {
	return ptr.To(policyv2.Username(name))
}

// hostp returns a pointer to a Host as an Alias for policy v2 configurations.
// Used in ACL rules to reference specific hosts in network access policies.
func hostp(name string) policyv2.Alias {
	return ptr.To(policyv2.Host(name))
}

// groupp returns a pointer to a Group as an Alias for policy v2 configurations.
// Used in ACL rules to reference user groups in network access policies.
func groupp(name string) policyv2.Alias {
	return ptr.To(policyv2.Group(name))
}

// tagp returns a pointer to a Tag as an Alias for policy v2 configurations.
// Used in ACL rules to reference node tags in network access policies.
func tagp(name string) policyv2.Alias {
	return ptr.To(policyv2.Tag(name))
}

// prefixp returns a pointer to a Prefix from a CIDR string for policy v2 configurations.
// Converts CIDR notation to policy prefix format for network range specifications.
func prefixp(cidr string) policyv2.Alias {
	prefix := netip.MustParsePrefix(cidr)
	return ptr.To(policyv2.Prefix(prefix))
}

// aliasWithPorts creates an AliasWithPorts structure from an alias and port ranges.
// Combines network targets with specific port restrictions for fine-grained
// access control in policy v2 configurations.
func aliasWithPorts(alias policyv2.Alias, ports ...tailcfg.PortRange) policyv2.AliasWithPorts {
	return policyv2.AliasWithPorts{
		Alias: alias,
		Ports: ports,
	}
}

// usernameOwner returns a Username as an Owner for use in TagOwners policies.
// Specifies which users can assign and manage specific tags in ACL configurations.
func usernameOwner(name string) policyv2.Owner {
	return ptr.To(policyv2.Username(name))
}

// groupOwner returns a Group as an Owner for use in TagOwners policies.
// Specifies which groups can assign and manage specific tags in ACL configurations.
func groupOwner(name string) policyv2.Owner {
	return ptr.To(policyv2.Group(name))
}

// usernameApprover returns a Username as an AutoApprover for subnet route policies.
// Specifies which users can automatically approve subnet route advertisements.
func usernameApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Username(name))
}

// groupApprover returns a Group as an AutoApprover for subnet route policies.
// Specifies which groups can automatically approve subnet route advertisements.
func groupApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Group(name))
}

// tagApprover returns a Tag as an AutoApprover for subnet route policies.
// Specifies which tagged nodes can automatically approve subnet route advertisements.
func tagApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Tag(name))
}

// oidcMockUser creates a MockUser for OIDC authentication testing.
// Generates consistent test user data with configurable email verification status
// for validating OIDC integration flows in headscale authentication tests.
func oidcMockUser(username string, emailVerified bool) mockoidc.MockUser {
	return mockoidc.MockUser{
		Subject:           username,
		PreferredUsername: username,
		Email:             username + "@headscale.net",
		EmailVerified:     emailVerified,
	}
}
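// exampleDestinations is an illustrative sketch of how the policy v2 builders above
// compose into ACL destinations: everything on port 443 plus a tagged target on the
// full port range. The "tag:server" name and the specific ports are assumptions for
// illustration; tailcfg.PortRangeAny covers all ports.
func exampleDestinations() []policyv2.AliasWithPorts {
	return []policyv2.AliasWithPorts{
		aliasWithPorts(wildcard(), tailcfg.PortRange{First: 443, Last: 443}),
		aliasWithPorts(tagp("tag:server"), tailcfg.PortRangeAny),
	}
}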
// GetUserByName retrieves a user by name from the headscale server.
// This is a common pattern used when creating preauth keys or managing users.
func GetUserByName(headscale ControlServer, username string) (*v1.User, error) {
	users, err := headscale.ListUsers()
	if err != nil {
		return nil, fmt.Errorf("failed to list users: %w", err)
	}

	for _, u := range users {
		if u.GetName() == username {
			return u, nil
		}
	}

	return nil, fmt.Errorf("user %s not found", username)
}

// FindNewClient finds a client that is in the new list but not in the original list.
// This is useful when dynamically adding nodes during tests and needing to identify
// which client was just added.
func FindNewClient(original, updated []TailscaleClient) (TailscaleClient, error) {
	for _, client := range updated {
		isOriginal := false
		for _, origClient := range original {
			if client.Hostname() == origClient.Hostname() {
				isOriginal = true
				break
			}
		}

		if !isOriginal {
			return client, nil
		}
	}

	return nil, fmt.Errorf("no new client found")
}

// AddAndLoginClient adds a new tailscale client to a user and logs it in.
// This combines the common pattern of:
// 1. Creating a new node
// 2. Finding the new node in the client list
// 3. Getting the user to create a preauth key
// 4. Logging in the new node
func (s *Scenario) AddAndLoginClient(
	t *testing.T,
	username string,
	version string,
	headscale ControlServer,
	tsOpts ...tsic.Option,
) (TailscaleClient, error) {
	t.Helper()

	// Get the original client list.
	originalClients, err := s.ListTailscaleClients(username)
	if err != nil {
		return nil, fmt.Errorf("failed to list original clients: %w", err)
	}

	// Create the new node.
	err = s.CreateTailscaleNodesInUser(username, version, 1, tsOpts...)
	if err != nil {
		return nil, fmt.Errorf("failed to create tailscale node: %w", err)
	}

	// Wait for the new node to appear in the client list.
	var newClient TailscaleClient
	_, err = backoff.Retry(t.Context(), func() (struct{}, error) {
		updatedClients, err := s.ListTailscaleClients(username)
		if err != nil {
			return struct{}{}, fmt.Errorf("failed to list updated clients: %w", err)
		}

		if len(updatedClients) != len(originalClients)+1 {
			return struct{}{}, fmt.Errorf("expected %d clients, got %d", len(originalClients)+1, len(updatedClients))
		}

		newClient, err = FindNewClient(originalClients, updatedClients)
		if err != nil {
			return struct{}{}, fmt.Errorf("failed to find new client: %w", err)
		}

		return struct{}{}, nil
	}, backoff.WithBackOff(backoff.NewConstantBackOff(500*time.Millisecond)), backoff.WithMaxElapsedTime(10*time.Second))
	if err != nil {
		return nil, fmt.Errorf("timeout waiting for new client: %w", err)
	}

	// Get the user and create a preauth key.
	user, err := GetUserByName(headscale, username)
	if err != nil {
		return nil, fmt.Errorf("failed to get user: %w", err)
	}

	authKey, err := s.CreatePreAuthKey(user.GetId(), true, false)
	if err != nil {
		return nil, fmt.Errorf("failed to create preauth key: %w", err)
	}

	// Login the new client.
	err = newClient.Login(headscale.GetEndpoint(), authKey.GetKey())
	if err != nil {
		return nil, fmt.Errorf("failed to login new client: %w", err)
	}

	return newClient, nil
}

// MustAddAndLoginClient is like AddAndLoginClient but fails the test on error.
func (s *Scenario) MustAddAndLoginClient(
	t *testing.T,
	username string,
	version string,
	headscale ControlServer,
	tsOpts ...tsic.Option,
) TailscaleClient {
	t.Helper()

	client, err := s.AddAndLoginClient(t, username, version, headscale, tsOpts...)
	require.NoError(t, err)

	return client
}
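// exampleAddNodeMidTest is an illustrative sketch of the AddAndLoginClient flow from a
// test's point of view: grow a user's node count by one mid-test and get the freshly
// logged-in client back. The "user1" name and the "unstable" version string are
// assumptions for illustration.
func exampleAddNodeMidTest(t *testing.T, s *Scenario, headscale ControlServer) TailscaleClient {
	t.Helper()

	return s.MustAddAndLoginClient(t, "user1", "unstable", headscale)
}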