mapper/batcher: minor production code cleanup

L1: Replace crypto/rand with an atomic counter for generating
connection IDs. These identifiers are process-local and do not need
cryptographic randomness; a monotonic counter is cheaper and
produces shorter, sortable IDs.

L5: Use getActiveConnectionCount() in Debug() instead of directly
locking the mutex and reading the connections slice. This avoids
bypassing the accessor that already exists for this purpose.

L6: Extract the hardcoded 15*time.Minute cleanup threshold into
the named constant offlineNodeCleanupThreshold.

L7: Inline the trivial addWork wrapper; AddWork now calls addToBatch
directly.

Updates #2545
This commit is contained in:
Kristoffer Dalby
2026-03-13 16:03:28 +00:00
parent 7881f65358
commit 86e279869e
2 changed files with 15 additions and 17 deletions

View File

@@ -26,6 +26,10 @@ var (
ErrNodeNotFoundMapper = errors.New("node not found")
)
+// offlineNodeCleanupThreshold is how long a node must be disconnected
+// before cleanupOfflineNodes removes its in-memory state.
+const offlineNodeCleanupThreshold = 15 * time.Minute
var mapResponseGenerated = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "headscale",
Name: "mapresponse_generated_total",
@@ -327,7 +331,7 @@ func (b *Batcher) RemoveNode(id types.NodeID, c chan<- *tailcfg.MapResponse) boo
// AddWork queues a change to be processed by the batcher.
func (b *Batcher) AddWork(r ...change.Change) {
-	b.addWork(r...)
+	b.addToBatch(r...)
}
func (b *Batcher) Start() {
@@ -477,10 +481,6 @@ func (b *Batcher) worker(workerID int) {
}
}
-func (b *Batcher) addWork(r ...change.Change) {
-	b.addToBatch(r...)
-}
// queueWork safely queues work.
func (b *Batcher) queueWork(w work) {
b.workQueuedCount.Add(1)
@@ -595,14 +595,13 @@ func (b *Batcher) processBatchedChanges() {
// reconnects between the hasActiveConnections() check and the Delete() call.
// TODO(kradalby): reevaluate if we want to keep this.
func (b *Batcher) cleanupOfflineNodes() {
-	cleanupThreshold := 15 * time.Minute
now := time.Now()
var nodesToCleanup []types.NodeID
// Find nodes that have been offline for too long
b.connected.Range(func(nodeID types.NodeID, disconnectTime *time.Time) bool {
-		if disconnectTime != nil && now.Sub(*disconnectTime) > cleanupThreshold {
+		if disconnectTime != nil && now.Sub(*disconnectTime) > offlineNodeCleanupThreshold {
nodesToCleanup = append(nodesToCleanup, nodeID)
}
@@ -632,7 +631,7 @@ func (b *Batcher) cleanupOfflineNodes() {
cleaned++
log.Info().Uint64(zf.NodeID, nodeID.Uint64()).
-			Dur("offline_duration", cleanupThreshold).
+			Dur("offline_duration", offlineNodeCleanupThreshold).
Msg("cleaning up node that has been offline for too long")
return conn, xsync.DeleteOp
@@ -750,9 +749,7 @@ func (b *Batcher) Debug() map[types.NodeID]DebugNodeInfo {
return true
}
-		nodeConn.mutex.RLock()
-		activeConnCount := len(nodeConn.connections)
-		nodeConn.mutex.RUnlock()
+		activeConnCount := nodeConn.getActiveConnectionCount()
// Use immediate connection status: if active connections exist, node is connected
// If not, check the connected map for nil (connected) vs timestamp (disconnected)

View File

@@ -1,9 +1,8 @@
package mapper
import (
-	"crypto/rand"
-	"encoding/hex"
 	"fmt"
+	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -53,12 +52,14 @@ type multiChannelNodeConn struct {
lastSentPeers *xsync.Map[tailcfg.NodeID, struct{}]
}
+// connIDCounter is a monotonically increasing counter used to generate
+// unique connection identifiers without the overhead of crypto/rand.
+// Connection IDs are process-local and need not be cryptographically random.
+var connIDCounter atomic.Uint64
// generateConnectionID generates a unique connection identifier.
func generateConnectionID() string {
-	bytes := make([]byte, 8)
-	_, _ = rand.Read(bytes)
-	return hex.EncodeToString(bytes)
+	return strconv.FormatUint(connIDCounter.Add(1), 10)
}
// newMultiChannelNodeConn creates a new multi-channel node connection.