mapper/batcher: minor production code cleanup

L1: Replace crypto/rand with an atomic counter for generating
connection IDs. These identifiers are process-local and do not need
cryptographic randomness; a monotonic counter is cheaper and
produces shorter, sortable IDs.

L5: Use getActiveConnectionCount() in Debug() instead of directly
locking the mutex and reading the connections slice. This avoids
bypassing the accessor that already exists for this purpose.

L6: Extract the hardcoded 15*time.Minute cleanup threshold into
the named constant offlineNodeCleanupThreshold.

L7: Inline the trivial addWork wrapper; AddWork now calls addToBatch
directly.

Updates #2545
This commit is contained in:
Kristoffer Dalby
2026-03-13 16:03:28 +00:00
parent 7881f65358
commit 86e279869e
2 changed files with 15 additions and 17 deletions

View File

@@ -26,6 +26,10 @@ var (
ErrNodeNotFoundMapper = errors.New("node not found")
)
+// offlineNodeCleanupThreshold is how long a node must be disconnected
+// before cleanupOfflineNodes removes its in-memory state.
+const offlineNodeCleanupThreshold = 15 * time.Minute
var mapResponseGenerated = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "headscale",
Name: "mapresponse_generated_total",
@@ -327,7 +331,7 @@ func (b *Batcher) RemoveNode(id types.NodeID, c chan<- *tailcfg.MapResponse) boo
// AddWork queues a change to be processed by the batcher.
func (b *Batcher) AddWork(r ...change.Change) {
-	b.addWork(r...)
+	b.addToBatch(r...)
}
func (b *Batcher) Start() {
@@ -477,10 +481,6 @@ func (b *Batcher) worker(workerID int) {
}
}
-func (b *Batcher) addWork(r ...change.Change) {
-	b.addToBatch(r...)
-}
// queueWork safely queues work.
func (b *Batcher) queueWork(w work) {
b.workQueuedCount.Add(1)
@@ -595,14 +595,13 @@ func (b *Batcher) processBatchedChanges() {
// reconnects between the hasActiveConnections() check and the Delete() call.
// TODO(kradalby): reevaluate if we want to keep this.
func (b *Batcher) cleanupOfflineNodes() {
-	cleanupThreshold := 15 * time.Minute
now := time.Now()
var nodesToCleanup []types.NodeID
// Find nodes that have been offline for too long
b.connected.Range(func(nodeID types.NodeID, disconnectTime *time.Time) bool {
-		if disconnectTime != nil && now.Sub(*disconnectTime) > cleanupThreshold {
+		if disconnectTime != nil && now.Sub(*disconnectTime) > offlineNodeCleanupThreshold {
nodesToCleanup = append(nodesToCleanup, nodeID)
}
@@ -632,7 +631,7 @@ func (b *Batcher) cleanupOfflineNodes() {
cleaned++
log.Info().Uint64(zf.NodeID, nodeID.Uint64()).
-			Dur("offline_duration", cleanupThreshold).
+			Dur("offline_duration", offlineNodeCleanupThreshold).
Msg("cleaning up node that has been offline for too long")
return conn, xsync.DeleteOp
@@ -750,9 +749,7 @@ func (b *Batcher) Debug() map[types.NodeID]DebugNodeInfo {
return true
}
-		nodeConn.mutex.RLock()
-		activeConnCount := len(nodeConn.connections)
-		nodeConn.mutex.RUnlock()
+		activeConnCount := nodeConn.getActiveConnectionCount()
// Use immediate connection status: if active connections exist, node is connected
// If not, check the connected map for nil (connected) vs timestamp (disconnected)

View File

@@ -1,9 +1,8 @@
package mapper
import (
-	"crypto/rand"
-	"encoding/hex"
 	"fmt"
+	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -53,12 +52,14 @@ type multiChannelNodeConn struct {
lastSentPeers *xsync.Map[tailcfg.NodeID, struct{}]
}
+// connIDCounter is a monotonically increasing counter used to generate
+// unique connection identifiers without the overhead of crypto/rand.
+// Connection IDs are process-local and need not be cryptographically random.
+var connIDCounter atomic.Uint64
// generateConnectionID generates a unique connection identifier.
func generateConnectionID() string {
-	bytes := make([]byte, 8)
-	_, _ = rand.Read(bytes)
-	return hex.EncodeToString(bytes)
+	return strconv.FormatUint(connIDCounter.Add(1), 10)
}
// newMultiChannelNodeConn creates a new multi-channel node connection.