Skip to content

Commit eb47d1c

Browse files
authored
scheduler: eliminate dead code in node reconciler (#26236)
While working on property testing in #26216, I discovered we had unreachable code in the node reconciler. The `diffSystemAllocsForNode` function receives a set of non-terminal allocations, but then has branches where it assumes the allocations might be terminal. It's trivially provable that these allocs are always live, as the system scheduler splits the set of known allocs into live and terminal sets before passing them into the node reconciler. Eliminate the unreachable code and improve the variable names to make the known state of the allocs more clear in the reconciler code. Ref: #26216
1 parent 8bc6abc commit eb47d1c

File tree

1 file changed

+9
-18
lines changed

1 file changed

+9
-18
lines changed

scheduler/reconciler/reconcile_node.go

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@ func Node(
1818
readyNodes []*structs.Node, // list of nodes in the ready state
1919
notReadyNodes map[string]struct{}, // list of nodes in DC but not ready, e.g. draining
2020
taintedNodes map[string]*structs.Node, // nodes which are down or drain mode (by node id)
21-
allocs []*structs.Allocation, // non-terminal allocations
21+
live []*structs.Allocation, // non-terminal allocations
2222
terminal structs.TerminalByNodeByName, // latest terminal allocations (by node id)
2323
serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic
2424
) *NodeReconcileResult {
2525

2626
// Build a mapping of nodes to all their allocs.
27-
nodeAllocs := make(map[string][]*structs.Allocation, len(allocs))
28-
for _, alloc := range allocs {
27+
nodeAllocs := make(map[string][]*structs.Allocation, len(live))
28+
for _, alloc := range live {
2929
nodeAllocs[alloc.NodeID] = append(nodeAllocs[alloc.NodeID], alloc)
3030
}
3131

@@ -65,15 +65,15 @@ func diffSystemAllocsForNode(
6565
notReadyNodes map[string]struct{}, // nodes that are not ready, e.g. draining
6666
taintedNodes map[string]*structs.Node, // nodes which are down (by node id)
6767
required map[string]*structs.TaskGroup, // set of allocations that must exist
68-
allocs []*structs.Allocation, // non-terminal allocations that exist
68+
liveAllocs []*structs.Allocation, // non-terminal allocations that exist
6969
terminal structs.TerminalByNodeByName, // latest terminal allocations (by node, id)
7070
serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic
7171
) *NodeReconcileResult {
7272
result := new(NodeReconcileResult)
7373

7474
// Scan the existing updates
7575
existing := make(map[string]struct{}) // set of alloc names
76-
for _, exist := range allocs {
76+
for _, exist := range liveAllocs {
7777
// Index the existing node
7878
name := exist.Name
7979
existing[name] = struct{}{}
@@ -108,7 +108,7 @@ func diffSystemAllocsForNode(
108108
}
109109

110110
// If we have been marked for migration and aren't terminal, migrate
111-
if !exist.TerminalStatus() && exist.DesiredTransition.ShouldMigrate() {
111+
if exist.DesiredTransition.ShouldMigrate() {
112112
result.Migrate = append(result.Migrate, AllocTuple{
113113
Name: name,
114114
TaskGroup: tg,
@@ -117,16 +117,6 @@ func diffSystemAllocsForNode(
117117
continue
118118
}
119119

120-
// If we are a sysbatch job and terminal, ignore (or stop?) the alloc
121-
if job.Type == structs.JobTypeSysBatch && exist.TerminalStatus() {
122-
result.Ignore = append(result.Ignore, AllocTuple{
123-
Name: name,
124-
TaskGroup: tg,
125-
Alloc: exist,
126-
})
127-
continue
128-
}
129-
130120
// Expired unknown allocs are lost. Expired checks that status is unknown.
131121
if supportsDisconnectedClients && expired {
132122
result.Lost = append(result.Lost, AllocTuple{
@@ -149,6 +139,7 @@ func diffSystemAllocsForNode(
149139
continue
150140
}
151141

142+
// note: the node can be both tainted and nil
152143
node, nodeIsTainted := taintedNodes[exist.NodeID]
153144

154145
// Filter allocs on a node that is now re-connected to reconnecting.
@@ -198,7 +189,7 @@ func diffSystemAllocsForNode(
198189
continue
199190
}
200191

201-
if !exist.TerminalStatus() && (node == nil || node.TerminalStatus()) {
192+
if node == nil || node.TerminalStatus() {
202193
result.Lost = append(result.Lost, AllocTuple{
203194
Name: name,
204195
TaskGroup: tg,
@@ -319,7 +310,7 @@ func materializeSystemTaskGroups(job *structs.Job) map[string]*structs.TaskGroup
319310
}
320311

321312
for _, tg := range job.TaskGroups {
322-
for i := 0; i < tg.Count; i++ {
313+
for i := range tg.Count {
323314
name := fmt.Sprintf("%s.%s[%d]", job.Name, tg.Name, i)
324315
out[name] = tg
325316
}

0 commit comments

Comments
 (0)