@@ -85,6 +85,74 @@ func (job *LazyJob) Run(ctx context.Context, errors chan<- error) error {
85
85
return nil
86
86
}
87
87
88
+ func (job * LazyJob ) startProcessGracePeriod (ctx context.Context , pid int ) {
89
+ l := log .WithField ("job.name" , job .Config .Name )
90
+ graceTimer := time .NewTimer (lazyJobReapGracePeriod )
91
+ defer graceTimer .Stop ()
92
+
93
+ select {
94
+ case <- ctx .Done ():
95
+ l .Infof ("context done during SIGTERM grace period for PID %d" , pid )
96
+ return
97
+ case <- graceTimer .C :
98
+ job .lazyStartLock .Lock ()
99
+ defer job .lazyStartLock .Unlock ()
100
+
101
+ // Check if the process we sent SIGTERM to is still running
102
+ if job .process != nil && job .HasStarted () && job .cmd .Process .Pid == pid {
103
+ l .Warnf ("process PID %d did not exit after SIGTERM and grace period; sending SIGKILL" , pid )
104
+ if err := job .signalAll (syscall .SIGKILL ); err != nil {
105
+ l .WithError (err ).Errorf ("failed to send1 SIGKILL to PID %d" , pid )
106
+ }
107
+ } else if job .process != nil {
108
+ // Process is not nil, but it's not the one we targeted.
109
+ // This could happen if the job was quickly restarted.
110
+ currentPid := - 1
111
+ if job .HasStarted () {
112
+ currentPid = job .cmd .Process .Pid
113
+ }
114
+ l .Warnf ("original process PID %d seems to have exited or changed; current PID is %d. Skipping SIGKILL." , pidToReap , currentPid )
115
+ } else {
116
+ // job.process is nil, so it was cleaned up.
117
+ l .Infof ("process PID %d exited gracefully after SIGTERM" , pid )
118
+ }
119
+ }
120
+ }
121
+
122
+ func (job * LazyJob ) reapProcess () int {
123
+ l := log .WithField ("job.name" , job .Config .Name )
124
+ if job .activeConnections > 0 {
125
+ return 0
126
+ }
127
+ if diff := time .Since (job .lastConnectionClosed ); diff < job .coolDownTimeout {
128
+ return 0
129
+ }
130
+ if job .process == nil {
131
+ return 0
132
+ }
133
+
134
+ job .lazyStartLock .Lock ()
135
+ defer job .lazyStartLock .Unlock ()
136
+
137
+ // Verify all conditions again inside the lock
138
+ if job .process == nil || job .activeConnections != 0 || job .lastConnectionClosed .IsZero () || time .Since (job .lastConnectionClosed ) < job .coolDownTimeout {
139
+ return 0
140
+ }
141
+ if ! job .HasStarted () {
142
+ l .Warn ("job.process is not nil, but job.cmd or job.cmd.Process is nil; skipping reap cycle" )
143
+ return 0
144
+ }
145
+
146
+ pidToReap := job .cmd .Process .Pid
147
+ l .Infof ("sending SIGTERM to idle process PID %d" , pidToReap )
148
+ if err := job .signal (syscall .SIGTERM ); err != nil {
149
+ l .WithError (err ).Warnf ("failed to send SIGTERM to PID %d" , pidToReap )
150
+ return 0
151
+ }
152
+
153
+ return pidToReap
154
+ }
155
+
88
156
func (job * LazyJob ) startProcessReaper (ctx context.Context ) {
89
157
reaperInterval := job .coolDownTimeout / 2
90
158
if reaperInterval < 1 * time .Second {
@@ -103,72 +171,11 @@ func (job *LazyJob) startProcessReaper(ctx context.Context) {
103
171
l .Info ("context done, stopping lazy job process reaper" )
104
172
return
105
173
case <- ticker .C :
106
- if job .activeConnections > 0 {
107
- continue
108
- }
109
-
110
- diff := time .Since (job .lastConnectionClosed )
111
- if diff < job .coolDownTimeout {
112
- continue
113
- }
114
-
115
- if job .process == nil {
174
+ pidToReap := job .reapProcess ()
175
+ if pidToReap == 0 {
116
176
continue
117
177
}
118
-
119
- // All conditions met to reap the process
120
- job .lazyStartLock .Lock ()
121
-
122
- // Verify all conditions again inside the lock
123
- if job .process == nil || job .activeConnections != 0 || job .lastConnectionClosed .IsZero () || time .Since (job .lastConnectionClosed ) < job .coolDownTimeout {
124
- job .lazyStartLock .Unlock ()
125
- continue
126
- }
127
-
128
- if ! job .HasStarted () {
129
- l .Warn ("job.process is not nil, but job.cmd or job.cmd.Process is nil; skipping reap cycle" )
130
- job .lazyStartLock .Unlock ()
131
- continue
132
- }
133
- pidToReap := job .cmd .Process .Pid
134
- l .Infof ("sending SIGTERM to idle process PID %d" , pidToReap )
135
- if err := job .signal (syscall .SIGTERM ); err != nil {
136
- l .WithError (err ).Warnf ("failed to send SIGTERM to PID %d" , pidToReap )
137
- job .lazyStartLock .Unlock ()
138
- continue
139
- }
140
-
141
- job .lazyStartLock .Unlock ()
142
-
143
- graceTimer := time .NewTimer (lazyJobReapGracePeriod )
144
- defer graceTimer .Stop ()
145
-
146
- select {
147
- case <- graceTimer .C :
148
- job .lazyStartLock .Lock ()
149
- // Check if the process we sent SIGTERM to is still running
150
- if job .process != nil && job .HasStarted () && job .cmd .Process .Pid == pidToReap {
151
- l .Warnf ("process PID %d did not exit after SIGTERM and grace period; sending SIGKILL" , pidToReap )
152
- if err := job .signalAll (syscall .SIGKILL ); err != nil {
153
- l .WithError (err ).Errorf ("failed to send1 SIGKILL to PID %d" , pidToReap )
154
- }
155
- } else if job .process != nil {
156
- // Process is not nil, but it's not the one we targeted.
157
- // This could happen if the job was quickly restarted.
158
- currentPid := - 1
159
- if job .HasStarted () {
160
- currentPid = job .cmd .Process .Pid
161
- }
162
- l .Warnf ("original process PID %d seems to have exited or changed; current PID is %d. Skipping SIGKILL." , pidToReap , currentPid )
163
- } else {
164
- // job.process is nil, so it was cleaned up.
165
- l .Infof ("process PID %d exited gracefully after SIGTERM" , pidToReap )
166
- }
167
- job .lazyStartLock .Unlock ()
168
- case <- ctx .Done ():
169
- l .Infof ("context done during SIGTERM grace period for PID %d" , pidToReap )
170
- return // Exit the reaper goroutine
171
- }
178
+ job .startProcessGracePeriod (ctx , pidToReap )
172
179
}
173
180
}
174
181
}()
0 commit comments