Skip to content

Commit c864642

Browse files
author
David Grieser
committed
Breaking up code.
1 parent 27e1860 commit c864642

File tree

1 file changed

+71
-64
lines changed

1 file changed

+71
-64
lines changed

pkg/proc/job_lazy.go

Lines changed: 71 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,74 @@ func (job *LazyJob) Run(ctx context.Context, errors chan<- error) error {
8585
return nil
8686
}
8787

88+
func (job *LazyJob) startProcessGracePeriod(ctx context.Context, pid int) {
89+
l := log.WithField("job.name", job.Config.Name)
90+
graceTimer := time.NewTimer(lazyJobReapGracePeriod)
91+
defer graceTimer.Stop()
92+
93+
select {
94+
case <-ctx.Done():
95+
l.Infof("context done during SIGTERM grace period for PID %d", pid)
96+
return
97+
case <-graceTimer.C:
98+
job.lazyStartLock.Lock()
99+
defer job.lazyStartLock.Unlock()
100+
101+
// Check if the process we sent SIGTERM to is still running
102+
if job.process != nil && job.HasStarted() && job.cmd.Process.Pid == pid {
103+
l.Warnf("process PID %d did not exit after SIGTERM and grace period; sending SIGKILL", pid)
104+
if err := job.signalAll(syscall.SIGKILL); err != nil {
105+
l.WithError(err).Errorf("failed to send1 SIGKILL to PID %d", pid)
106+
}
107+
} else if job.process != nil {
108+
// Process is not nil, but it's not the one we targeted.
109+
// This could happen if the job was quickly restarted.
110+
currentPid := -1
111+
if job.HasStarted() {
112+
currentPid = job.cmd.Process.Pid
113+
}
114+
l.Warnf("original process PID %d seems to have exited or changed; current PID is %d. Skipping SIGKILL.", pidToReap, currentPid)
115+
} else {
116+
// job.process is nil, so it was cleaned up.
117+
l.Infof("process PID %d exited gracefully after SIGTERM", pid)
118+
}
119+
}
120+
}
121+
122+
func (job *LazyJob) reapProcess() int {
123+
l := log.WithField("job.name", job.Config.Name)
124+
if job.activeConnections > 0 {
125+
return 0
126+
}
127+
if diff := time.Since(job.lastConnectionClosed); diff < job.coolDownTimeout {
128+
return 0
129+
}
130+
if job.process == nil {
131+
return 0
132+
}
133+
134+
job.lazyStartLock.Lock()
135+
defer job.lazyStartLock.Unlock()
136+
137+
// Verify all conditions again inside the lock
138+
if job.process == nil || job.activeConnections != 0 || job.lastConnectionClosed.IsZero() || time.Since(job.lastConnectionClosed) < job.coolDownTimeout {
139+
return 0
140+
}
141+
if !job.HasStarted() {
142+
l.Warn("job.process is not nil, but job.cmd or job.cmd.Process is nil; skipping reap cycle")
143+
return 0
144+
}
145+
146+
pidToReap := job.cmd.Process.Pid
147+
l.Infof("sending SIGTERM to idle process PID %d", pidToReap)
148+
if err := job.signal(syscall.SIGTERM); err != nil {
149+
l.WithError(err).Warnf("failed to send SIGTERM to PID %d", pidToReap)
150+
return 0
151+
}
152+
153+
return pidToReap
154+
}
155+
88156
func (job *LazyJob) startProcessReaper(ctx context.Context) {
89157
reaperInterval := job.coolDownTimeout / 2
90158
if reaperInterval < 1*time.Second {
@@ -103,72 +171,11 @@ func (job *LazyJob) startProcessReaper(ctx context.Context) {
103171
l.Info("context done, stopping lazy job process reaper")
104172
return
105173
case <-ticker.C:
106-
if job.activeConnections > 0 {
107-
continue
108-
}
109-
110-
diff := time.Since(job.lastConnectionClosed)
111-
if diff < job.coolDownTimeout {
112-
continue
113-
}
114-
115-
if job.process == nil {
174+
pidToReap := job.reapProcess()
175+
if pidToReap == 0 {
116176
continue
117177
}
118-
119-
// All conditions met to reap the process
120-
job.lazyStartLock.Lock()
121-
122-
// Verify all conditions again inside the lock
123-
if job.process == nil || job.activeConnections != 0 || job.lastConnectionClosed.IsZero() || time.Since(job.lastConnectionClosed) < job.coolDownTimeout {
124-
job.lazyStartLock.Unlock()
125-
continue
126-
}
127-
128-
if !job.HasStarted() {
129-
l.Warn("job.process is not nil, but job.cmd or job.cmd.Process is nil; skipping reap cycle")
130-
job.lazyStartLock.Unlock()
131-
continue
132-
}
133-
pidToReap := job.cmd.Process.Pid
134-
l.Infof("sending SIGTERM to idle process PID %d", pidToReap)
135-
if err := job.signal(syscall.SIGTERM); err != nil {
136-
l.WithError(err).Warnf("failed to send SIGTERM to PID %d", pidToReap)
137-
job.lazyStartLock.Unlock()
138-
continue
139-
}
140-
141-
job.lazyStartLock.Unlock()
142-
143-
graceTimer := time.NewTimer(lazyJobReapGracePeriod)
144-
defer graceTimer.Stop()
145-
146-
select {
147-
case <-graceTimer.C:
148-
job.lazyStartLock.Lock()
149-
// Check if the process we sent SIGTERM to is still running
150-
if job.process != nil && job.HasStarted() && job.cmd.Process.Pid == pidToReap {
151-
l.Warnf("process PID %d did not exit after SIGTERM and grace period; sending SIGKILL", pidToReap)
152-
if err := job.signalAll(syscall.SIGKILL); err != nil {
153-
l.WithError(err).Errorf("failed to send1 SIGKILL to PID %d", pidToReap)
154-
}
155-
} else if job.process != nil {
156-
// Process is not nil, but it's not the one we targeted.
157-
// This could happen if the job was quickly restarted.
158-
currentPid := -1
159-
if job.HasStarted() {
160-
currentPid = job.cmd.Process.Pid
161-
}
162-
l.Warnf("original process PID %d seems to have exited or changed; current PID is %d. Skipping SIGKILL.", pidToReap, currentPid)
163-
} else {
164-
// job.process is nil, so it was cleaned up.
165-
l.Infof("process PID %d exited gracefully after SIGTERM", pidToReap)
166-
}
167-
job.lazyStartLock.Unlock()
168-
case <-ctx.Done():
169-
l.Infof("context done during SIGTERM grace period for PID %d", pidToReap)
170-
return // Exit the reaper goroutine
171-
}
178+
job.startProcessGracePeriod(ctx, pidToReap)
172179
}
173180
}
174181
}()

0 commit comments

Comments
 (0)