mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-01-12 02:30:18 +00:00
a8f5449cd9
There is a fundamental design problem of the "manager" and the "wait group". If nothing has started, the "Wait" just panics: sync: WaitGroup is reused before previous Wait has returned There is no clear solution besides a complete rewriting of the "manager" If there are some mistakes in the app.ini, end users would just see the "panic", but not the real error messages. A real case: #27643 This PR is just a quick fix for the annoying panic problem. (cherry picked from commit 90a3f2d4b7ed3890d9655c0334444f86d89b7b30)
204 lines
5.5 KiB
Go
204 lines
5.5 KiB
Go
// Copyright 2019 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
//go:build !windows
|
|
|
|
package graceful
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"os/signal"
|
|
"runtime/pprof"
|
|
"strconv"
|
|
"syscall"
|
|
"time"
|
|
|
|
"code.gitea.io/gitea/modules/graceful/releasereopen"
|
|
"code.gitea.io/gitea/modules/log"
|
|
"code.gitea.io/gitea/modules/process"
|
|
"code.gitea.io/gitea/modules/setting"
|
|
)
|
|
|
|
func pidMsg() systemdNotifyMsg {
|
|
return systemdNotifyMsg("MAINPID=" + strconv.Itoa(os.Getpid()))
|
|
}
|
|
|
|
// Notify systemd of status via the notify protocol
|
|
func (g *Manager) notify(msg systemdNotifyMsg) {
|
|
conn, err := getNotifySocket()
|
|
if err != nil {
|
|
// the err is logged in getNotifySocket
|
|
return
|
|
}
|
|
if conn == nil {
|
|
return
|
|
}
|
|
defer conn.Close()
|
|
|
|
if _, err = conn.Write([]byte(msg)); err != nil {
|
|
log.Warn("Failed to notify NOTIFY_SOCKET: %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
func (g *Manager) start() {
|
|
// Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager
|
|
pprof.SetGoroutineLabels(g.managerCtx)
|
|
defer pprof.SetGoroutineLabels(g.ctx)
|
|
|
|
g.isChild = len(os.Getenv(listenFDsEnv)) > 0 && os.Getppid() > 1
|
|
|
|
g.notify(statusMsg("Starting Gitea"))
|
|
g.notify(pidMsg())
|
|
go g.handleSignals(g.managerCtx)
|
|
|
|
// Handle clean up of unused provided listeners and delayed start-up
|
|
startupDone := make(chan struct{})
|
|
go func() {
|
|
defer close(startupDone)
|
|
// Wait till we're done getting all the listeners and then close the unused ones
|
|
func() {
|
|
// FIXME: there is a fundamental design problem of the "manager" and the "wait group".
|
|
// If nothing has started, the "Wait" just panics: sync: WaitGroup is reused before previous Wait has returned
|
|
// There is no clear solution besides a complete rewriting of the "manager"
|
|
defer func() {
|
|
_ = recover()
|
|
}()
|
|
g.createServerWaitGroup.Wait()
|
|
}()
|
|
// Ignore the error here there's not much we can do with it, they're logged in the CloseProvidedListeners function
|
|
_ = CloseProvidedListeners()
|
|
g.notify(readyMsg)
|
|
}()
|
|
if setting.StartupTimeout > 0 {
|
|
go func() {
|
|
select {
|
|
case <-startupDone:
|
|
return
|
|
case <-g.IsShutdown():
|
|
func() {
|
|
// When WaitGroup counter goes negative it will panic - we don't care about this so we can just ignore it.
|
|
defer func() {
|
|
_ = recover()
|
|
}()
|
|
// Ensure that the createServerWaitGroup stops waiting
|
|
for {
|
|
g.createServerWaitGroup.Done()
|
|
}
|
|
}()
|
|
return
|
|
case <-time.After(setting.StartupTimeout):
|
|
log.Error("Startup took too long! Shutting down")
|
|
g.notify(statusMsg("Startup took too long! Shutting down"))
|
|
g.notify(stoppingMsg)
|
|
g.doShutdown()
|
|
}
|
|
}()
|
|
}
|
|
}
|
|
|
|
func (g *Manager) handleSignals(ctx context.Context) {
|
|
ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Graceful: HandleSignals", process.SystemProcessType, true)
|
|
defer finished()
|
|
|
|
signalChannel := make(chan os.Signal, 1)
|
|
|
|
signal.Notify(
|
|
signalChannel,
|
|
syscall.SIGHUP,
|
|
syscall.SIGUSR1,
|
|
syscall.SIGUSR2,
|
|
syscall.SIGINT,
|
|
syscall.SIGTERM,
|
|
syscall.SIGTSTP,
|
|
)
|
|
|
|
watchdogTimeout := getWatchdogTimeout()
|
|
t := &time.Ticker{}
|
|
if watchdogTimeout != 0 {
|
|
g.notify(watchdogMsg)
|
|
t = time.NewTicker(watchdogTimeout / 2)
|
|
}
|
|
|
|
pid := syscall.Getpid()
|
|
for {
|
|
select {
|
|
case sig := <-signalChannel:
|
|
switch sig {
|
|
case syscall.SIGHUP:
|
|
log.Info("PID: %d. Received SIGHUP. Attempting GracefulRestart...", pid)
|
|
g.DoGracefulRestart()
|
|
case syscall.SIGUSR1:
|
|
log.Warn("PID %d. Received SIGUSR1. Releasing and reopening logs", pid)
|
|
g.notify(statusMsg("Releasing and reopening logs"))
|
|
if err := releasereopen.GetManager().ReleaseReopen(); err != nil {
|
|
log.Error("Error whilst releasing and reopening logs: %v", err)
|
|
}
|
|
case syscall.SIGUSR2:
|
|
log.Warn("PID %d. Received SIGUSR2. Hammering...", pid)
|
|
g.DoImmediateHammer()
|
|
case syscall.SIGINT:
|
|
log.Warn("PID %d. Received SIGINT. Shutting down...", pid)
|
|
g.DoGracefulShutdown()
|
|
case syscall.SIGTERM:
|
|
log.Warn("PID %d. Received SIGTERM. Shutting down...", pid)
|
|
g.DoGracefulShutdown()
|
|
case syscall.SIGTSTP:
|
|
log.Info("PID %d. Received SIGTSTP.", pid)
|
|
default:
|
|
log.Info("PID %d. Received %v.", pid, sig)
|
|
}
|
|
case <-t.C:
|
|
g.notify(watchdogMsg)
|
|
case <-ctx.Done():
|
|
log.Warn("PID: %d. Background context for manager closed - %v - Shutting down...", pid, ctx.Err())
|
|
g.DoGracefulShutdown()
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (g *Manager) doFork() error {
|
|
g.lock.Lock()
|
|
if g.forked {
|
|
g.lock.Unlock()
|
|
return errors.New("another process already forked. Ignoring this one")
|
|
}
|
|
g.forked = true
|
|
g.lock.Unlock()
|
|
|
|
g.notify(reloadingMsg)
|
|
|
|
// We need to move the file logs to append pids
|
|
setting.RestartLogsWithPIDSuffix()
|
|
|
|
_, err := RestartProcess()
|
|
|
|
return err
|
|
}
|
|
|
|
// DoGracefulRestart causes a graceful restart
|
|
func (g *Manager) DoGracefulRestart() {
|
|
if setting.GracefulRestartable {
|
|
log.Info("PID: %d. Forking...", os.Getpid())
|
|
err := g.doFork()
|
|
if err != nil {
|
|
if err.Error() == "another process already forked. Ignoring this one" {
|
|
g.DoImmediateHammer()
|
|
} else {
|
|
log.Error("Error whilst forking from PID: %d : %v", os.Getpid(), err)
|
|
}
|
|
}
|
|
// doFork calls RestartProcess which starts a new Gitea process, so this parent process needs to exit
|
|
// Otherwise some resources (eg: leveldb lock) will be held by this parent process and the new process will fail to start
|
|
log.Info("PID: %d. Shutting down after forking ...", os.Getpid())
|
|
g.doShutdown()
|
|
} else {
|
|
log.Info("PID: %d. Not set restartable. Shutting down...", os.Getpid())
|
|
g.notify(stoppingMsg)
|
|
g.doShutdown()
|
|
}
|
|
}
|