From 0c40cff9a44bc79617f2868ee41c77c2cb973674 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 2 Aug 2024 08:42:08 +0800 Subject: [PATCH] Clear up old Actions logs (#31735) Part of #24256. Clear up old action logs to free up storage space. Users will see a message indicating that the log has been cleared if they view old tasks. image Docs: https://gitea.com/gitea/docs/pulls/40 --------- Co-authored-by: silverwind (cherry picked from commit 687c1182482ad9443a5911c068b317a91c91d586) Conflicts: custom/conf/app.example.ini routers/web/repo/actions/view.go trivial context conflict --- custom/conf/app.example.ini | 4 +- models/actions/task.go | 16 ++++++-- models/migrations/migrations.go | 2 + models/migrations/v1_23/v302.go | 18 +++++++++ modules/setting/actions.go | 16 ++++++-- options/locale/locale_en-US.ini | 1 + routers/web/repo/actions/view.go | 21 ++++++++++ services/actions/cleanup.go | 67 +++++++++++++++++++++++++++----- services/cron/tasks_actions.go | 2 +- 9 files changed, 129 insertions(+), 18 deletions(-) create mode 100644 models/migrations/v1_23/v302.go diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 8307dd31a1..a22276a0d6 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2710,7 +2710,9 @@ LEVEL = Info ;ENABLED = true ;; Default address to get action plugins, e.g. the default value means downloading from "https://code.forgejo.org/actions/checkout" for "uses: actions/checkout@v3" ;DEFAULT_ACTIONS_URL = https://code.forgejo.org -;; Default artifact retention time in days, default is 90 days +;; Logs retention time in days. Old logs will be deleted after this period. +;LOG_RETENTION_DAYS = 365 +;; Default artifact retention time in days. Artifacts could have their own retention periods by setting the `retention-days` option in `actions/upload-artifact` step. ;ARTIFACT_RETENTION_DAYS = 90 ;; Timeout to stop the task which have running status, but haven't been updated for a long time ;ZOMBIE_TASK_TIMEOUT = 10m diff --git a/models/actions/task.go b/models/actions/task.go index 9946cf5233..1d6d68309b 100644 --- a/models/actions/task.go +++ b/models/actions/task.go @@ -35,7 +35,7 @@ type ActionTask struct { RunnerID int64 `xorm:"index"` Status Status `xorm:"index"` Started timeutil.TimeStamp `xorm:"index"` - Stopped timeutil.TimeStamp + Stopped timeutil.TimeStamp `xorm:"index(stopped_log_expired)"` RepoID int64 `xorm:"index"` OwnerID int64 `xorm:"index"` @@ -51,8 +51,8 @@ type ActionTask struct { LogInStorage bool // read log from database or from storage LogLength int64 // lines count LogSize int64 // blob size - LogIndexes LogIndexes `xorm:"LONGBLOB"` // line number to offset - LogExpired bool // files that are too old will be deleted + LogIndexes LogIndexes `xorm:"LONGBLOB"` // line number to offset + LogExpired bool `xorm:"index(stopped_log_expired)"` // files that are too old will be deleted Created timeutil.TimeStamp `xorm:"created"` Updated timeutil.TimeStamp `xorm:"updated index"` @@ -470,6 +470,16 @@ func StopTask(ctx context.Context, taskID int64, status Status) error { return nil } +func FindOldTasksToExpire(ctx context.Context, olderThan timeutil.TimeStamp, limit int) ([]*ActionTask, error) { + e := db.GetEngine(ctx) + + tasks := make([]*ActionTask, 0, limit) + // Check "stopped > 0" to avoid deleting tasks that are still running + return tasks, e.Where("stopped > 0 AND stopped < ? AND log_expired = ?", olderThan, false). + Limit(limit). + Find(&tasks) +} + func isSubset(set, subset []string) bool { m := make(container.Set[string], len(set)) for _, v := range set { diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index e082cd2a22..d7e951f8bc 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -597,6 +597,8 @@ var migrations = []Migration{ NewMigration("Add force-push branch protection support", v1_23.AddForcePushBranchProtection), // v301 -> v302 NewMigration("Add skip_secondary_authorization option to oauth2 application table", v1_23.AddSkipSecondaryAuthColumnToOAuth2ApplicationTable), + // v302 -> v303 + NewMigration("Add index to action_task stopped log_expired", v1_23.AddIndexToActionTaskStoppedLogExpired), } // GetCurrentDBVersion returns the current db version diff --git a/models/migrations/v1_23/v302.go b/models/migrations/v1_23/v302.go new file mode 100644 index 0000000000..d7ea03eb3d --- /dev/null +++ b/models/migrations/v1_23/v302.go @@ -0,0 +1,18 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package v1_23 //nolint + +import ( + "code.gitea.io/gitea/modules/timeutil" + + "xorm.io/xorm" +) + +func AddIndexToActionTaskStoppedLogExpired(x *xorm.Engine) error { + type ActionTask struct { + Stopped timeutil.TimeStamp `xorm:"index(stopped_log_expired)"` + LogExpired bool `xorm:"index(stopped_log_expired)"` + } + return x.Sync(new(ActionTask)) +} diff --git a/modules/setting/actions.go b/modules/setting/actions.go index 804ed9ec72..2bb8471b64 100644 --- a/modules/setting/actions.go +++ b/modules/setting/actions.go @@ -12,10 +12,11 @@ import ( // Actions settings var ( Actions = struct { - LogStorage *Storage // how the created logs should be stored - ArtifactStorage *Storage // how the created artifacts should be stored - ArtifactRetentionDays int64 `ini:"ARTIFACT_RETENTION_DAYS"` Enabled bool + LogStorage *Storage // how the created logs should be stored + LogRetentionDays int64 `ini:"LOG_RETENTION_DAYS"` + ArtifactStorage *Storage // how the created artifacts should be stored + ArtifactRetentionDays int64 `ini:"ARTIFACT_RETENTION_DAYS"` DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"` ZombieTaskTimeout time.Duration `ini:"ZOMBIE_TASK_TIMEOUT"` EndlessTaskTimeout time.Duration `ini:"ENDLESS_TASK_TIMEOUT"` @@ -61,10 +62,17 @@ func loadActionsFrom(rootCfg ConfigProvider) error { if err != nil { return err } + // default to 1 year + if Actions.LogRetentionDays <= 0 { + Actions.LogRetentionDays = 365 + } actionsSec, _ := rootCfg.GetSection("actions.artifacts") Actions.ArtifactStorage, err = getStorage(rootCfg, "actions_artifacts", "", actionsSec) + if err != nil { + return err + } // default to 90 days in Github Actions if Actions.ArtifactRetentionDays <= 0 { @@ -75,5 +83,5 @@ func loadActionsFrom(rootCfg ConfigProvider) error { Actions.EndlessTaskTimeout = sec.Key("ENDLESS_TASK_TIMEOUT").MustDuration(3 * time.Hour) Actions.AbandonedJobTimeout = sec.Key("ABANDONED_JOB_TIMEOUT").MustDuration(24 * time.Hour) - return err + return nil } diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 9821c1a293..17d37f0ea2 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -3827,6 +3827,7 @@ runs.no_workflows.quick_start = Don't know how to start with Forgejo Actions? Se runs.no_workflows.documentation = For more information on Forgejo Actions, see the documentation. runs.no_runs = The workflow has no runs yet. runs.empty_commit_message = (empty commit message) +runs.expire_log_message = Logs have been purged because they were too old. workflow.disable = Disable workflow workflow.disable_success = Workflow "%s" disabled successfully. diff --git a/routers/web/repo/actions/view.go b/routers/web/repo/actions/view.go index e08e76b78b..bc1ecbfc1e 100644 --- a/routers/web/repo/actions/view.go +++ b/routers/web/repo/actions/view.go @@ -271,6 +271,27 @@ func ViewPost(ctx *context_module.Context) { step := steps[cursor.Step] + // if task log is expired, return a consistent log line + if task.LogExpired { + if cursor.Cursor == 0 { + resp.Logs.StepsLog = append(resp.Logs.StepsLog, &ViewStepLog{ + Step: cursor.Step, + Cursor: 1, + Lines: []*ViewStepLogLine{ + { + Index: 1, + Message: ctx.Locale.TrString("actions.runs.expire_log_message"), + // Timestamp doesn't mean anything when the log is expired. + // Set it to the task's updated time since it's probably the time when the log has expired. + Timestamp: float64(task.Updated.AsTime().UnixNano()) / float64(time.Second), + }, + }, + Started: int64(step.Started), + }) + } + continue + } + logLines := make([]*ViewStepLogLine, 0) // marshal to '[]' instead of 'null' in json index := step.LogIndex + cursor.Cursor diff --git a/services/actions/cleanup.go b/services/actions/cleanup.go index 6ccc8dd198..1223ebcab6 100644 --- a/services/actions/cleanup.go +++ b/services/actions/cleanup.go @@ -5,18 +5,30 @@ package actions import ( "context" + "fmt" + "time" - "code.gitea.io/gitea/models/actions" + actions_model "code.gitea.io/gitea/models/actions" + actions_module "code.gitea.io/gitea/modules/actions" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/timeutil" ) // Cleanup removes expired actions logs, data and artifacts -func Cleanup(taskCtx context.Context) error { - // TODO: clean up expired actions logs - +func Cleanup(ctx context.Context) error { // clean up expired artifacts - return CleanupArtifacts(taskCtx) + if err := CleanupArtifacts(ctx); err != nil { + return fmt.Errorf("cleanup artifacts: %w", err) + } + + // clean up old logs + if err := CleanupLogs(ctx); err != nil { + return fmt.Errorf("cleanup logs: %w", err) + } + + return nil } // CleanupArtifacts removes expired add need-deleted artifacts and set records expired status @@ -28,13 +40,13 @@ func CleanupArtifacts(taskCtx context.Context) error { } func cleanExpiredArtifacts(taskCtx context.Context) error { - artifacts, err := actions.ListNeedExpiredArtifacts(taskCtx) + artifacts, err := actions_model.ListNeedExpiredArtifacts(taskCtx) if err != nil { return err } log.Info("Found %d expired artifacts", len(artifacts)) for _, artifact := range artifacts { - if err := actions.SetArtifactExpired(taskCtx, artifact.ID); err != nil { + if err := actions_model.SetArtifactExpired(taskCtx, artifact.ID); err != nil { log.Error("Cannot set artifact %d expired: %v", artifact.ID, err) continue } @@ -52,13 +64,13 @@ const deleteArtifactBatchSize = 100 func cleanNeedDeleteArtifacts(taskCtx context.Context) error { for { - artifacts, err := actions.ListPendingDeleteArtifacts(taskCtx, deleteArtifactBatchSize) + artifacts, err := actions_model.ListPendingDeleteArtifacts(taskCtx, deleteArtifactBatchSize) if err != nil { return err } log.Info("Found %d artifacts pending deletion", len(artifacts)) for _, artifact := range artifacts { - if err := actions.SetArtifactDeleted(taskCtx, artifact.ID); err != nil { + if err := actions_model.SetArtifactDeleted(taskCtx, artifact.ID); err != nil { log.Error("Cannot set artifact %d deleted: %v", artifact.ID, err) continue } @@ -75,3 +87,40 @@ func cleanNeedDeleteArtifacts(taskCtx context.Context) error { } return nil } + +const deleteLogBatchSize = 100 + +// CleanupLogs removes logs which are older than the configured retention time +func CleanupLogs(ctx context.Context) error { + olderThan := timeutil.TimeStampNow().AddDuration(-time.Duration(setting.Actions.LogRetentionDays) * 24 * time.Hour) + + count := 0 + for { + tasks, err := actions_model.FindOldTasksToExpire(ctx, olderThan, deleteLogBatchSize) + if err != nil { + return fmt.Errorf("find old tasks: %w", err) + } + for _, task := range tasks { + if err := actions_module.RemoveLogs(ctx, task.LogInStorage, task.LogFilename); err != nil { + log.Error("Failed to remove log %s (in storage %v) of task %v: %v", task.LogFilename, task.LogInStorage, task.ID, err) + // do not return error here, continue to next task + continue + } + task.LogIndexes = nil // clear log indexes since it's a heavy field + task.LogExpired = true + if err := actions_model.UpdateTask(ctx, task, "log_indexes", "log_expired"); err != nil { + log.Error("Failed to update task %v: %v", task.ID, err) + // do not return error here, continue to next task + continue + } + count++ + log.Trace("Removed log %s of task %v", task.LogFilename, task.ID) + } + if len(tasks) < deleteLogBatchSize { + break + } + } + + log.Info("Removed %d logs", count) + return nil +} diff --git a/services/cron/tasks_actions.go b/services/cron/tasks_actions.go index 9b5e0b9f41..59cfe36d14 100644 --- a/services/cron/tasks_actions.go +++ b/services/cron/tasks_actions.go @@ -68,7 +68,7 @@ func registerScheduleTasks() { func registerActionsCleanup() { RegisterTaskFatal("cleanup_actions", &BaseConfig{ Enabled: true, - RunAtStart: true, + RunAtStart: false, Schedule: "@midnight", }, func(ctx context.Context, _ *user_model.User, _ Config) error { return actions_service.Cleanup(ctx)