feat: Enable jobs.<job_id>.timeout-minutes and jobs.<job_id>.continue-on-error (#1032)

Two `jobs.<job_id>` workflow syntax fields were parsed from YAML but silently ignored. This PR implements both:

- **`jobs.<job_id>.timeout-minutes`** — applies a context deadline around the entire job execution (container start, pre-steps, main steps, post-steps). Mirrors the existing step-level `evaluateStepTimeout`. Supports expression interpolation (e.g. `${{ env.MY_TIMEOUT }}`).

- **`jobs.<job_id>.continue-on-error`** — evaluates the expression when a job fails. If all failing matrix combinations had `continue-on-error: true`, the job does not cause the workflow run to fail (`handleFailure` skips it), and the tolerated failure reports `success` to dependent jobs through the `needs` context so jobs gated on the default `if: success()` still run (matching GitHub). The "any firm failure wins" rule is serialised under the existing per-job lock, so parallel matrix combinations are safe.

Both features follow the same patterns already used at the step level (`evaluateStepTimeout` / `isContinueOnError` in `act/runner/step.go`).

## Version compatibility

These changes are backward compatible. With mismatched versions the feature degrades silently to the previous behaviour (field ignored) — no errors on either side.

- `timeout-minutes`: runner-only, no server dependency.
- `continue-on-error`: requires both this runner PR and the matching Gitea server PR to take full effect. With only one side updated, the field continues to be ignored.

Related: [GitHub](https://github.com/go-gitea/gitea/pull/38100)
---------

Co-authored-by: silverwind <2021+silverwind@noreply.gitea.com>
Co-authored-by: silverwind <me@silverwind.io>
Reviewed-on: https://gitea.com/gitea/runner/pulls/1032
Reviewed-by: silverwind <2021+silverwind@noreply.gitea.com>
Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
This commit is contained in:
Nicolas
2026-06-21 17:05:36 +00:00
parent 007717956a
commit 6bdcb54828
7 changed files with 355 additions and 24 deletions

View File

@@ -22,6 +22,7 @@ import (
"gitea.com/gitea/runner/act/common"
"gitea.com/gitea/runner/act/container"
"gitea.com/gitea/runner/act/exprparser"
"gitea.com/gitea/runner/act/model"
)
@@ -204,11 +205,21 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
return common.NewPipelineExecutor(info.startContainer(), common.NewPipelineExecutor(pipeline...).
Finally(func(ctx context.Context) error {
var cancel context.CancelFunc
if ctx.Err() == context.Canceled {
switch ctx.Err() {
case context.Canceled:
// in case of an aborted run, we still should execute the
// post steps to allow cleanup.
ctx, cancel = context.WithTimeout(common.WithLogger(context.Background(), common.Logger(ctx)), 5*time.Minute)
defer cancel()
case context.DeadlineExceeded:
// The job hit its timeout-minutes. Without a fresh context the post
// steps would run against the already-expired context and be skipped,
// so cleanup post-hooks (e.g. actions/checkout post, cache save) would
// not run. Derive the context with WithoutCancel so the new deadline
// applies but the job error state is preserved: the job is still
// reported as failed and container teardown matches a normal failure.
ctx, cancel = context.WithTimeout(context.WithoutCancel(ctx), 5*time.Minute)
defer cancel()
}
return postExecutor(ctx)
}).
@@ -223,6 +234,12 @@ func setJobResult(ctx context.Context, info jobInfo, rc *RunContext, success boo
// read-modify-write of the job result so a failing combination is not lost-updated by a
// concurrent succeeding one.
job := rc.Run.Job()
var continueOnError bool
if !success {
// Use a fresh context so an expired job timeout cannot block expression evaluation.
evalCtx := common.WithLogger(context.Background(), common.Logger(ctx))
continueOnError = evaluateJobContinueOnError(evalCtx, rc, job)
}
jobResult := func() string {
defer lockJob(job)()
result := "success"
@@ -233,6 +250,7 @@ func setJobResult(ctx context.Context, info jobInfo, rc *RunContext, success boo
}
if !success {
result = "failure"
job.SetContinueOnError(continueOnError)
}
info.result(result)
return result
@@ -271,6 +289,32 @@ func setJobOutputs(ctx context.Context, rc *RunContext) {
}
}
// applyJobTimeout bounds ctx by the job-level timeout-minutes setting, the
// job counterpart of the step-level evaluateStepTimeout in step.go.
//
// The raw timeout-minutes value is first interpolated through rc.ExprEval.
// When the interpolated value is empty or does not parse as a base-10 integer,
// ctx is handed back untouched together with a no-op cancel function, so the
// caller can always call the returned CancelFunc.
func applyJobTimeout(ctx context.Context, rc *RunContext, job *model.Job) (context.Context, context.CancelFunc) {
	noop := func() {}

	raw := rc.ExprEval.Interpolate(ctx, job.TimeoutMinutes)
	if raw == "" {
		return ctx, noop
	}

	minutes, err := strconv.ParseInt(raw, 10, 64)
	if err != nil {
		// Unparsable values are ignored: the job simply runs without a deadline.
		return ctx, noop
	}

	return context.WithTimeout(ctx, time.Duration(minutes)*time.Minute)
}
// evaluateJobContinueOnError reports whether the job's continue-on-error
// expression evaluates to true.
//
// A blank (or whitespace-only) expression yields false without evaluating
// anything. If evaluation returns an error, a warning is logged through the
// logger carried by ctx and false is returned.
func evaluateJobContinueOnError(ctx context.Context, rc *RunContext, job *model.Job) bool {
	trimmed := strings.TrimSpace(job.RawContinueOnError)
	if len(trimmed) == 0 {
		return false
	}

	evaluator := rc.NewExpressionEvaluator(ctx)
	tolerated, err := EvalBool(ctx, evaluator, trimmed, exprparser.DefaultStatusCheckNone)
	if err == nil {
		return tolerated
	}

	common.Logger(ctx).Warnf("continue-on-error expression %q evaluation failed: %v", trimmed, err)
	return false
}
func tryUploadJobSummary(ctx context.Context, rc *RunContext) {
if rc == nil || rc.JobContainer == nil || rc.Config == nil {
return