mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-22 09:44:24 +02:00
feat: Enable jobs.<job_id>.timeout-minutes and jobs.<job_id>.continue-on-error (#1032)
Two `jobs.<job_id>` workflow syntax fields were parsed from YAML but silently ignored. This PR implements both:
- **`jobs.<job_id>.timeout-minutes`** — applies a context deadline around the entire job execution (container start, pre-steps, main steps, post-steps). Mirrors the existing step-level `evaluateStepTimeout`. Supports expression interpolation (e.g. `${{ env.MY_TIMEOUT }}`).
- **`jobs.<job_id>.continue-on-error`** — evaluates the expression when a job fails. If all failing matrix combinations had `continue-on-error: true`, the job does not cause the workflow run to fail (`handleFailure` skips it), and the tolerated failure reports `success` to dependent jobs through the `needs` context so jobs gated on the default `if: success()` still run (matching GitHub). The "any firm failure wins" rule is serialised under the existing per-job lock, so parallel matrix combinations are safe.
Both features follow the same patterns already used at the step level (`evaluateStepTimeout` / `isContinueOnError` in `act/runner/step.go`).
## Version compatibility
These changes are backward compatible. With mismatched versions the feature degrades silently to the previous behaviour (field ignored) — no errors on either side.
- `timeout-minutes`: runner-only, no server dependency.
- `continue-on-error`: requires both this runner PR and the matching Gitea server PR to take full effect. With only one side updated, the field continues to be ignored.
Related: [Github](https://github.com/go-gitea/gitea/pull/38100)
---------
Co-authored-by: silverwind <2021+silverwind@noreply.gitea.com>
Co-authored-by: silverwind <me@silverwind.io>
Reviewed-on: https://gitea.com/gitea/runner/pulls/1032
Reviewed-by: silverwind <2021+silverwind@noreply.gitea.com>
Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
This commit is contained in:
@@ -28,6 +28,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/require"
|
||||
yaml "go.yaml.in/yaml/v4"
|
||||
)
|
||||
|
||||
func TestJobExecutor(t *testing.T) {
|
||||
@@ -347,6 +348,133 @@ func TestNewJobExecutor(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewJobExecutorRunsPostStepsAfterTimeout guards the timeout-minutes cleanup
|
||||
// path: when a job exceeds its timeout the job context is DeadlineExceeded, but
|
||||
// the post steps (cleanup hooks like actions/checkout post and cache save) must
|
||||
// still run against a fresh, non-expired context, and the job must still be
|
||||
// reported as failed.
|
||||
func TestNewJobExecutorRunsPostStepsAfterTimeout(t *testing.T) {
|
||||
ctx := common.WithJobErrorContainer(context.Background())
|
||||
// The timeout is generous so the main step (which blocks on ctx.Done below) is
|
||||
// always reached before the deadline fires; otherwise the pipeline would
|
||||
// short-circuit before the step runs and the job error would never be set.
|
||||
ctx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
jim := &jobInfoMock{}
|
||||
sfm := &stepFactoryMock{}
|
||||
rc := &RunContext{
|
||||
JobContainer: &jobContainerMock{},
|
||||
Run: &model.Run{
|
||||
JobID: "test",
|
||||
Workflow: &model.Workflow{
|
||||
Jobs: map[string]*model.Job{
|
||||
"test": {},
|
||||
},
|
||||
},
|
||||
},
|
||||
Config: &Config{},
|
||||
}
|
||||
rc.ExprEval = rc.NewExpressionEvaluator(ctx)
|
||||
|
||||
stepModel := &model.Step{ID: "1"}
|
||||
jim.On("steps").Return([]*model.Step{stepModel})
|
||||
jim.On("matrix").Return(map[string]any{})
|
||||
jim.On("startContainer").Return(func(ctx context.Context) error { return nil })
|
||||
jim.On("interpolateOutputs").Return(func(ctx context.Context) error { return nil })
|
||||
jim.On("closeContainer").Return(func(ctx context.Context) error { return nil })
|
||||
// The job timed out, so it must be reported as failed. stopContainer is left
|
||||
// unexpected on purpose: a timed-out (failed) job preserves its error state, so
|
||||
// the graceful stop is skipped exactly like any other failure without AutoRemove.
|
||||
jim.On("result", "failure")
|
||||
|
||||
sm := &stepMock{}
|
||||
sfm.On("newStep", stepModel, rc).Return(sm, nil)
|
||||
sm.On("pre").Return(func(ctx context.Context) error { return nil })
|
||||
// The main step runs past the job timeout: it blocks until the job context is
|
||||
// done, mirroring a step that overruns timeout-minutes.
|
||||
sm.On("main").Return(func(ctx context.Context) error {
|
||||
<-ctx.Done()
|
||||
return ctx.Err()
|
||||
})
|
||||
|
||||
var postRan bool
|
||||
var postCtxErr error
|
||||
sm.On("post").Return(func(ctx context.Context) error {
|
||||
postRan = true
|
||||
postCtxErr = ctx.Err()
|
||||
return nil
|
||||
})
|
||||
|
||||
executor := newJobExecutor(jim, sfm, rc)
|
||||
// The executor itself returns nil on timeout: the failure is surfaced through
|
||||
// the job result ("failure", asserted via the result mock below), not the
|
||||
// return value.
|
||||
require.NoError(t, executor(ctx))
|
||||
|
||||
assert.True(t, postRan, "post step must run after a job timeout")
|
||||
require.NoError(t, postCtxErr, "post step must run against a fresh, non-expired context")
|
||||
|
||||
jim.AssertExpectations(t)
|
||||
sfm.AssertExpectations(t)
|
||||
sm.AssertExpectations(t)
|
||||
}
|
||||
|
||||
// TestSetJobResultMatrixContinueOnError exercises the parallel-matrix path
|
||||
// end-to-end: two combinations share one *model.Job and continue-on-error is
|
||||
// keyed on matrix.experimental, so one combination tolerates its failure and the
|
||||
// other does not. The job is reported as continue-on-error only when EVERY failing
|
||||
// combination was tolerated; a single firm failure makes the whole job firm, and
|
||||
// handleFailure then fails the run.
|
||||
func TestSetJobResultMatrixContinueOnError(t *testing.T) {
|
||||
const jobYAML = "continue-on-error: ${{ matrix.experimental }}\nruns-on: ubuntu-latest"
|
||||
|
||||
newSharedJob := func(t *testing.T) (*model.Job, *model.Workflow) {
|
||||
t.Helper()
|
||||
var job *model.Job
|
||||
require.NoError(t, yaml.Unmarshal([]byte(jobYAML), &job))
|
||||
return job, &model.Workflow{
|
||||
Name: "workflow1",
|
||||
Jobs: map[string]*model.Job{"job1": job},
|
||||
}
|
||||
}
|
||||
|
||||
planFor := func(wf *model.Workflow) *model.Plan {
|
||||
return &model.Plan{Stages: []*model.Stage{{Runs: []*model.Run{{Workflow: wf, JobID: "job1"}}}}}
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// fail drives a single matrix combination through the failure path; each
|
||||
// RunContext is its own jobInfo (rc implements jobInfo) and shares the job.
|
||||
fail := func(wf *model.Workflow, experimental bool) {
|
||||
rc := newTestRC(wf, map[string]any{"experimental": experimental})
|
||||
setJobResult(ctx, rc, rc, false)
|
||||
}
|
||||
|
||||
t.Run("one tolerated and one firm failure fails the run", func(t *testing.T) {
|
||||
job, wf := newSharedJob(t)
|
||||
// Order is intentional: the tolerated combination finishes first, then the
|
||||
// firm one. The firm-failure latch must still win regardless of order.
|
||||
fail(wf, true)
|
||||
fail(wf, false)
|
||||
|
||||
assert.Equal(t, "failure", job.Result)
|
||||
assert.False(t, job.ContinueOnError, "a single firm failure must make the whole job firm")
|
||||
assert.Error(t, handleFailure(planFor(wf))(ctx))
|
||||
})
|
||||
|
||||
t.Run("all tolerated failures do not fail the run", func(t *testing.T) {
|
||||
job, wf := newSharedJob(t)
|
||||
fail(wf, true)
|
||||
fail(wf, true)
|
||||
|
||||
assert.Equal(t, "failure", job.Result)
|
||||
assert.True(t, job.ContinueOnError, "every failing combination was tolerated")
|
||||
assert.NoError(t, handleFailure(planFor(wf))(ctx))
|
||||
})
|
||||
}
|
||||
|
||||
func TestHasJobSummaryCapability(t *testing.T) {
|
||||
assert.True(t, hasJobSummaryCapability("cache,job-summary artifacts"))
|
||||
assert.True(t, hasJobSummaryCapability("cache,\njob-summary\tartifacts"))
|
||||
@@ -674,3 +802,104 @@ func tarArchive(t *testing.T, entries ...tarEntry) []byte {
|
||||
require.NoError(t, tw.Close())
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func newTestRC(wf *model.Workflow, matrix map[string]any) *RunContext {
|
||||
return &RunContext{
|
||||
Config: &Config{
|
||||
Workdir: ".",
|
||||
Platforms: map[string]string{
|
||||
"ubuntu-latest": "ubuntu-latest",
|
||||
},
|
||||
},
|
||||
StepResults: map[string]*model.StepResult{},
|
||||
Env: map[string]string{},
|
||||
Matrix: matrix,
|
||||
Run: &model.Run{JobID: "job1", Workflow: wf},
|
||||
}
|
||||
}
|
||||
|
||||
func makeTestRC(t *testing.T, jobYAML string) *RunContext {
|
||||
t.Helper()
|
||||
var job *model.Job
|
||||
require.NoError(t, yaml.Unmarshal([]byte(jobYAML), &job))
|
||||
rc := newTestRC(&model.Workflow{
|
||||
Name: "workflow1",
|
||||
Jobs: map[string]*model.Job{"job1": job},
|
||||
}, nil)
|
||||
rc.ExprEval = rc.NewExpressionEvaluator(context.Background())
|
||||
return rc
|
||||
}
|
||||
|
||||
func TestApplyJobTimeout(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
yaml string
|
||||
wantTimeout bool
|
||||
}{
|
||||
{"empty", "runs-on: ubuntu-latest", false},
|
||||
{"integer", "timeout-minutes: 5\nruns-on: ubuntu-latest", true},
|
||||
{"non-numeric ignored", "timeout-minutes: abc\nruns-on: ubuntu-latest", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
rc := makeTestRC(t, tc.yaml)
|
||||
ctx := context.Background()
|
||||
newCtx, cancel := applyJobTimeout(ctx, rc, rc.Run.Job())
|
||||
defer cancel()
|
||||
_, hasDeadline := newCtx.Deadline()
|
||||
assert.Equal(t, tc.wantTimeout, hasDeadline)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvaluateJobContinueOnError(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
yaml string
|
||||
want bool
|
||||
}{
|
||||
{"absent", "runs-on: ubuntu-latest", false},
|
||||
{"true", "continue-on-error: true\nruns-on: ubuntu-latest", true},
|
||||
{"false", "continue-on-error: false\nruns-on: ubuntu-latest", false},
|
||||
{"expression true", "continue-on-error: ${{ 'x' == 'x' }}\nruns-on: ubuntu-latest", true},
|
||||
{"expression false", "continue-on-error: ${{ 'x' != 'x' }}\nruns-on: ubuntu-latest", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
rc := makeTestRC(t, tc.yaml)
|
||||
got := evaluateJobContinueOnError(context.Background(), rc, rc.Run.Job())
|
||||
assert.Equal(t, tc.want, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestJobSetContinueOnError(t *testing.T) {
|
||||
t.Run("first call true", func(t *testing.T) {
|
||||
j := &model.Job{}
|
||||
j.SetContinueOnError(true)
|
||||
assert.True(t, j.ContinueOnError)
|
||||
})
|
||||
t.Run("first call false", func(t *testing.T) {
|
||||
j := &model.Job{}
|
||||
j.SetContinueOnError(false)
|
||||
assert.False(t, j.ContinueOnError)
|
||||
})
|
||||
t.Run("true then false locks to false", func(t *testing.T) {
|
||||
j := &model.Job{}
|
||||
j.SetContinueOnError(true)
|
||||
j.SetContinueOnError(false)
|
||||
assert.False(t, j.ContinueOnError)
|
||||
})
|
||||
t.Run("false then true stays false", func(t *testing.T) {
|
||||
j := &model.Job{}
|
||||
j.SetContinueOnError(false)
|
||||
j.SetContinueOnError(true)
|
||||
assert.False(t, j.ContinueOnError)
|
||||
})
|
||||
t.Run("true then true stays true", func(t *testing.T) {
|
||||
j := &model.Job{}
|
||||
j.SetContinueOnError(true)
|
||||
j.SetContinueOnError(true)
|
||||
assert.True(t, j.ContinueOnError)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user