fix: matrix-job data races + outputs, leaner offline test suite (#994)

Running the full suite under `-race` (dropping `-short`) exposed pre-existing data races in parallel matrix-job execution, fixed by not sharing mutable state across combinations:

- `containerDaemonSocket()`/`validVolumes()` derive per-job values instead of mutating shared `Config`
- `getWorkflowSecrets` builds a fresh map, `rc.steps()` clones each step, and go-git workdir access is serialized
- every write to a shared `Job`'s result/outputs runs under a per-`Job` lock, each combo interpolating outputs from a pristine snapshot (last wins, as on GitHub)

### Test suite

- capability gates (docker / network / host-tools / Linux) replace the `-short` skips, and the suite runs offline via local fixtures (the artifact flow uses an in-process loopback server, only the docker-action force-pull needs the network)
- drops redundant tests, adds a regression test for https://gitea.com/gitea/runner/issues/981 and a docker-in-docker harness (`make test-dind`)

---
This PR was written with the help of Claude Opus 4.7

Reviewed-on: https://gitea.com/gitea/runner/pulls/994
Reviewed-by: Nicolas <bircni@icloud.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-committed-by: silverwind <me@silverwind.io>
This commit is contained in:
silverwind
2026-05-29 05:23:10 +00:00
committed by silverwind
parent 0b9f251b6a
commit 270ea41232
69 changed files with 969 additions and 1176 deletions

View File

@@ -183,18 +183,25 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
func setJobResult(ctx context.Context, info jobInfo, rc *RunContext, success bool) {
logger := common.Logger(ctx)
jobResult := "success"
// we have only one result for a whole matrix build, so we need
// to keep an existing result state if we run a matrix
if len(info.matrix()) > 0 && rc.Run.Job().Result != "" {
jobResult = rc.Run.Job().Result
}
// Matrix combinations share one *model.Job and run in parallel; serialize the
// read-modify-write of the job result so a failing combination is not lost-updated by a
// concurrent succeeding one.
job := rc.Run.Job()
jobResult := func() string {
defer lockJob(job)()
result := "success"
// we have only one result for a whole matrix build, so we need
// to keep an existing result state if we run a matrix
if len(info.matrix()) > 0 && job.Result != "" {
result = job.Result
}
if !success {
result = "failure"
}
info.result(result)
return result
}()
if !success {
jobResult = "failure"
}
info.result(jobResult)
if rc.caller != nil {
// set reusable workflow job result
rc.caller.setReusedWorkflowJobResult(rc.JobName, jobResult) // For Gitea
@@ -220,7 +227,11 @@ func setJobOutputs(ctx context.Context, rc *RunContext) {
callerOutputs[k] = ee.Interpolate(ctx, ee.Interpolate(ctx, v.Value))
}
rc.caller.runContext.Run.Job().Outputs = callerOutputs
// Matrix combinations of a reusable-workflow caller share the caller's *model.Job;
// serialize the write so parallel combos don't race on its Outputs field.
callerJob := rc.caller.runContext.Run.Job()
defer lockJob(callerJob)()
callerJob.Outputs = callerOutputs
}
}