fix: matrix-job data races + outputs, leaner offline test suite (#994)

Running the full suite under `-race` (without `-short`) exposed pre-existing data races in parallel matrix-job execution. They are fixed by no longer sharing mutable state across matrix combinations:

- `containerDaemonSocket()`/`validVolumes()` derive per-job values instead of mutating shared `Config`
- `getWorkflowSecrets` builds a fresh map, `rc.steps()` clones each step, and go-git workdir access is serialized
- every write to a shared `Job`'s result/outputs runs under a per-`Job` lock, and each combo interpolates its outputs from a pristine snapshot of the job's output expressions (last write wins, as on GitHub)

### Test suite

- capability gates (docker / network / host-tools / Linux) replace the `-short` skips, and the suite runs offline via local fixtures (the artifact flow uses an in-process loopback server; only the docker-action force-pull needs the network)
- drops redundant tests, adds a regression test for https://gitea.com/gitea/runner/issues/981 and a docker-in-docker harness (`make test-dind`)

---
This PR was written with the help of Claude Opus 4.7

Reviewed-on: https://gitea.com/gitea/runner/pulls/994
Reviewed-by: Nicolas <bircni@icloud.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-committed-by: silverwind <me@silverwind.io>
This commit is contained in:
silverwind
2026-05-29 05:23:10 +00:00
committed by silverwind
parent 0b9f251b6a
commit 270ea41232
69 changed files with 969 additions and 1176 deletions

View File

@@ -8,6 +8,7 @@ import (
"context"
"encoding/json"
"fmt"
"maps"
"os"
"runtime"
"sync"
@@ -250,7 +251,14 @@ func (runner *runnerImpl) NewPlanExecutor(plan *model.Plan) common.Executor {
return executor(common.WithJobErrorContainer(WithJobLogger(ctx, rc.Run.JobID, jobName, rc.Config, &rc.Masks, matrix)))
})
}
pipeline = append(pipeline, common.NewParallelExecutor(maxParallel, stageExecutor...))
// Run all matrix combinations of this job, then drop its aggregation mutex: the
// combos are the only users of it, so once they finish the jobMutexes entry can be
// released, keeping the map from growing unbounded over a long-lived runner.
stageParallel := common.NewParallelExecutor(maxParallel, stageExecutor...)
pipeline = append(pipeline, func(ctx context.Context) error {
defer jobMutexes.Delete(job)
return stageParallel(ctx)
})
}
// For pipeline execution:
@@ -334,6 +342,11 @@ func (runner *runnerImpl) newRunContext(ctx context.Context, run *model.Run, mat
}
rc.ExprEval = rc.NewExpressionEvaluator(ctx)
rc.Name = rc.ExprEval.Interpolate(ctx, run.String())
// Snapshot the job's pristine output expressions now, before any matrix combo runs and
// rewrites the shared Job.Outputs (see interpolateOutputs).
if job := run.Job(); job != nil {
rc.outputTemplate = maps.Clone(job.Outputs)
}
return rc
}