fix: matrix-job data races + outputs, leaner offline test suite (#994)

Running the full suite under `-race` (dropping `-short`) exposed pre-existing data races in parallel matrix-job execution, fixed by not sharing mutable state across combinations:

- `containerDaemonSocket()`/`validVolumes()` derive per-job values instead of mutating shared `Config`
- `getWorkflowSecrets` builds a fresh map, `rc.steps()` clones each step, and go-git workdir access is serialized
- every write to a shared `Job`'s result/outputs runs under a per-`Job` lock, each combo interpolating outputs from a pristine snapshot (last wins, as on GitHub)

### Test suite

- capability gates (docker / network / host-tools / Linux) replace the `-short` skips, and the suite runs offline via local fixtures (the artifact flow uses an in-process loopback server, only the docker-action force-pull needs the network)
- drops redundant tests, adds a regression test for https://gitea.com/gitea/runner/issues/981 and a docker-in-docker harness (`make test-dind`)

---
This PR was written with the help of Claude Opus 4.7

Reviewed-on: https://gitea.com/gitea/runner/pulls/994
Reviewed-by: Nicolas <bircni@icloud.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-committed-by: silverwind <me@silverwind.io>
This commit is contained in:
silverwind
2026-05-29 05:23:10 +00:00
committed by silverwind
parent 0b9f251b6a
commit 270ea41232
69 changed files with 969 additions and 1176 deletions

View File

@@ -325,14 +325,20 @@ func (j *Job) Needs() []string {
// RunsOn list for Job
func (j *Job) RunsOn() []string {
switch j.RawRunsOn.Kind {
return RunsOnFromNode(j.RawRunsOn)
}
// RunsOnFromNode parses the runs-on labels from a raw runs-on node, so callers can evaluate a
// copy of the node (avoiding mutation of the shared Job) before reading the labels.
func RunsOnFromNode(rawRunsOn yaml.Node) []string {
switch rawRunsOn.Kind {
case yaml.MappingNode:
var val struct {
Group string
Labels yaml.Node
}
if !decodeNode(j.RawRunsOn, &val) {
if !decodeNode(rawRunsOn, &val) {
return nil
}
@@ -344,7 +350,7 @@ func (j *Job) RunsOn() []string {
return labels
default:
return nodeAsStringSlice(j.RawRunsOn)
return nodeAsStringSlice(rawRunsOn)
}
}
@@ -645,6 +651,33 @@ type Step struct {
TimeoutMinutes string `yaml:"timeout-minutes"`
}
// Clone returns a deep copy safe to mutate independently of s. Job steps are shared across
// parallel matrix runs, which mutate per-job fields (ID, Number, Shell) and evaluate the If/Env
// yaml.Nodes in place, so each job must own its copy.
func (s *Step) Clone() *Step {
clone := *s
clone.If = CloneYamlNode(s.If)
clone.Env = CloneYamlNode(s.Env)
clone.With = maps.Clone(s.With)
return &clone
}
// CloneYamlNode returns a deep copy of a yaml.Node so callers can evaluate it in place without
// mutating a node shared across parallel jobs.
func CloneYamlNode(n yaml.Node) yaml.Node {
clone := n
if n.Content != nil {
clone.Content = make([]*yaml.Node, len(n.Content))
for i, child := range n.Content {
if child != nil {
childClone := CloneYamlNode(*child)
clone.Content[i] = &childClone
}
}
}
return clone
}
// String gets the name of step
func (s *Step) String() string {
if s.Name != "" {