fix: matrix-job data races + outputs, leaner offline test suite (#994)

Running the full suite under `-race` (dropping `-short`) exposed pre-existing data races in parallel matrix-job execution, fixed by not sharing mutable state across combinations:

- `containerDaemonSocket()`/`validVolumes()` derive per-job values instead of mutating shared `Config`
- `getWorkflowSecrets` builds a fresh map, `rc.steps()` clones each step, and go-git workdir access is serialized
- every write to a shared `Job`'s result/outputs runs under a per-`Job` lock, and each combination interpolates its outputs from a pristine snapshot (the last write wins, as on GitHub)

### Test suite

- capability gates (docker / network / host-tools / Linux) replace the `-short` skips, and the suite runs offline via local fixtures (the artifact flow uses an in-process loopback server; only the docker-action force-pull needs the network)
- drops redundant tests, adds a regression test for https://gitea.com/gitea/runner/issues/981 and a docker-in-docker harness (`make test-dind`)

---
This PR was written with the help of Claude Opus 4.7

Reviewed-on: https://gitea.com/gitea/runner/pulls/994
Reviewed-by: Nicolas <bircni@icloud.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-committed-by: silverwind <me@silverwind.io>
This commit is contained in:
silverwind
2026-05-29 05:23:10 +00:00
committed by silverwind
parent 0b9f251b6a
commit 270ea41232
69 changed files with 969 additions and 1176 deletions

View File

@@ -325,14 +325,20 @@ func (j *Job) Needs() []string {
// RunsOn list for Job
func (j *Job) RunsOn() []string {
switch j.RawRunsOn.Kind {
return RunsOnFromNode(j.RawRunsOn)
}
// RunsOnFromNode parses the runs-on labels from a raw runs-on node, so callers can evaluate a
// copy of the node (avoiding mutation of the shared Job) before reading the labels.
func RunsOnFromNode(rawRunsOn yaml.Node) []string {
switch rawRunsOn.Kind {
case yaml.MappingNode:
var val struct {
Group string
Labels yaml.Node
}
if !decodeNode(j.RawRunsOn, &val) {
if !decodeNode(rawRunsOn, &val) {
return nil
}
@@ -344,7 +350,7 @@ func (j *Job) RunsOn() []string {
return labels
default:
return nodeAsStringSlice(j.RawRunsOn)
return nodeAsStringSlice(rawRunsOn)
}
}
@@ -645,6 +651,33 @@ type Step struct {
TimeoutMinutes string `yaml:"timeout-minutes"`
}
// Clone returns a copy of s that can be mutated without affecting s. The struct is copied by
// value, and the members that would otherwise remain shared with s — the If and Env yaml.Nodes
// and the With map — are duplicated. Steps are shared across parallel matrix runs, which mutate
// per-job fields (ID, Number, Shell) and evaluate If/Env in place, so every job needs its own copy.
func (s *Step) Clone() *Step {
	dup := new(Step)
	*dup = *s

	// Replace the reference-carrying members with independent copies.
	dup.If, dup.Env = CloneYamlNode(s.If), CloneYamlNode(s.Env)
	dup.With = maps.Clone(s.With)
	return dup
}
// CloneYamlNode returns a copy of n whose Content tree is duplicated recursively, so the result
// can be evaluated in place without touching a node that is shared across parallel jobs. A nil
// Content slice stays nil and nil children stay nil; every other field is copied by value.
//
// NOTE(review): only Content is recursed into. Any other pointer field of yaml.Node (e.g. an
// alias target) is presumably still shared with n — confirm this is acceptable for callers.
func CloneYamlNode(n yaml.Node) yaml.Node {
	// n is already a by-value copy; without children there is nothing left to detach.
	if n.Content == nil {
		return n
	}

	children := make([]*yaml.Node, len(n.Content))
	for idx := range n.Content {
		if src := n.Content[idx]; src != nil {
			dup := CloneYamlNode(*src)
			children[idx] = &dup
		}
	}
	n.Content = children
	return n
}
// String gets the name of step
func (s *Step) String() string {
if s.Name != "" {

View File

@@ -9,9 +9,29 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.yaml.in/yaml/v4"
)
// TestStepCloneIsolatesMutableFields is the regression guard for the parallel-matrix race fix.
// Matrix combinations share the job's *Step, so Clone() has to give each of them a copy whose
// If/Env nodes and With map can be changed independently. With a shallow copy the clone would
// share Env.Content's backing array (and the With map), leaking writes between combinations.
func TestStepCloneIsolatesMutableFields(t *testing.T) {
	const stepYAML = "if: ${{ env.X == 'a' }}\nenv:\n KEY: original\nwith:\n arg: original\n"

	var orig Step
	require.NoError(t, yaml.Unmarshal([]byte(stepYAML), &orig))
	require.Len(t, orig.Env.Content, 2) // [key, value]

	// Overwrite every field on the clone that Clone() is expected to isolate.
	clone := orig.Clone()
	clone.If.Value = "changed"
	clone.Env.Content[1].Value = "changed"
	clone.With["arg"] = "changed"

	// The original must still carry the values that were parsed from the YAML.
	assert.Equal(t, "${{ env.X == 'a' }}", orig.If.Value, "If must not be shared with the clone")
	assert.Equal(t, "original", orig.Env.Content[1].Value, "Env nodes must not be shared with the clone")
	assert.Equal(t, "original", orig.With["arg"], "With map must not be shared with the clone")
}
func TestReadWorkflow_ScheduleEvent(t *testing.T) {
yaml := `
name: local-action-docker-url