fix: matrix-job data races + outputs, leaner offline test suite (#994)

Running the full suite under `-race` (dropping `-short`) exposed pre-existing data races in parallel matrix-job execution, fixed by not sharing mutable state across combinations:

- `containerDaemonSocket()`/`validVolumes()` derive per-job values instead of mutating shared `Config`
- `getWorkflowSecrets` builds a fresh map, `rc.steps()` clones each step, and go-git workdir access is serialized
- every write to a shared `Job`'s result/outputs runs under a per-`Job` lock, each combo interpolating outputs from a pristine snapshot (last wins, as on GitHub)

### Test suite

- capability gates (docker / network / host-tools / Linux) replace the `-short` skips, and the suite runs offline via local fixtures (the artifact flow uses an in-process loopback server, only the docker-action force-pull needs the network)
- drops redundant tests, adds a regression test for https://gitea.com/gitea/runner/issues/981 and a docker-in-docker harness (`make test-dind`)

---
This PR was written with the help of Claude Opus 4.7

Reviewed-on: https://gitea.com/gitea/runner/pulls/994
Reviewed-by: Nicolas <bircni@icloud.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-committed-by: silverwind <me@silverwind.io>
This commit is contained in:
silverwind
2026-05-29 05:23:10 +00:00
committed by silverwind
parent 0b9f251b6a
commit 270ea41232
69 changed files with 969 additions and 1176 deletions

View File

@@ -20,7 +20,9 @@ import (
"path/filepath"
"regexp"
"runtime"
"slices"
"strings"
"sync"
"time"
"gitea.com/gitea/runner/act/common"
@@ -55,6 +57,10 @@ type RunContext struct {
Masks []string
cleanUpJobContainer common.Executor
caller *caller // job calling this RunContext (reusable workflows)
// outputTemplate is this combination's pristine snapshot of the job's output expressions,
// captured before execution so each matrix combo interpolates from the originals rather
// than from a sibling's already-resolved values written into the shared Job.Outputs.
outputTemplate map[string]string
}
func (rc *RunContext) AddMask(mask string) {
@@ -130,17 +136,34 @@ func getDockerDaemonSocketMountPath(daemonPath string) string {
return daemonPath
}
// containerDaemonSocket returns the configured Docker daemon socket, applying the default
// without mutating the shared Config. Parallel jobs in a plan share one *Config, so a job
// must never write to it.
func (rc *RunContext) containerDaemonSocket() string {
if rc.Config.ContainerDaemonSocket == "" {
return "/var/run/docker.sock"
}
return rc.Config.ContainerDaemonSocket
}
// validVolumes returns the volumes allowed on this job's containers: the configured base
// plus the volumes the runner mounts automatically. It derives a fresh slice every call and
// never mutates the shared Config (see containerDaemonSocket).
func (rc *RunContext) validVolumes() []string {
name := rc.jobContainerName()
volumes := slices.Clone(rc.Config.ValidVolumes)
// TODO: add a new configuration to control whether the docker daemon can be mounted
return append(volumes, "act-toolcache", name, name+"-env",
getDockerDaemonSocketMountPath(rc.containerDaemonSocket()))
}
// Returns the binds and mounts for the container, resolving paths as appopriate
func (rc *RunContext) GetBindsAndMounts() ([]string, map[string]string) {
name := rc.jobContainerName()
if rc.Config.ContainerDaemonSocket == "" {
rc.Config.ContainerDaemonSocket = "/var/run/docker.sock"
}
binds := []string{}
if rc.Config.ContainerDaemonSocket != "-" {
daemonPath := getDockerDaemonSocketMountPath(rc.Config.ContainerDaemonSocket)
if daemonSocket := rc.containerDaemonSocket(); daemonSocket != "-" {
daemonPath := getDockerDaemonSocketMountPath(daemonSocket)
binds = append(binds, fmt.Sprintf("%s:%s", daemonPath, "/var/run/docker.sock"))
}
@@ -179,14 +202,6 @@ func (rc *RunContext) GetBindsAndMounts() ([]string, map[string]string) {
mounts[name] = ext.ToContainerPath(rc.Config.Workdir)
}
// For Gitea
// add some default binds and mounts to ValidVolumes
rc.Config.ValidVolumes = append(rc.Config.ValidVolumes, "act-toolcache")
rc.Config.ValidVolumes = append(rc.Config.ValidVolumes, name)
rc.Config.ValidVolumes = append(rc.Config.ValidVolumes, name+"-env")
// TODO: add a new configuration to control whether the docker daemon can be mounted
rc.Config.ValidVolumes = append(rc.Config.ValidVolumes, getDockerDaemonSocketMountPath(rc.Config.ContainerDaemonSocket))
return binds, mounts
}
@@ -432,7 +447,7 @@ func (rc *RunContext) startJobContainer() common.Executor {
Platform: rc.Config.ContainerArchitecture,
Options: rc.options(ctx),
AutoRemove: rc.Config.AutoRemove,
ValidVolumes: rc.Config.ValidVolumes,
ValidVolumes: rc.validVolumes(),
AllocatePTY: rc.Config.AllocatePTY,
})
if rc.JobContainer == nil {
@@ -586,14 +601,29 @@ func (rc *RunContext) ActionCacheDir() string {
}
// Interpolate outputs after a job is done
// jobMutexes serializes per-job result/output aggregation across the matrix combinations that
// share one *model.Job and run in parallel. Keyed by the shared *model.Job (mirrors the
// per-directory AcquireCloneLock pattern).
var jobMutexes sync.Map // key: *model.Job; value: *sync.Mutex
func lockJob(job *model.Job) func() {
v, _ := jobMutexes.LoadOrStore(job, &sync.Mutex{})
mu := v.(*sync.Mutex)
mu.Lock()
return mu.Unlock
}
func (rc *RunContext) interpolateOutputs() common.Executor {
return func(ctx context.Context) error {
ee := rc.NewExpressionEvaluator(ctx)
for k, v := range rc.Run.Job().Outputs {
interpolated := ee.Interpolate(ctx, v)
if v != interpolated {
rc.Run.Job().Outputs[k] = interpolated
}
job := rc.Run.Job()
// Matrix combinations share this Job and its Outputs map. Interpolate from this combo's
// pristine snapshot (outputTemplate) and write under the lock, so each combo overwrites
// with its own resolved values (last wins, as on GitHub) instead of the first combo's
// resolved values freezing the shared template against later combos.
defer lockJob(job)()
for k, v := range rc.outputTemplate {
job.Outputs[k] = ee.Interpolate(ctx, v)
}
return nil
}
@@ -660,7 +690,18 @@ func (rc *RunContext) result(result string) {
}
func (rc *RunContext) steps() []*model.Step {
return rc.Run.Job().Steps
// Return per-job copies of the steps. Matrix combinations run in parallel and share the
// workflow model, but step execution mutates per-job fields and evaluates the If/Env nodes
// in place, so the *model.Step instances must not be shared across jobs (see Step.Clone).
shared := rc.Run.Job().Steps
steps := make([]*model.Step, len(shared))
for i, step := range shared {
if step == nil {
continue
}
steps[i] = step.Clone()
}
return steps
}
// Executor returns a pipeline executor for all the steps in the job
@@ -737,12 +778,15 @@ func (rc *RunContext) runsOnPlatformNames(ctx context.Context) []string {
return []string{}
}
if err := rc.ExprEval.EvaluateYamlNode(ctx, &job.RawRunsOn); err != nil {
// Evaluate a copy: RawRunsOn is shared across parallel matrix jobs, so interpolating it in
// place would race and leak one matrix combination's runs-on into the others.
rawRunsOn := model.CloneYamlNode(job.RawRunsOn)
if err := rc.ExprEval.EvaluateYamlNode(ctx, &rawRunsOn); err != nil {
common.Logger(ctx).Errorf("Error while evaluating runs-on: %v", err)
return []string{}
}
return job.RunsOn()
return model.RunsOnFromNode(rawRunsOn)
}
func (rc *RunContext) platformImage(ctx context.Context) string {
@@ -1165,12 +1209,9 @@ func (rc *RunContext) handleServiceCredentials(ctx context.Context, creds map[st
// GetServiceBindsAndMounts returns the binds and mounts for the service container, resolving paths as appopriate
func (rc *RunContext) GetServiceBindsAndMounts(svcVolumes []string) ([]string, map[string]string) {
if rc.Config.ContainerDaemonSocket == "" {
rc.Config.ContainerDaemonSocket = "/var/run/docker.sock"
}
binds := []string{}
if rc.Config.ContainerDaemonSocket != "-" {
daemonPath := getDockerDaemonSocketMountPath(rc.Config.ContainerDaemonSocket)
if daemonSocket := rc.containerDaemonSocket(); daemonSocket != "-" {
daemonPath := getDockerDaemonSocketMountPath(daemonSocket)
binds = append(binds, fmt.Sprintf("%s:%s", daemonPath, "/var/run/docker.sock"))
}