fix: matrix-job data races + outputs, leaner offline test suite (#994)

Running the full suite under `-race` (dropping `-short`) exposed pre-existing data races in parallel matrix-job execution. They are fixed by not sharing mutable state across combinations:

- `containerDaemonSocket()`/`validVolumes()` derive per-job values instead of mutating shared `Config`
- `getWorkflowSecrets` builds a fresh map, `rc.steps()` clones each step, and go-git workdir access is serialized
- every write to a shared `Job`'s result/outputs runs under a per-`Job` lock, with each combo interpolating outputs from a pristine snapshot (last wins, as on GitHub)

### Test suite

- capability gates (docker / network / host-tools / Linux) replace the `-short` skips, and the suite runs offline via local fixtures (the artifact flow uses an in-process loopback server; only the docker-action force-pull needs the network)
- drops redundant tests, adds a regression test for https://gitea.com/gitea/runner/issues/981 and a docker-in-docker harness (`make test-dind`)

---

This PR was written with the help of Claude Opus 4.7

Reviewed-on: https://gitea.com/gitea/runner/pulls/994
Reviewed-by: Nicolas <bircni@icloud.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-committed-by: silverwind <me@silverwind.io>
2026-06-10 11:34:31 +02:00 · 2026-05-29 05:23:10 +00:00
parent 0b9f251b6a
commit 270ea41232
69 changed files with 969 additions and 1176 deletions
--- a/act/runner/runner.go
+++ b/act/runner/runner.go
@@ -8,6 +8,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"maps"
 	"os"
 	"runtime"
 	"sync"
@@ -250,7 +251,14 @@ func (runner *runnerImpl) NewPlanExecutor(plan *model.Plan) common.Executor {
 						return executor(common.WithJobErrorContainer(WithJobLogger(ctx, rc.Run.JobID, jobName, rc.Config, &rc.Masks, matrix)))
 					})
 				}
-				pipeline = append(pipeline, common.NewParallelExecutor(maxParallel, stageExecutor...))
+				// Run all matrix combinations of this job, then drop its aggregation mutex: the
+				// combos are the only users of it, so once they finish the jobMutexes entry can be
+				// released, keeping the map from growing unbounded over a long-lived runner.
+				stageParallel := common.NewParallelExecutor(maxParallel, stageExecutor...)
+				pipeline = append(pipeline, func(ctx context.Context) error {
+					defer jobMutexes.Delete(job)
+					return stageParallel(ctx)
+				})
 			}

 			// For pipeline execution:
@@ -334,6 +342,11 @@ func (runner *runnerImpl) newRunContext(ctx context.Context, run *model.Run, mat
 	}
 	rc.ExprEval = rc.NewExpressionEvaluator(ctx)
 	rc.Name = rc.ExprEval.Interpolate(ctx, run.String())
+	// Snapshot the job's pristine output expressions now, before any matrix combo runs and
+	// rewrites the shared Job.Outputs (see interpolateOutputs).
+	if job := run.Job(); job != nil {
+		rc.outputTemplate = maps.Clone(job.Outputs)
+	}

 	return rc
 }