mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-15 14:24:22 +02:00
Compare commits
1 Commits
main
...
renovate/g
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f223fa44ea |
@@ -12,13 +12,6 @@ import (
|
||||
// LineHandler is a callback function for handling a line
|
||||
type LineHandler func(line string) bool
|
||||
|
||||
// Flusher is implemented by writers that buffer a trailing, not-yet-terminated
|
||||
// line. Callers should flush once the underlying stream has reached EOF so the
|
||||
// final line (when it is not newline-terminated) is not lost.
|
||||
type Flusher interface {
|
||||
Flush()
|
||||
}
|
||||
|
||||
type lineWriter struct {
|
||||
buffer bytes.Buffer
|
||||
handlers []LineHandler
|
||||
@@ -31,14 +24,6 @@ func NewLineWriter(handlers ...LineHandler) io.Writer {
|
||||
return w
|
||||
}
|
||||
|
||||
// FlushWriter flushes w if it implements Flusher. It is a no-op otherwise, so
|
||||
// callers can flush an io.Writer without knowing its concrete type.
|
||||
func FlushWriter(w io.Writer) {
|
||||
if f, ok := w.(Flusher); ok {
|
||||
f.Flush()
|
||||
}
|
||||
}
|
||||
|
||||
func (lw *lineWriter) Write(p []byte) (n int, err error) {
|
||||
pBuf := bytes.NewBuffer(p)
|
||||
written := 0
|
||||
@@ -59,17 +44,6 @@ func (lw *lineWriter) Write(p []byte) (n int, err error) {
|
||||
return written, nil
|
||||
}
|
||||
|
||||
// Flush emits any buffered, not-yet-newline-terminated content as a final line.
|
||||
// It is safe to call multiple times; subsequent calls with an empty buffer are
|
||||
// no-ops.
|
||||
func (lw *lineWriter) Flush() {
|
||||
if lw.buffer.Len() == 0 {
|
||||
return
|
||||
}
|
||||
lw.handleLine(lw.buffer.String())
|
||||
lw.buffer.Reset()
|
||||
}
|
||||
|
||||
func (lw *lineWriter) handleLine(line string) {
|
||||
for _, h := range lw.handlers {
|
||||
ok := h(line)
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -40,33 +39,3 @@ func TestLineWriter(t *testing.T) {
|
||||
assert.Equal(" and another\n", lines[2])
|
||||
assert.Equal("last line\n", lines[3])
|
||||
}
|
||||
|
||||
func TestLineWriterFlush(t *testing.T) {
|
||||
lines := make([]string, 0)
|
||||
lineHandler := func(s string) bool {
|
||||
lines = append(lines, s)
|
||||
return true
|
||||
}
|
||||
|
||||
lineWriter := NewLineWriter(lineHandler)
|
||||
|
||||
assert := assert.New(t)
|
||||
_, err := lineWriter.Write([]byte("complete line\npartial line without newline"))
|
||||
assert.NoError(err) //nolint:testifylint // pre-existing pattern from nektos/act
|
||||
|
||||
// Only the newline-terminated line is emitted before flushing.
|
||||
assert.Equal([]string{"complete line\n"}, lines)
|
||||
|
||||
// Flushing emits the buffered, not-yet-terminated trailing line.
|
||||
FlushWriter(lineWriter)
|
||||
assert.Equal([]string{"complete line\n", "partial line without newline"}, lines)
|
||||
|
||||
// Flushing again is a no-op: nothing is buffered.
|
||||
FlushWriter(lineWriter)
|
||||
assert.Len(lines, 2)
|
||||
}
|
||||
|
||||
func TestFlushWriterIgnoresNonFlusher(t *testing.T) {
|
||||
// FlushWriter must be a safe no-op for writers that do not buffer lines.
|
||||
assert.NotPanics(t, func() { FlushWriter(io.Discard) })
|
||||
}
|
||||
|
||||
@@ -84,12 +84,6 @@ type NewDockerBuildExecutorInput struct {
|
||||
Platform string
|
||||
}
|
||||
|
||||
// NewDockerNetworkCreateExecutorInput the input for the NewDockerNetworkCreateExecutor function
|
||||
type NewDockerNetworkCreateExecutorInput struct {
|
||||
EnableIPv4 *bool
|
||||
EnableIPv6 *bool
|
||||
}
|
||||
|
||||
// NewDockerPullExecutorInput the input for the NewDockerPullExecutor function
|
||||
type NewDockerPullExecutorInput struct {
|
||||
Image string
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"github.com/moby/moby/client"
|
||||
)
|
||||
|
||||
func NewDockerNetworkCreateExecutor(name string, opts NewDockerNetworkCreateExecutorInput) common.Executor {
|
||||
func NewDockerNetworkCreateExecutor(name string) common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
cli, err := GetDockerClient(ctx)
|
||||
if err != nil {
|
||||
@@ -39,8 +39,6 @@ func NewDockerNetworkCreateExecutor(name string, opts NewDockerNetworkCreateExec
|
||||
_, err = cli.NetworkCreate(ctx, name, client.NetworkCreateOptions{
|
||||
Driver: "bridge",
|
||||
Scope: "local",
|
||||
EnableIPv4: opts.EnableIPv4,
|
||||
EnableIPv6: opts.EnableIPv6,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
@@ -20,7 +20,6 @@ import (
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
"gitea.com/gitea/runner/act/filecollector"
|
||||
@@ -46,13 +45,6 @@ import (
|
||||
"github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
// drainGracePeriod bounds how long we wait for an output-copy goroutine to
|
||||
// finish draining a container's output before returning, so that neither a
|
||||
// cancellation (waitForCommand) nor a normal container exit (wait) truncates
|
||||
// the tail of the log. It is a safety bound: in the common case the stream
|
||||
// reaches EOF and the goroutine returns well before this elapses.
|
||||
const drainGracePeriod = 2 * time.Second
|
||||
|
||||
// NewContainer creates a reference to a container
|
||||
func NewContainer(input *NewContainerInput) ExecutionsEnvironment {
|
||||
cr := new(containerReference)
|
||||
@@ -237,10 +229,6 @@ type containerReference struct {
|
||||
input *NewContainerInput
|
||||
UID int
|
||||
GID int
|
||||
// attachDone is closed by the attach() streaming goroutine once it has
|
||||
// drained and flushed the container's output. wait() blocks on it so the
|
||||
// tail of the log lands before the step proceeds.
|
||||
attachDone chan struct{}
|
||||
LinuxContainerEnvironmentExtensions
|
||||
}
|
||||
|
||||
@@ -742,9 +730,7 @@ func (cr *containerReference) tryReadGID() common.Executor {
|
||||
func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal bool, resp client.HijackedResponse, _ client.ExecCreateResult, _, _ string) error {
|
||||
logger := common.Logger(ctx)
|
||||
|
||||
// Buffered so the copy goroutine never blocks on send if the grace-period
|
||||
// drain below times out and no one is left to receive.
|
||||
cmdResponse := make(chan error, 1)
|
||||
cmdResponse := make(chan error)
|
||||
|
||||
go func() {
|
||||
var outWriter io.Writer
|
||||
@@ -763,11 +749,6 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
|
||||
} else {
|
||||
_, err = io.Copy(outWriter, resp.Reader)
|
||||
}
|
||||
// Flush any buffered, not-yet-newline-terminated trailing line so the
|
||||
// final line of a command's output is not lost (e.g. an error message
|
||||
// printed without a trailing newline before the process exits).
|
||||
common.FlushWriter(outWriter)
|
||||
common.FlushWriter(errWriter)
|
||||
cmdResponse <- err
|
||||
}()
|
||||
|
||||
@@ -779,16 +760,6 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
|
||||
logger.Warnf("Failed to send CTRL+C: %+s", err)
|
||||
}
|
||||
|
||||
// Give the copy goroutine a brief grace period to drain output already
|
||||
// produced by the command before we return, so cancellation does not
|
||||
// truncate the tail of the log. The goroutine exits once the hijacked
|
||||
// stream is closed by resp.Close() in the caller's defer.
|
||||
select {
|
||||
case <-cmdResponse:
|
||||
case <-time.After(drainGracePeriod):
|
||||
logger.Warn("Timed out draining command output after cancellation")
|
||||
}
|
||||
|
||||
// we return the context canceled error to prevent other steps
|
||||
// from executing
|
||||
return ctx.Err()
|
||||
@@ -974,23 +945,14 @@ func (cr *containerReference) attach() common.Executor {
|
||||
if errWriter == nil {
|
||||
errWriter = os.Stderr
|
||||
}
|
||||
done := make(chan struct{})
|
||||
cr.attachDone = done
|
||||
go func() {
|
||||
defer close(done)
|
||||
var copyErr error
|
||||
if !isTerminal || os.Getenv("NORAW") != "" {
|
||||
_, copyErr = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
|
||||
_, err = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
|
||||
} else {
|
||||
_, copyErr = io.Copy(outWriter, out.Reader)
|
||||
_, err = io.Copy(outWriter, out.Reader)
|
||||
}
|
||||
// Flush any buffered, not-yet-newline-terminated trailing line once
|
||||
// the stream reaches EOF, so the final line of the container's
|
||||
// output is not lost when it is not newline-terminated.
|
||||
common.FlushWriter(outWriter)
|
||||
common.FlushWriter(errWriter)
|
||||
if copyErr != nil {
|
||||
common.Logger(ctx).Error(copyErr)
|
||||
if err != nil {
|
||||
common.Logger(ctx).Error(err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
@@ -1029,18 +991,6 @@ func (cr *containerReference) wait() common.Executor {
|
||||
|
||||
logger.Debugf("Return status: %v", statusCode)
|
||||
|
||||
// The container has exited; wait for the attach() streaming goroutine to
|
||||
// finish draining and flushing its output before returning, so the tail
|
||||
// of the log is not lost. Bounded so a stuck stream cannot hang the step.
|
||||
if cr.attachDone != nil {
|
||||
select {
|
||||
case <-cr.attachDone:
|
||||
case <-time.After(drainGracePeriod):
|
||||
logger.Warn("Timed out draining container output")
|
||||
}
|
||||
cr.attachDone = nil
|
||||
}
|
||||
|
||||
if statusCode == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
"net"
|
||||
@@ -21,7 +20,6 @@ import (
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
|
||||
cerrdefs "github.com/containerd/errdefs"
|
||||
"github.com/moby/moby/api/pkg/stdcopy"
|
||||
"github.com/moby/moby/api/types/container"
|
||||
mobyclient "github.com/moby/moby/client"
|
||||
"github.com/sirupsen/logrus/hooks/test"
|
||||
@@ -91,11 +89,6 @@ func (m *mockDockerClient) ExecInspect(ctx context.Context, execID string, opts
|
||||
return args.Get(0).(mobyclient.ExecInspectResult), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *mockDockerClient) ContainerAttach(ctx context.Context, containerID string, opts mobyclient.ContainerAttachOptions) (mobyclient.ContainerAttachResult, error) {
|
||||
args := m.Called(ctx, containerID, opts)
|
||||
return args.Get(0).(mobyclient.ContainerAttachResult), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *mockDockerClient) ContainerWait(ctx context.Context, containerID string, opts mobyclient.ContainerWaitOptions) mobyclient.ContainerWaitResult {
|
||||
args := m.Called(ctx, containerID, opts)
|
||||
return args.Get(0).(mobyclient.ContainerWaitResult)
|
||||
@@ -213,71 +206,6 @@ func TestDockerExecFailure(t *testing.T) {
|
||||
client.AssertExpectations(t)
|
||||
}
|
||||
|
||||
// stdcopyFrame wraps payload in a single Docker multiplexed-stream frame, the
|
||||
// format StdCopy expects: an 8-byte header (stream type + 4-byte big-endian
|
||||
// length) followed by the payload.
|
||||
func stdcopyFrame(stream stdcopy.StdType, payload string) []byte {
|
||||
b := make([]byte, 8+len(payload))
|
||||
b[0] = byte(stream)
|
||||
binary.BigEndian.PutUint32(b[4:8], uint32(len(payload)))
|
||||
copy(b[8:], payload)
|
||||
return b
|
||||
}
|
||||
|
||||
// TestDockerAttachFlushesTrailingLine verifies that wait() blocks until the
|
||||
// attach() streaming goroutine has drained and flushed the container's output,
|
||||
// so a final line without a trailing newline is not lost.
|
||||
func TestDockerAttachFlushesTrailingLine(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
framed := bytes.NewBuffer(stdcopyFrame(stdcopy.Stdout, "line one\nlast line without newline"))
|
||||
|
||||
var lines []string
|
||||
logWriter := common.NewLineWriter(func(s string) bool {
|
||||
lines = append(lines, s)
|
||||
return true
|
||||
})
|
||||
|
||||
client := &mockDockerClient{}
|
||||
client.On("ContainerAttach", ctx, "123", mock.AnythingOfType("client.ContainerAttachOptions")).
|
||||
Return(mobyclient.ContainerAttachResult{
|
||||
HijackedResponse: mobyclient.HijackedResponse{
|
||||
Conn: &mockConn{},
|
||||
Reader: bufio.NewReader(framed),
|
||||
},
|
||||
}, nil)
|
||||
|
||||
statusCh := make(chan container.WaitResponse, 1)
|
||||
statusCh <- container.WaitResponse{StatusCode: 0}
|
||||
errCh := make(chan error, 1)
|
||||
client.On("ContainerWait", ctx, "123", mobyclient.ContainerWaitOptions{Condition: container.WaitConditionNotRunning}).
|
||||
Return(mobyclient.ContainerWaitResult{
|
||||
Result: (<-chan container.WaitResponse)(statusCh),
|
||||
Error: (<-chan error)(errCh),
|
||||
})
|
||||
|
||||
cr := &containerReference{
|
||||
id: "123",
|
||||
cli: client,
|
||||
input: &NewContainerInput{
|
||||
Image: "image",
|
||||
Stdout: logWriter,
|
||||
Stderr: logWriter,
|
||||
},
|
||||
}
|
||||
|
||||
require.NoError(t, cr.attach()(ctx))
|
||||
require.NoError(t, cr.wait()(ctx))
|
||||
|
||||
// wait() must have blocked until the goroutine drained AND flushed; the
|
||||
// trailing, non-newline-terminated line must therefore be present. Reading
|
||||
// lines here is race-free because wait() synchronizes on attachDone, which
|
||||
// the goroutine closes after the final append.
|
||||
assert.Equal(t, []string{"line one\n", "last line without newline"}, lines)
|
||||
|
||||
client.AssertExpectations(t)
|
||||
}
|
||||
|
||||
func TestDockerWaitFailure(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ func NewDockerVolumeRemoveExecutor(volume string, force bool) common.Executor {
|
||||
}
|
||||
}
|
||||
|
||||
func NewDockerNetworkCreateExecutor(name string, opts NewDockerNetworkCreateExecutorInput) common.Executor {
|
||||
func NewDockerNetworkCreateExecutor(name string) common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -323,15 +323,15 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
||||
cmd.Dir = wd
|
||||
cmd.SysProcAttr = getSysProcAttr(cmdline, false)
|
||||
|
||||
// A step often launches a process tree (a shell that starts a child which
|
||||
// spawns further background or GUI processes). The default context
|
||||
// cancellation only kills the direct child, leaving the rest of the tree
|
||||
// running; and because the orphans inherit cmd's stdout/stderr pipe,
|
||||
// cmd.Wait() would block forever, hanging the runner. Kill the whole tree on
|
||||
// cancellation — via a Job Object on Windows and the process group on Unix
|
||||
// (see processKiller) — and bound the wait so a leftover pipe writer can
|
||||
// never hang Wait indefinitely.
|
||||
// On Windows a step often launches a process tree (a shell that starts a
|
||||
// child which spawns further GUI or background processes). The default
|
||||
// context cancellation only kills the direct child, leaving the rest of the
|
||||
// tree running; and because the orphans inherit cmd's stdout/stderr pipe,
|
||||
// cmd.Wait() would block forever, hanging the runner. Kill the whole tree
|
||||
// via a Job Object on cancellation, and bound the wait so a leftover pipe
|
||||
// writer can never hang Wait indefinitely.
|
||||
var killer atomic.Pointer[processKiller]
|
||||
if runtime.GOOS == "windows" {
|
||||
cmd.Cancel = func() error {
|
||||
if k := killer.Load(); k != nil {
|
||||
return k.Kill()
|
||||
@@ -341,10 +341,10 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// Once the step process has exited, give its I/O pipes at most this long to
|
||||
// drain before Wait force-closes them and returns (Go's WaitDelay). This
|
||||
// also covers a step that backgrounds a process holding the pipe open.
|
||||
// Once the step process has exited, give its I/O pipes at most this long
|
||||
// to drain before Wait force-closes them and returns (Go's WaitDelay).
|
||||
cmd.WaitDelay = 10 * time.Second
|
||||
}
|
||||
|
||||
var ppty *os.File
|
||||
var tty *os.File
|
||||
@@ -375,17 +375,18 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Capture the started process for tree-kill on cancellation: a Job Object on
|
||||
// Windows (children spawned afterwards are auto-included) and the process
|
||||
// group on Unix. On failure (e.g. Windows nested-job restrictions) we fall
|
||||
// back to the default single-process kill; WaitDelay + end-of-job cleanup
|
||||
// still apply.
|
||||
if runtime.GOOS == "windows" {
|
||||
// Assign the started process to a Job Object so cmd.Cancel can kill the
|
||||
// whole descendant tree. Children spawned afterwards are auto-included.
|
||||
// On failure (e.g. nested-job restrictions) we fall back to the default
|
||||
// single-process kill; WaitDelay + end-of-job cleanup still apply.
|
||||
if k, kerr := newProcessKiller(cmd.Process); kerr != nil {
|
||||
common.Logger(ctx).Warnf("process tree kill setup failed, falling back to single-process kill: %v", kerr)
|
||||
} else {
|
||||
killer.Store(k)
|
||||
defer k.Close()
|
||||
}
|
||||
}
|
||||
err = cmd.Wait()
|
||||
if err != nil {
|
||||
var exitErr *exec.ExitError
|
||||
@@ -428,24 +429,6 @@ func (e *HostEnvironment) UpdateFromEnv(srcPath string, env *map[string]string)
|
||||
return parseEnvFile(e, srcPath, env)
|
||||
}
|
||||
|
||||
// removeAll is the filesystem delete used by removeAllWithContext. A package
|
||||
// var so tests can substitute a blocking stub without patching os.RemoveAll.
|
||||
var removeAll = os.RemoveAll
|
||||
|
||||
// removeAllWithContext runs removeAll in a goroutine and returns once it
|
||||
// finishes or ctx is cancelled. On cancellation the goroutine is left running —
|
||||
// a delete blocked inside a syscall cannot be interrupted (see runWithTimeout).
|
||||
func removeAllWithContext(ctx context.Context, path string) error {
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- removeAll(path) }()
|
||||
select {
|
||||
case err := <-done:
|
||||
return err
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func removePathWithRetry(ctx context.Context, path string) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
@@ -465,13 +448,10 @@ func removePathWithRetry(ctx context.Context, path string) error {
|
||||
case <-time.After(delay):
|
||||
}
|
||||
}
|
||||
lastErr = removeAllWithContext(ctx, path)
|
||||
lastErr = os.RemoveAll(path)
|
||||
if lastErr == nil {
|
||||
return nil
|
||||
}
|
||||
if errors.Is(lastErr, context.DeadlineExceeded) {
|
||||
return lastErr
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
@@ -553,61 +533,23 @@ func (e *HostEnvironment) terminateRunningProcesses(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// hostCleanupTimeout bounds each filesystem-teardown phase of the host
|
||||
// environment so a single stalled delete cannot wedge the runner slot forever.
|
||||
// A var (not const) so tests can shrink it.
|
||||
var hostCleanupTimeout = 30 * time.Second
|
||||
|
||||
// runWithTimeout runs fn in a goroutine and returns once it finishes or timeout
|
||||
// elapses, whichever comes first. On timeout the goroutine is left running — an
|
||||
// os.RemoveAll blocked inside a delete syscall (AV/EDR filter drivers, an
|
||||
// unresponsive network mount, a dying disk) cannot be interrupted — and
|
||||
// context.DeadlineExceeded is returned. Leaking the goroutine and the scratch
|
||||
// state it was deleting is strictly better than blocking the caller forever and
|
||||
// permanently losing the runner's capacity slot; the leaked scratch dir is
|
||||
// reclaimed later by the runner's idle stale-dir sweep.
|
||||
func runWithTimeout(fn func(), timeout time.Duration) error {
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
fn()
|
||||
}()
|
||||
timer := time.NewTimer(timeout)
|
||||
defer timer.Stop()
|
||||
select {
|
||||
case <-done:
|
||||
return nil
|
||||
case <-timer.C:
|
||||
return context.DeadlineExceeded
|
||||
}
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) Remove() common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
logger := common.Logger(ctx)
|
||||
|
||||
// Ensure any lingering child processes are ended before attempting
|
||||
// to remove the workspace (Windows file locks otherwise prevent cleanup).
|
||||
e.terminateRunningProcesses(ctx)
|
||||
|
||||
// Only removes per-job misc state. Must not remove the cache/toolcache root.
|
||||
// Bound it: CleanUp is a caller-supplied, typically unbounded os.RemoveAll,
|
||||
// and a delete stalled by a filesystem filter driver would otherwise hang
|
||||
// the job forever at "Cleaning up container" and hold the capacity slot.
|
||||
if e.CleanUp != nil {
|
||||
logger.Debugf("running host environment cleanup callback")
|
||||
if err := runWithTimeout(e.CleanUp, hostCleanupTimeout); err != nil {
|
||||
logger.Warnf("host environment cleanup did not finish within %s; continuing job completion, scratch state may be leaked and is reclaimed by the idle stale-dir sweep", hostCleanupTimeout)
|
||||
} else {
|
||||
logger.Debugf("host environment cleanup callback finished")
|
||||
}
|
||||
e.CleanUp()
|
||||
}
|
||||
|
||||
// Detach: a cancelled ctx would skip removePathWithRetry's retries,
|
||||
// which absorb Windows file-handle release lag after the kill above.
|
||||
rmCtx, rmCancel := context.WithTimeout(context.Background(), hostCleanupTimeout)
|
||||
rmCtx, rmCancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer rmCancel()
|
||||
|
||||
logger := common.Logger(ctx)
|
||||
var errs []error
|
||||
if err := removePathWithRetry(rmCtx, e.Path); err != nil {
|
||||
logger.Warnf("failed to remove host misc state %s: %v", e.Path, err)
|
||||
@@ -619,15 +561,8 @@ func (e *HostEnvironment) Remove() common.Executor {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
for _, err := range errs {
|
||||
if !errors.Is(err, context.DeadlineExceeded) {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
}
|
||||
// Bounded teardown timed out; warnings already logged above. Do not
|
||||
// fail job completion — leaked scratch is reclaimed by the idle sweep.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (e *HostEnvironment) ToContainerPath(path string) string {
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
|
||||
@@ -189,118 +188,6 @@ func TestHostEnvironmentRemoveCleansWorkdirWhenOwned(t *testing.T) {
|
||||
assert.ErrorIs(t, err, os.ErrNotExist)
|
||||
}
|
||||
|
||||
func TestRemoveAllWithContextDoesNotHangOnStuckDelete(t *testing.T) {
|
||||
release := make(chan struct{})
|
||||
stubDone := make(chan struct{})
|
||||
|
||||
orig := removeAll
|
||||
removeAll = func(string) error {
|
||||
defer close(stubDone)
|
||||
<-release
|
||||
return nil
|
||||
}
|
||||
// removeAllWithContext intentionally leaks the delete goroutine on timeout,
|
||||
// and that goroutine still references removeAll. Unblock it and wait for it
|
||||
// to return before restoring the var, so the restore can't race the read.
|
||||
t.Cleanup(func() {
|
||||
close(release)
|
||||
<-stubDone
|
||||
removeAll = orig
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
err := removeAllWithContext(ctx, t.TempDir())
|
||||
require.ErrorIs(t, err, context.DeadlineExceeded)
|
||||
}
|
||||
|
||||
// TestHostEnvironmentRemoveDoesNotHangOnStuckCleanUp guards against a stalled
|
||||
// CleanUp callback (e.g. an os.RemoveAll blocked by an AV/EDR filter driver or
|
||||
// an unresponsive mount) wedging the runner slot forever at "Cleaning up
|
||||
// container". Remove must time out the callback and complete job teardown.
|
||||
func TestHostEnvironmentRemoveDoesNotHangOnStuckCleanUp(t *testing.T) {
|
||||
// Keep the suite fast: shrink the per-phase teardown timeout for this test.
|
||||
orig := hostCleanupTimeout
|
||||
hostCleanupTimeout = 100 * time.Millisecond
|
||||
t.Cleanup(func() { hostCleanupTimeout = orig })
|
||||
|
||||
logger := logrus.New()
|
||||
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
|
||||
base := t.TempDir()
|
||||
path := filepath.Join(base, "misc", "hostexecutor")
|
||||
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||
|
||||
release := make(chan struct{})
|
||||
t.Cleanup(func() { close(release) }) // unblock the leaked goroutine at test end
|
||||
|
||||
e := &HostEnvironment{
|
||||
Path: path,
|
||||
CleanUp: func() {
|
||||
<-release // simulate a delete syscall stuck indefinitely
|
||||
},
|
||||
StdOut: os.Stdout,
|
||||
}
|
||||
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- e.Remove()(ctx) }()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
require.NoError(t, err)
|
||||
case <-time.After(10 * time.Second):
|
||||
t.Fatal("Remove() hung on a stuck CleanUp callback")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHostEnvironmentRemoveDoesNotHangOnStuckPathRemoval guards against a
|
||||
// stalled os.RemoveAll on the misc/workspace paths (same AV/EDR wedge as
|
||||
// #1023) wedging job completion after the CleanUp callback has already timed
|
||||
// out or finished.
|
||||
func TestHostEnvironmentRemoveDoesNotHangOnStuckPathRemoval(t *testing.T) {
|
||||
origTimeout := hostCleanupTimeout
|
||||
hostCleanupTimeout = 100 * time.Millisecond
|
||||
t.Cleanup(func() { hostCleanupTimeout = origTimeout })
|
||||
|
||||
release := make(chan struct{})
|
||||
stubDone := make(chan struct{})
|
||||
|
||||
origRemoveAll := removeAll
|
||||
removeAll = func(string) error {
|
||||
defer close(stubDone)
|
||||
<-release
|
||||
return nil
|
||||
}
|
||||
// The stuck delete goroutine outlives the timed-out Remove and still reads
|
||||
// removeAll; unblock it and wait before restoring to avoid a restore/read race.
|
||||
t.Cleanup(func() {
|
||||
close(release)
|
||||
<-stubDone
|
||||
removeAll = origRemoveAll
|
||||
})
|
||||
|
||||
logger := logrus.New()
|
||||
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
|
||||
base := t.TempDir()
|
||||
path := filepath.Join(base, "misc", "hostexecutor")
|
||||
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||
|
||||
e := &HostEnvironment{
|
||||
Path: path,
|
||||
StdOut: os.Stdout,
|
||||
}
|
||||
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- e.Remove()(ctx) }()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
require.NoError(t, err)
|
||||
case <-time.After(10 * time.Second):
|
||||
t.Fatal("Remove() hung on a stuck path removal")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildWindowsWorkspaceKillScript(t *testing.T) {
|
||||
t.Run("single dir", func(t *testing.T) {
|
||||
s := buildWindowsWorkspaceKillScript([]string{`C:\workspace\job1`})
|
||||
|
||||
@@ -1,29 +1,19 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
//go:build plan9
|
||||
//go:build !windows
|
||||
|
||||
package container
|
||||
|
||||
import "os"
|
||||
|
||||
// processKiller falls back to single-process termination on platforms without
|
||||
// a process-group / Job Object tree-kill. The Job Object (Windows) and process
|
||||
// group (Unix) based tree-kills live in process_windows.go / process_unix.go;
|
||||
// here we just kill the direct child, matching the previous default behaviour.
|
||||
type processKiller struct {
|
||||
p *os.Process
|
||||
}
|
||||
// processKiller is a no-op on non-Windows platforms. The Job Object based
|
||||
// tree-kill is only wired in on Windows (see exec()); elsewhere the default
|
||||
// exec.CommandContext cancellation and Setpgid handling apply.
|
||||
type processKiller struct{}
|
||||
|
||||
func newProcessKiller(p *os.Process) (*processKiller, error) {
|
||||
return &processKiller{p: p}, nil
|
||||
}
|
||||
func newProcessKiller(_ *os.Process) (*processKiller, error) { return &processKiller{}, nil }
|
||||
|
||||
func (k *processKiller) Kill() error {
|
||||
if k == nil || k.p == nil {
|
||||
return nil
|
||||
}
|
||||
return k.p.Kill()
|
||||
}
|
||||
func (k *processKiller) Kill() error { return nil }
|
||||
|
||||
func (k *processKiller) Close() error { return nil }
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
//go:build !windows && !plan9
|
||||
|
||||
package container
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// processKiller terminates a step process together with its whole process
|
||||
// group, which is the Unix counterpart of the Windows Job Object tree-kill.
|
||||
//
|
||||
// Background: a step often launches a process tree (a shell that starts a child
|
||||
// which in turn spawns further background processes). The default
|
||||
// exec.CommandContext cancellation only kills the direct child, so cancelling a
|
||||
// job left the rest of the tree running. Because those orphans inherited the
|
||||
// step's stdout/stderr pipe, cmd.Wait() also blocked forever and the runner
|
||||
// hung.
|
||||
//
|
||||
// Steps are started with Setpgid (or Setsid for the PTY path, see
|
||||
// getSysProcAttr), which makes the step process the leader of a new process
|
||||
// group whose ID equals its PID. Signalling the negative PID delivers to every
|
||||
// process still in that group, so we can tear down the whole tree atomically on
|
||||
// cancellation, which also closes the inherited pipe handles so cmd.Wait() can
|
||||
// return.
|
||||
type processKiller struct {
|
||||
pgid int
|
||||
}
|
||||
|
||||
// newProcessKiller captures the process group of p (an already-started
|
||||
// process). Because the step is launched with Setpgid/Setsid, p is a group
|
||||
// leader and its PGID equals its PID; children spawned afterwards stay in the
|
||||
// same group unless they explicitly create their own.
|
||||
func newProcessKiller(p *os.Process) (*processKiller, error) {
|
||||
return &processKiller{pgid: p.Pid}, nil
|
||||
}
|
||||
|
||||
// Kill sends SIGKILL to the entire process group (the step process and every
|
||||
// descendant that stayed in the group). A missing group (ESRCH) means the
|
||||
// processes already exited and is not treated as an error.
|
||||
func (k *processKiller) Kill() error {
|
||||
if k == nil || k.pgid <= 0 {
|
||||
return nil
|
||||
}
|
||||
if err := syscall.Kill(-k.pgid, syscall.SIGKILL); err != nil && !errors.Is(err, syscall.ESRCH) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close is a no-op on Unix; there is no job handle to release.
|
||||
func (k *processKiller) Close() error { return nil }
|
||||
@@ -1,100 +0,0 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
//go:build !windows && !plan9
|
||||
|
||||
package container
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// processAlive reports whether pid refers to a still-running process. Signal 0
|
||||
// performs error checking without delivering a signal: a nil error (or EPERM)
|
||||
// means the process exists, ESRCH means it is gone.
|
||||
//
|
||||
// On Linux, zombie processes (state Z in /proc/<pid>/stat) appear alive to
|
||||
// kill(0) but have already terminated — their corpse lingers until the parent
|
||||
// calls wait(). In a Docker container the child may be reparented to a PID 1
|
||||
// that does not reap promptly, so we treat zombies as not alive.
|
||||
func processAlive(pid int) bool {
|
||||
err := syscall.Kill(pid, 0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
// On Linux /proc is available; check whether the process is a zombie.
|
||||
if b, readErr := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid)); readErr == nil {
|
||||
// Format: "pid (comm) state ..." — state follows the closing ')' of the
|
||||
// command name (which may itself contain spaces and parens).
|
||||
rest := string(b)
|
||||
if idx := strings.LastIndex(rest, ") "); idx >= 0 {
|
||||
fields := strings.Fields(rest[idx+2:])
|
||||
if len(fields) > 0 && fields[0] == "Z" {
|
||||
return false // zombie: terminated but not yet reaped
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// TestProcessKillerKillsTree verifies that a process group captured by the
|
||||
// killer is terminated together with a child the step spawns afterwards. This
|
||||
// mirrors a step that launches a child which spawns further processes, where
|
||||
// cancelling the job must take down the whole tree, not just the direct child.
|
||||
func TestProcessKillerKillsTree(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
pidFile := filepath.Join(dir, "child.pid")
|
||||
|
||||
// Parent shell backgrounds a long-lived child (writing its PID to a file)
|
||||
// and then sleeps. With job control off (non-interactive sh) the backgrounded
|
||||
// child stays in the parent's process group, so the group kill must reach it.
|
||||
script := fmt.Sprintf(`sleep 600 & echo $! > %q; sleep 600`, pidFile)
|
||||
cmd := exec.Command("/bin/sh", "-c", script)
|
||||
// Launch as its own process-group leader, exactly like a real step does (see
|
||||
// getSysProcAttr), so the killer's PGID == the process PID.
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||
require.NoError(t, cmd.Start())
|
||||
t.Cleanup(func() {
|
||||
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
|
||||
_ = cmd.Wait()
|
||||
})
|
||||
|
||||
killer, err := newProcessKiller(cmd.Process)
|
||||
require.NoError(t, err)
|
||||
defer killer.Close()
|
||||
|
||||
// Wait for the backgrounded child PID to be reported.
|
||||
var childPID int
|
||||
require.Eventually(t, func() bool {
|
||||
b, e := os.ReadFile(pidFile)
|
||||
if e != nil {
|
||||
return false
|
||||
}
|
||||
s := strings.TrimSpace(string(b))
|
||||
if s == "" {
|
||||
return false
|
||||
}
|
||||
childPID, _ = strconv.Atoi(s)
|
||||
return childPID > 0 && processAlive(childPID)
|
||||
}, 20*time.Second, 100*time.Millisecond, "child process should start")
|
||||
|
||||
// Killing the group must terminate both the parent and the backgrounded child.
|
||||
require.NoError(t, killer.Kill())
|
||||
// Reap the parent so it does not linger as a zombie (which would still report
|
||||
// as alive); SIGKILL makes Wait return promptly.
|
||||
_ = cmd.Wait()
|
||||
|
||||
require.Eventually(t, func() bool {
|
||||
return !processAlive(childPID)
|
||||
}, 20*time.Second, 100*time.Millisecond, "backgrounded child should be terminated")
|
||||
}
|
||||
@@ -48,11 +48,8 @@ func (rc *RunContext) commandHandler(ctx context.Context) common.LineHandler {
|
||||
if resumeCommand != "" && command != resumeCommand {
|
||||
// There should not be any emojis in the log output for Gitea.
|
||||
// The code in the switch statement is the same.
|
||||
// Return true (not false) so the line still reaches the raw_output
|
||||
// log handler; otherwise everything between ::stop-commands:: and
|
||||
// its end token is silently dropped from the step log.
|
||||
logger.Infof("%s", line)
|
||||
return true
|
||||
return false
|
||||
}
|
||||
arg = UnescapeCommandData(arg)
|
||||
kvPairs = unescapeKvPairs(kvPairs)
|
||||
|
||||
@@ -28,29 +28,6 @@ func TestSetEnv(t *testing.T) {
|
||||
a.Equal("valz", rc.Env["x"])
|
||||
}
|
||||
|
||||
func TestStopCommandsKeepsSuppressedLinesInLog(t *testing.T) {
|
||||
a := assert.New(t)
|
||||
ctx := context.Background()
|
||||
rc := new(RunContext)
|
||||
handler := rc.commandHandler(ctx)
|
||||
|
||||
// Stop command processing until the matching end token is seen.
|
||||
a.True(handler("::stop-commands::my-end-token\n"))
|
||||
|
||||
// A command-shaped line while stopped must not be executed (env unchanged),
|
||||
// but must still return true so it reaches the raw_output log handler and is
|
||||
// not dropped from the step log.
|
||||
a.True(handler("::set-env name=x::valz\n"))
|
||||
a.NotContains(rc.Env, "x")
|
||||
|
||||
// The matching end token resumes command processing.
|
||||
a.True(handler("::my-end-token::\n"))
|
||||
|
||||
// Commands are processed again after resuming.
|
||||
a.True(handler("::set-env name=y::valy\n"))
|
||||
a.Equal("valy", rc.Env["y"])
|
||||
}
|
||||
|
||||
func TestSetOutput(t *testing.T) {
|
||||
a := assert.New(t)
|
||||
ctx := context.Background()
|
||||
|
||||
@@ -462,11 +462,6 @@ func useStepLogger(rc *RunContext, stepModel *model.Step, stage stepStage, execu
|
||||
oldout, olderr := rc.JobContainer.ReplaceLogWriter(logWriter, logWriter)
|
||||
defer rc.JobContainer.ReplaceLogWriter(oldout, olderr)
|
||||
|
||||
// Flush any buffered, not-yet-newline-terminated trailing line once the
|
||||
// step has finished, so the final line of the step's output is not lost
|
||||
// when it is not newline-terminated.
|
||||
defer common.FlushWriter(logWriter)
|
||||
|
||||
return executor(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -471,8 +471,7 @@ func (rc *RunContext) startJobContainer() common.Executor {
|
||||
rc.pullServicesImages(rc.Config.ForcePull),
|
||||
rc.JobContainer.Pull(rc.Config.ForcePull),
|
||||
rc.stopJobContainer(),
|
||||
container.NewDockerNetworkCreateExecutor(networkName, rc.Config.ContainerNetworkCreateOptions).
|
||||
IfBool(createAndDeleteNetwork),
|
||||
container.NewDockerNetworkCreateExecutor(networkName).IfBool(createAndDeleteNetwork),
|
||||
rc.startServiceContainers(networkName),
|
||||
rc.JobContainer.Create(rc.Config.ContainerCapAdd, rc.Config.ContainerCapDrop),
|
||||
rc.JobContainer.Start(false),
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
"time"
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
"gitea.com/gitea/runner/act/container"
|
||||
"gitea.com/gitea/runner/act/model"
|
||||
|
||||
docker_container "github.com/moby/moby/api/types/container"
|
||||
@@ -69,7 +68,6 @@ type Config struct {
|
||||
ReplaceGheActionTokenWithGithubCom string // Token of private action repo on GitHub.
|
||||
Matrix map[string]map[string]bool // Matrix config to run
|
||||
ContainerNetworkMode docker_container.NetworkMode // the network mode of job containers (the value of --network)
|
||||
ContainerNetworkCreateOptions container.NewDockerNetworkCreateExecutorInput // the default network create options
|
||||
ActionCache ActionCache // Use a custom ActionCache Implementation
|
||||
|
||||
PresetGitHubContext *model.GithubContext // the preset github context, overrides some fields like DefaultBranch, Env, Secrets etc.
|
||||
|
||||
2
go.mod
2
go.mod
@@ -36,7 +36,7 @@ require (
|
||||
github.com/stretchr/testify v1.11.1
|
||||
github.com/timshannon/bolthold v0.0.0-20240314194003-30aac6950928
|
||||
go.etcd.io/bbolt v1.4.3
|
||||
go.yaml.in/yaml/v4 v4.0.0-rc.3
|
||||
go.yaml.in/yaml/v4 v4.0.0-rc.5
|
||||
golang.org/x/sys v0.46.0
|
||||
golang.org/x/term v0.44.0
|
||||
google.golang.org/protobuf v1.36.11
|
||||
|
||||
2
go.sum
2
go.sum
@@ -232,6 +232,8 @@ go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
go.yaml.in/yaml/v4 v4.0.0-rc.3 h1:3h1fjsh1CTAPjW7q/EMe+C8shx5d8ctzZTrLcs/j8Go=
|
||||
go.yaml.in/yaml/v4 v4.0.0-rc.3/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0=
|
||||
go.yaml.in/yaml/v4 v4.0.0-rc.5 h1:JVliQq9EGOYaTgMi+k8BhUJyqcGk4ZqeuiN1Cirba9c=
|
||||
go.yaml.in/yaml/v4 v4.0.0-rc.5/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0=
|
||||
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988=
|
||||
golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc=
|
||||
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
|
||||
"gitea.com/gitea/runner/act/artifactcache"
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
"gitea.com/gitea/runner/act/container"
|
||||
"gitea.com/gitea/runner/act/model"
|
||||
"gitea.com/gitea/runner/act/runner"
|
||||
"gitea.com/gitea/runner/internal/pkg/client"
|
||||
@@ -34,7 +33,7 @@ import (
|
||||
|
||||
"connectrpc.com/connect"
|
||||
runnerv1 "gitea.dev/actions-proto-go/runner/v1"
|
||||
docker_container "github.com/moby/moby/api/types/container"
|
||||
"github.com/moby/moby/api/types/container"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@@ -128,22 +127,15 @@ func (r *Runner) OnIdle(ctx context.Context) {
|
||||
if !r.shouldRunIdleCleanup() {
|
||||
return
|
||||
}
|
||||
// Bind-workdir mode: reclaim stale per-task workspace dirs (numeric task IDs).
|
||||
if r.cfg.Container.BindWorkdir {
|
||||
workdirParent := strings.TrimLeft(r.cfg.Container.WorkdirParent, "/")
|
||||
workdirRoot := filepath.FromSlash("/" + workdirParent)
|
||||
r.cleanupStaleDirs(ctx, workdirRoot, isTaskIDDir)
|
||||
}
|
||||
// Host mode: reclaim per-job scratch dirs left behind when HostEnvironment
|
||||
// cleanup timed out (e.g. a delete stalled by an AV/EDR filter driver). They
|
||||
// sit under the host workdir parent alongside the shared tool_cache, which
|
||||
// the name match leaves untouched. No-op when no host-mode job ever ran.
|
||||
if hostRoot := filepath.FromSlash(r.cfg.Host.WorkdirParent); hostRoot != "" {
|
||||
r.cleanupStaleDirs(ctx, hostRoot, isHostScratchDir)
|
||||
}
|
||||
r.cleanupStaleTaskDirs(ctx, workdirRoot)
|
||||
}
|
||||
|
||||
func (r *Runner) shouldRunIdleCleanup() bool {
|
||||
if !r.cfg.Container.BindWorkdir {
|
||||
return false
|
||||
}
|
||||
if r.cfg.Runner.WorkdirCleanupAge <= 0 || r.cfg.Runner.IdleCleanupInterval <= 0 {
|
||||
return false
|
||||
}
|
||||
@@ -163,52 +155,18 @@ func (r *Runner) shouldRunIdleCleanup() bool {
|
||||
}
|
||||
}
|
||||
|
||||
// cleanupStaleTaskDirs reclaims stale bind-workdir per-task directories under
|
||||
// workdirRoot. Retained as a thin wrapper so existing callers and tests keep a
|
||||
// stable entry point.
|
||||
func (r *Runner) cleanupStaleTaskDirs(ctx context.Context, workdirRoot string) {
|
||||
r.cleanupStaleDirs(ctx, workdirRoot, isTaskIDDir)
|
||||
}
|
||||
|
||||
// isTaskIDDir reports whether name is a per-task workspace dir (numeric task
|
||||
// ID). Any other directory is skipped to avoid deleting operator-managed data
|
||||
// under workdir_root.
|
||||
func isTaskIDDir(name string) bool {
|
||||
_, err := strconv.ParseUint(name, 10, 64)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// isHostScratchDir reports whether name is a per-job host-mode scratch dir:
|
||||
// hex.EncodeToString of 8 random bytes, i.e. exactly 16 lowercase hex chars
|
||||
// (see startHostEnvironment in act/runner/run_context.go). The narrow match
|
||||
// leaves the sibling shared "tool_cache" dir and any operator data untouched.
|
||||
func isHostScratchDir(name string) bool {
|
||||
if len(name) != 16 {
|
||||
return false
|
||||
}
|
||||
for _, c := range name {
|
||||
if (c < '0' || c > '9') && (c < 'a' || c > 'f') {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// cleanupStaleDirs removes immediate child directories of root that match and
|
||||
// whose mtime is older than WorkdirCleanupAge. It is a no-op when root does not
|
||||
// exist yet (the runner has never written there).
|
||||
func (r *Runner) cleanupStaleDirs(ctx context.Context, root string, match func(name string) bool) {
|
||||
entries, err := os.ReadDir(root)
|
||||
entries, err := os.ReadDir(workdirRoot)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return
|
||||
}
|
||||
log.Warnf("failed to list directory %s for stale cleanup: %v", root, err)
|
||||
log.Warnf("failed to list task workspace root %s for stale cleanup: %v", workdirRoot, err)
|
||||
return
|
||||
}
|
||||
|
||||
// A task may begin between shouldRunIdleCleanup's running-count check and
|
||||
// the loop below. That is safe because new dirs are created with the
|
||||
// the loop below. That is safe because new task dirs are created with the
|
||||
// current mtime and therefore fall on the keep side of cutoff.
|
||||
cutoff := r.now().Add(-r.cfg.Runner.WorkdirCleanupAge)
|
||||
for _, entry := range entries {
|
||||
@@ -218,23 +176,25 @@ func (r *Runner) cleanupStaleDirs(ctx context.Context, root string, match func(n
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
if !match(entry.Name()) {
|
||||
// Task workspaces are indexed by numeric task IDs; skip any other
|
||||
// directories to avoid deleting operator-managed data under workdir_root.
|
||||
if _, err := strconv.ParseUint(entry.Name(), 10, 64); err != nil {
|
||||
continue
|
||||
}
|
||||
info, err := entry.Info()
|
||||
if err != nil {
|
||||
log.Warnf("failed to stat %s: %v", filepath.Join(root, entry.Name()), err)
|
||||
log.Warnf("failed to stat task workspace %s: %v", filepath.Join(workdirRoot, entry.Name()), err)
|
||||
continue
|
||||
}
|
||||
if info.ModTime().After(cutoff) {
|
||||
continue
|
||||
}
|
||||
dir := filepath.Join(root, entry.Name())
|
||||
if err := os.RemoveAll(dir); err != nil {
|
||||
log.Warnf("failed to clean stale directory %s: %v", dir, err)
|
||||
taskDir := filepath.Join(workdirRoot, entry.Name())
|
||||
if err := os.RemoveAll(taskDir); err != nil {
|
||||
log.Warnf("failed to clean stale task workspace %s: %v", taskDir, err)
|
||||
continue
|
||||
}
|
||||
log.Infof("cleaned stale directory %s", dir)
|
||||
log.Infof("cleaned stale task workspace %s", taskDir)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -434,11 +394,7 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report.
|
||||
ContainerNamePrefix: fmt.Sprintf("GITEA-ACTIONS-TASK-%d", task.Id),
|
||||
ContainerMaxLifetime: maxLifetime,
|
||||
CleanWorkdir: true,
|
||||
ContainerNetworkMode: docker_container.NetworkMode(r.cfg.Container.Network),
|
||||
ContainerNetworkCreateOptions: container.NewDockerNetworkCreateExecutorInput{
|
||||
EnableIPv4: r.cfg.Container.NetworkCreateOptions.EnableIPv4,
|
||||
EnableIPv6: r.cfg.Container.NetworkCreateOptions.EnableIPv6,
|
||||
},
|
||||
ContainerNetworkMode: container.NetworkMode(r.cfg.Container.Network),
|
||||
ContainerOptions: r.cfg.Container.Options,
|
||||
ContainerDaemonSocket: r.cfg.Container.DockerHost,
|
||||
Privileged: r.cfg.Container.Privileged,
|
||||
|
||||
@@ -52,55 +52,6 @@ func TestRunnerCleanupStaleTaskDirs(t *testing.T) {
|
||||
assert.DirExists(t, alphaNumericTask)
|
||||
}
|
||||
|
||||
// TestRunnerOnIdleCleansStaleHostScratchDirs covers the host-mode leak path:
|
||||
// a per-job scratch dir (16 hex chars) left behind by a timed-out cleanup must
|
||||
// be reclaimed, while the shared tool_cache and operator data are preserved.
|
||||
func TestRunnerOnIdleCleansStaleHostScratchDirs(t *testing.T) {
|
||||
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
|
||||
hostRoot := filepath.Join(t.TempDir(), "act")
|
||||
require.NoError(t, os.MkdirAll(hostRoot, 0o700))
|
||||
|
||||
staleScratch := filepath.Join(hostRoot, "0123456789abcdef") // 16 hex
|
||||
freshScratch := filepath.Join(hostRoot, "fedcba9876543210")
|
||||
toolCache := filepath.Join(hostRoot, "tool_cache")
|
||||
operatorData := filepath.Join(hostRoot, "keep-me")
|
||||
for _, path := range []string{staleScratch, freshScratch, toolCache, operatorData} {
|
||||
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||
}
|
||||
require.NoError(t, os.Chtimes(staleScratch, now.Add(-48*time.Hour), now.Add(-48*time.Hour)))
|
||||
require.NoError(t, os.Chtimes(freshScratch, now.Add(-10*time.Minute), now.Add(-10*time.Minute)))
|
||||
require.NoError(t, os.Chtimes(toolCache, now.Add(-72*time.Hour), now.Add(-72*time.Hour)))
|
||||
require.NoError(t, os.Chtimes(operatorData, now.Add(-72*time.Hour), now.Add(-72*time.Hour)))
|
||||
|
||||
r := &Runner{
|
||||
cfg: &config.Config{
|
||||
Host: config.Host{WorkdirParent: hostRoot},
|
||||
Runner: config.Runner{
|
||||
WorkdirCleanupAge: 24 * time.Hour,
|
||||
IdleCleanupInterval: time.Minute,
|
||||
},
|
||||
},
|
||||
now: func() time.Time { return now },
|
||||
}
|
||||
|
||||
r.OnIdle(context.Background())
|
||||
|
||||
assert.NoDirExists(t, staleScratch) // stale scratch reclaimed
|
||||
assert.DirExists(t, freshScratch) // within cleanup age, kept
|
||||
assert.DirExists(t, toolCache) // shared cache, never a scratch match
|
||||
assert.DirExists(t, operatorData) // non-hex name, untouched
|
||||
}
|
||||
|
||||
func TestIsHostScratchDir(t *testing.T) {
|
||||
assert.True(t, isHostScratchDir("0123456789abcdef"))
|
||||
assert.True(t, isHostScratchDir("ffffffffffffffff"))
|
||||
assert.False(t, isHostScratchDir("tool_cache"))
|
||||
assert.False(t, isHostScratchDir("0123456789ABCDEF")) // hex.EncodeToString is lowercase
|
||||
assert.False(t, isHostScratchDir("0123456789abcde")) // 15 chars
|
||||
assert.False(t, isHostScratchDir("0123456789abcdef0")) // 17 chars
|
||||
assert.False(t, isHostScratchDir("123"))
|
||||
}
|
||||
|
||||
func TestRunnerCleanupStaleTaskDirsMissingRoot(t *testing.T) {
|
||||
r := &Runner{
|
||||
cfg: &config.Config{
|
||||
@@ -184,10 +135,7 @@ func TestRunnerShouldRunIdleCleanupSkipsWhenJobRunning(t *testing.T) {
|
||||
assert.False(t, r.shouldRunIdleCleanup())
|
||||
}
|
||||
|
||||
// Idle cleanup runs regardless of bind_workdir: host mode (bind_workdir off)
|
||||
// still leaves per-job scratch dirs that the sweep must reclaim.
|
||||
func TestRunnerShouldRunIdleCleanupRunsWithoutBindWorkdir(t *testing.T) {
|
||||
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
|
||||
func TestRunnerShouldRunIdleCleanupSkipsWhenBindWorkdirDisabled(t *testing.T) {
|
||||
r := &Runner{
|
||||
cfg: &config.Config{
|
||||
Runner: config.Runner{
|
||||
@@ -195,10 +143,10 @@ func TestRunnerShouldRunIdleCleanupRunsWithoutBindWorkdir(t *testing.T) {
|
||||
IdleCleanupInterval: time.Minute,
|
||||
},
|
||||
},
|
||||
now: func() time.Time { return now },
|
||||
now: time.Now,
|
||||
}
|
||||
|
||||
assert.True(t, r.shouldRunIdleCleanup())
|
||||
assert.False(t, r.shouldRunIdleCleanup())
|
||||
}
|
||||
|
||||
func TestRunnerShouldRunIdleCleanupSkipsWhenDisabled(t *testing.T) {
|
||||
|
||||
@@ -40,12 +40,11 @@ runner:
|
||||
# The runner uses exponential backoff when idle, increasing the interval up to this maximum.
|
||||
# Set to 0 or same as fetch_interval to disable backoff.
|
||||
fetch_interval_max: 5s
|
||||
# While idle, remove stale bind-workdir task directories and orphaned host-mode
|
||||
# scratch directories (left behind when a host cleanup delete stalls) older than
|
||||
# this duration. Setting either workdir_cleanup_age or idle_cleanup_interval to 0
|
||||
# (or any non-positive value) disables stale-directory cleanup entirely.
|
||||
# While idle, remove stale bind-workdir task directories older than this duration.
|
||||
# Setting either workdir_cleanup_age or idle_cleanup_interval to 0 (or any
|
||||
# non-positive value) disables workdir cleanup entirely.
|
||||
workdir_cleanup_age: 24h
|
||||
# Cadence for the idle stale-directory cleanup pass.
|
||||
# Cadence for the idle stale bind-workdir cleanup pass.
|
||||
idle_cleanup_interval: 10m
|
||||
# The base interval for periodic log flush to the Gitea instance.
|
||||
# Logs may be sent earlier if the buffer reaches log_report_batch_size
|
||||
@@ -116,13 +115,6 @@ container:
|
||||
# If it's empty, runner will create a network automatically.
|
||||
# Deprecated: `network_mode` is still accepted for old configs; use `network` instead.
|
||||
network: ""
|
||||
# network_create_options only apply when `network` is left empty and the runner
|
||||
# auto-creates a per-job network that does not already exist. They have no effect
|
||||
# when a custom `network` name is set, because that network is used as-is and never
|
||||
# created by the runner. Omit the entire block to use Docker's defaults.
|
||||
network_create_options:
|
||||
enable_ipv4: true # Omit to use Docker's default (IPv4 enabled). Set false to disable IPv4.
|
||||
enable_ipv6: false # Omit to use Docker's default (IPv6 disabled). Enabling it requires dockerd started with --ipv6.
|
||||
# Whether to use privileged mode or not when launching task containers (privileged mode is required for Docker-in-Docker).
|
||||
privileged: false
|
||||
# Any other options to be used when the container is started (e.g., --add-host=my.gitea.url:host-gateway).
|
||||
|
||||
@@ -33,8 +33,8 @@ type Runner struct {
|
||||
FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources.
|
||||
FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources.
|
||||
FetchIntervalMax time.Duration `yaml:"fetch_interval_max"` // FetchIntervalMax specifies the maximum backoff interval when idle.
|
||||
WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories and orphaned host-mode scratch dirs older than this duration during idle cleanup.
|
||||
IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale-directory cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence.
|
||||
WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories older than this duration during idle cleanup.
|
||||
IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale bind-workdir cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence.
|
||||
LogReportInterval time.Duration `yaml:"log_report_interval"` // LogReportInterval specifies the base interval for periodic log flush.
|
||||
LogReportMaxLatency time.Duration `yaml:"log_report_max_latency"` // LogReportMaxLatency specifies the max time a log row can wait before being sent.
|
||||
LogReportBatchSize int `yaml:"log_report_batch_size"` // LogReportBatchSize triggers immediate log flush when buffer reaches this size.
|
||||
@@ -59,7 +59,6 @@ type Cache struct {
|
||||
// Container represents the configuration for the container.
|
||||
type Container struct {
|
||||
Network string `yaml:"network"` // Network specifies the network for the container.
|
||||
NetworkCreateOptions ContainerNetworkCreateOptions `yaml:"network_create_options"` // Add options when the network need to be created by the runner
|
||||
NetworkMode string `yaml:"network_mode"` // Deprecated: use Network instead. Could be removed after Gitea 1.20
|
||||
Privileged bool `yaml:"privileged"` // Privileged indicates whether the container runs in privileged mode.
|
||||
Options string `yaml:"options"` // Options specifies additional options for the container.
|
||||
@@ -73,11 +72,6 @@ type Container struct {
|
||||
BindWorkdir bool `yaml:"bind_workdir"` // BindWorkdir binds the workspace to the host filesystem instead of using Docker volumes. Required for DinD when jobs use docker compose with bind mounts.
|
||||
}
|
||||
|
||||
type ContainerNetworkCreateOptions struct {
|
||||
EnableIPv4 *bool `yaml:"enable_ipv4"` // Enable or disable IPv4 for the network (true for docker by default)
|
||||
EnableIPv6 *bool `yaml:"enable_ipv6"` // Enable or disable IPv6 for the network (false for docker by default)
|
||||
}
|
||||
|
||||
// Host represents the configuration for the host.
|
||||
type Host struct {
|
||||
WorkdirParent string `yaml:"workdir_parent"` // WorkdirParent specifies the parent directory for the host's working directory.
|
||||
|
||||
@@ -117,50 +117,3 @@ func TestLoadDefault_MalformedYAMLReturnsParseError(t *testing.T) {
|
||||
assert.Contains(t, err.Error(), "parse config file")
|
||||
assert.NotContains(t, err.Error(), "defaults metadata")
|
||||
}
|
||||
|
||||
func TestContainerNetworkCreateOptions(t *testing.T) {
|
||||
// Verify that the enable_ipv4/enable_ipv6 YAML keys unmarshal into the *bool fields,
|
||||
// distinguishing an explicit true/false from an omitted key (nil). A nil here is
|
||||
// forwarded as-is to Docker, which applies its own default.
|
||||
loadOptions := func(t *testing.T, yaml string) ContainerNetworkCreateOptions {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "config.yaml")
|
||||
require.NoError(t, os.WriteFile(path, []byte(yaml), 0o600))
|
||||
|
||||
cfg, err := LoadDefault(path)
|
||||
require.NoError(t, err)
|
||||
return cfg.Container.NetworkCreateOptions
|
||||
}
|
||||
|
||||
t.Run("enable_ipv6 true unmarshals to non-nil true", func(t *testing.T) {
|
||||
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv6: true\n")
|
||||
require.NotNil(t, opts.EnableIPv6)
|
||||
assert.True(t, *opts.EnableIPv6)
|
||||
})
|
||||
|
||||
t.Run("enable_ipv6 false unmarshals to non-nil false", func(t *testing.T) {
|
||||
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv6: false\n")
|
||||
require.NotNil(t, opts.EnableIPv6)
|
||||
assert.False(t, *opts.EnableIPv6)
|
||||
})
|
||||
|
||||
t.Run("enable_ipv4 false unmarshals to non-nil false", func(t *testing.T) {
|
||||
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv4: false\n")
|
||||
require.NotNil(t, opts.EnableIPv4)
|
||||
assert.False(t, *opts.EnableIPv4)
|
||||
})
|
||||
|
||||
t.Run("omitted keys stay nil", func(t *testing.T) {
|
||||
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv4: true\n")
|
||||
require.NotNil(t, opts.EnableIPv4)
|
||||
assert.True(t, *opts.EnableIPv4)
|
||||
assert.Nil(t, opts.EnableIPv6, "an omitted enable_ipv6 must remain nil so Docker's default applies")
|
||||
})
|
||||
|
||||
t.Run("omitted block leaves both nil", func(t *testing.T) {
|
||||
opts := loadOptions(t, "container:\n network: \"\"\n")
|
||||
assert.Nil(t, opts.EnableIPv4)
|
||||
assert.Nil(t, opts.EnableIPv6)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user