mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-15 14:24:22 +02:00
Compare commits
4 Commits
renovate/g
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2963716953 | ||
|
|
3996d6d032 | ||
|
|
205af7cd01 | ||
|
|
33e6d1d8ff |
@@ -12,6 +12,13 @@ import (
|
|||||||
// LineHandler is a callback function for handling a line
|
// LineHandler is a callback function for handling a line
|
||||||
type LineHandler func(line string) bool
|
type LineHandler func(line string) bool
|
||||||
|
|
||||||
|
// Flusher is implemented by writers that buffer a trailing, not-yet-terminated
|
||||||
|
// line. Callers should flush once the underlying stream has reached EOF so the
|
||||||
|
// final line (when it is not newline-terminated) is not lost.
|
||||||
|
type Flusher interface {
|
||||||
|
Flush()
|
||||||
|
}
|
||||||
|
|
||||||
type lineWriter struct {
|
type lineWriter struct {
|
||||||
buffer bytes.Buffer
|
buffer bytes.Buffer
|
||||||
handlers []LineHandler
|
handlers []LineHandler
|
||||||
@@ -24,6 +31,14 @@ func NewLineWriter(handlers ...LineHandler) io.Writer {
|
|||||||
return w
|
return w
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FlushWriter flushes w if it implements Flusher. It is a no-op otherwise, so
|
||||||
|
// callers can flush an io.Writer without knowing its concrete type.
|
||||||
|
func FlushWriter(w io.Writer) {
|
||||||
|
if f, ok := w.(Flusher); ok {
|
||||||
|
f.Flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (lw *lineWriter) Write(p []byte) (n int, err error) {
|
func (lw *lineWriter) Write(p []byte) (n int, err error) {
|
||||||
pBuf := bytes.NewBuffer(p)
|
pBuf := bytes.NewBuffer(p)
|
||||||
written := 0
|
written := 0
|
||||||
@@ -44,6 +59,17 @@ func (lw *lineWriter) Write(p []byte) (n int, err error) {
|
|||||||
return written, nil
|
return written, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Flush emits any buffered, not-yet-newline-terminated content as a final line.
|
||||||
|
// It is safe to call multiple times; subsequent calls with an empty buffer are
|
||||||
|
// no-ops.
|
||||||
|
func (lw *lineWriter) Flush() {
|
||||||
|
if lw.buffer.Len() == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
lw.handleLine(lw.buffer.String())
|
||||||
|
lw.buffer.Reset()
|
||||||
|
}
|
||||||
|
|
||||||
func (lw *lineWriter) handleLine(line string) {
|
func (lw *lineWriter) handleLine(line string) {
|
||||||
for _, h := range lw.handlers {
|
for _, h := range lw.handlers {
|
||||||
ok := h(line)
|
ok := h(line)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
package common
|
package common
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
@@ -39,3 +40,33 @@ func TestLineWriter(t *testing.T) {
|
|||||||
assert.Equal(" and another\n", lines[2])
|
assert.Equal(" and another\n", lines[2])
|
||||||
assert.Equal("last line\n", lines[3])
|
assert.Equal("last line\n", lines[3])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLineWriterFlush(t *testing.T) {
|
||||||
|
lines := make([]string, 0)
|
||||||
|
lineHandler := func(s string) bool {
|
||||||
|
lines = append(lines, s)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
lineWriter := NewLineWriter(lineHandler)
|
||||||
|
|
||||||
|
assert := assert.New(t)
|
||||||
|
_, err := lineWriter.Write([]byte("complete line\npartial line without newline"))
|
||||||
|
assert.NoError(err) //nolint:testifylint // pre-existing pattern from nektos/act
|
||||||
|
|
||||||
|
// Only the newline-terminated line is emitted before flushing.
|
||||||
|
assert.Equal([]string{"complete line\n"}, lines)
|
||||||
|
|
||||||
|
// Flushing emits the buffered, not-yet-terminated trailing line.
|
||||||
|
FlushWriter(lineWriter)
|
||||||
|
assert.Equal([]string{"complete line\n", "partial line without newline"}, lines)
|
||||||
|
|
||||||
|
// Flushing again is a no-op: nothing is buffered.
|
||||||
|
FlushWriter(lineWriter)
|
||||||
|
assert.Len(lines, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFlushWriterIgnoresNonFlusher(t *testing.T) {
|
||||||
|
// FlushWriter must be a safe no-op for writers that do not buffer lines.
|
||||||
|
assert.NotPanics(t, func() { FlushWriter(io.Discard) })
|
||||||
|
}
|
||||||
|
|||||||
@@ -84,6 +84,12 @@ type NewDockerBuildExecutorInput struct {
|
|||||||
Platform string
|
Platform string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewDockerNetworkCreateExecutorInput the input for the NewDockerNetworkCreateExecutor function
|
||||||
|
type NewDockerNetworkCreateExecutorInput struct {
|
||||||
|
EnableIPv4 *bool
|
||||||
|
EnableIPv6 *bool
|
||||||
|
}
|
||||||
|
|
||||||
// NewDockerPullExecutorInput the input for the NewDockerPullExecutor function
|
// NewDockerPullExecutorInput the input for the NewDockerPullExecutor function
|
||||||
type NewDockerPullExecutorInput struct {
|
type NewDockerPullExecutorInput struct {
|
||||||
Image string
|
Image string
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import (
|
|||||||
"github.com/moby/moby/client"
|
"github.com/moby/moby/client"
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewDockerNetworkCreateExecutor(name string) common.Executor {
|
func NewDockerNetworkCreateExecutor(name string, opts NewDockerNetworkCreateExecutorInput) common.Executor {
|
||||||
return func(ctx context.Context) error {
|
return func(ctx context.Context) error {
|
||||||
cli, err := GetDockerClient(ctx)
|
cli, err := GetDockerClient(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -39,6 +39,8 @@ func NewDockerNetworkCreateExecutor(name string) common.Executor {
|
|||||||
_, err = cli.NetworkCreate(ctx, name, client.NetworkCreateOptions{
|
_, err = cli.NetworkCreate(ctx, name, client.NetworkCreateOptions{
|
||||||
Driver: "bridge",
|
Driver: "bridge",
|
||||||
Scope: "local",
|
Scope: "local",
|
||||||
|
EnableIPv4: opts.EnableIPv4,
|
||||||
|
EnableIPv6: opts.EnableIPv6,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import (
|
|||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"gitea.com/gitea/runner/act/common"
|
"gitea.com/gitea/runner/act/common"
|
||||||
"gitea.com/gitea/runner/act/filecollector"
|
"gitea.com/gitea/runner/act/filecollector"
|
||||||
@@ -45,6 +46,13 @@ import (
|
|||||||
"github.com/spf13/pflag"
|
"github.com/spf13/pflag"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// drainGracePeriod bounds how long we wait for an output-copy goroutine to
|
||||||
|
// finish draining a container's output before returning, so that neither a
|
||||||
|
// cancellation (waitForCommand) nor a normal container exit (wait) truncates
|
||||||
|
// the tail of the log. It is a safety bound: in the common case the stream
|
||||||
|
// reaches EOF and the goroutine returns well before this elapses.
|
||||||
|
const drainGracePeriod = 2 * time.Second
|
||||||
|
|
||||||
// NewContainer creates a reference to a container
|
// NewContainer creates a reference to a container
|
||||||
func NewContainer(input *NewContainerInput) ExecutionsEnvironment {
|
func NewContainer(input *NewContainerInput) ExecutionsEnvironment {
|
||||||
cr := new(containerReference)
|
cr := new(containerReference)
|
||||||
@@ -229,6 +237,10 @@ type containerReference struct {
|
|||||||
input *NewContainerInput
|
input *NewContainerInput
|
||||||
UID int
|
UID int
|
||||||
GID int
|
GID int
|
||||||
|
// attachDone is closed by the attach() streaming goroutine once it has
|
||||||
|
// drained and flushed the container's output. wait() blocks on it so the
|
||||||
|
// tail of the log lands before the step proceeds.
|
||||||
|
attachDone chan struct{}
|
||||||
LinuxContainerEnvironmentExtensions
|
LinuxContainerEnvironmentExtensions
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -730,7 +742,9 @@ func (cr *containerReference) tryReadGID() common.Executor {
|
|||||||
func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal bool, resp client.HijackedResponse, _ client.ExecCreateResult, _, _ string) error {
|
func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal bool, resp client.HijackedResponse, _ client.ExecCreateResult, _, _ string) error {
|
||||||
logger := common.Logger(ctx)
|
logger := common.Logger(ctx)
|
||||||
|
|
||||||
cmdResponse := make(chan error)
|
// Buffered so the copy goroutine never blocks on send if the grace-period
|
||||||
|
// drain below times out and no one is left to receive.
|
||||||
|
cmdResponse := make(chan error, 1)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
var outWriter io.Writer
|
var outWriter io.Writer
|
||||||
@@ -749,6 +763,11 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
|
|||||||
} else {
|
} else {
|
||||||
_, err = io.Copy(outWriter, resp.Reader)
|
_, err = io.Copy(outWriter, resp.Reader)
|
||||||
}
|
}
|
||||||
|
// Flush any buffered, not-yet-newline-terminated trailing line so the
|
||||||
|
// final line of a command's output is not lost (e.g. an error message
|
||||||
|
// printed without a trailing newline before the process exits).
|
||||||
|
common.FlushWriter(outWriter)
|
||||||
|
common.FlushWriter(errWriter)
|
||||||
cmdResponse <- err
|
cmdResponse <- err
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@@ -760,6 +779,16 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
|
|||||||
logger.Warnf("Failed to send CTRL+C: %+s", err)
|
logger.Warnf("Failed to send CTRL+C: %+s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Give the copy goroutine a brief grace period to drain output already
|
||||||
|
// produced by the command before we return, so cancellation does not
|
||||||
|
// truncate the tail of the log. The goroutine exits once the hijacked
|
||||||
|
// stream is closed by resp.Close() in the caller's defer.
|
||||||
|
select {
|
||||||
|
case <-cmdResponse:
|
||||||
|
case <-time.After(drainGracePeriod):
|
||||||
|
logger.Warn("Timed out draining command output after cancellation")
|
||||||
|
}
|
||||||
|
|
||||||
// we return the context canceled error to prevent other steps
|
// we return the context canceled error to prevent other steps
|
||||||
// from executing
|
// from executing
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
@@ -945,14 +974,23 @@ func (cr *containerReference) attach() common.Executor {
|
|||||||
if errWriter == nil {
|
if errWriter == nil {
|
||||||
errWriter = os.Stderr
|
errWriter = os.Stderr
|
||||||
}
|
}
|
||||||
|
done := make(chan struct{})
|
||||||
|
cr.attachDone = done
|
||||||
go func() {
|
go func() {
|
||||||
|
defer close(done)
|
||||||
|
var copyErr error
|
||||||
if !isTerminal || os.Getenv("NORAW") != "" {
|
if !isTerminal || os.Getenv("NORAW") != "" {
|
||||||
_, err = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
|
_, copyErr = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
|
||||||
} else {
|
} else {
|
||||||
_, err = io.Copy(outWriter, out.Reader)
|
_, copyErr = io.Copy(outWriter, out.Reader)
|
||||||
}
|
}
|
||||||
if err != nil {
|
// Flush any buffered, not-yet-newline-terminated trailing line once
|
||||||
common.Logger(ctx).Error(err)
|
// the stream reaches EOF, so the final line of the container's
|
||||||
|
// output is not lost when it is not newline-terminated.
|
||||||
|
common.FlushWriter(outWriter)
|
||||||
|
common.FlushWriter(errWriter)
|
||||||
|
if copyErr != nil {
|
||||||
|
common.Logger(ctx).Error(copyErr)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
return nil
|
return nil
|
||||||
@@ -991,6 +1029,18 @@ func (cr *containerReference) wait() common.Executor {
|
|||||||
|
|
||||||
logger.Debugf("Return status: %v", statusCode)
|
logger.Debugf("Return status: %v", statusCode)
|
||||||
|
|
||||||
|
// The container has exited; wait for the attach() streaming goroutine to
|
||||||
|
// finish draining and flushing its output before returning, so the tail
|
||||||
|
// of the log is not lost. Bounded so a stuck stream cannot hang the step.
|
||||||
|
if cr.attachDone != nil {
|
||||||
|
select {
|
||||||
|
case <-cr.attachDone:
|
||||||
|
case <-time.After(drainGracePeriod):
|
||||||
|
logger.Warn("Timed out draining container output")
|
||||||
|
}
|
||||||
|
cr.attachDone = nil
|
||||||
|
}
|
||||||
|
|
||||||
if statusCode == 0 {
|
if statusCode == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
@@ -20,6 +21,7 @@ import (
|
|||||||
"gitea.com/gitea/runner/act/common"
|
"gitea.com/gitea/runner/act/common"
|
||||||
|
|
||||||
cerrdefs "github.com/containerd/errdefs"
|
cerrdefs "github.com/containerd/errdefs"
|
||||||
|
"github.com/moby/moby/api/pkg/stdcopy"
|
||||||
"github.com/moby/moby/api/types/container"
|
"github.com/moby/moby/api/types/container"
|
||||||
mobyclient "github.com/moby/moby/client"
|
mobyclient "github.com/moby/moby/client"
|
||||||
"github.com/sirupsen/logrus/hooks/test"
|
"github.com/sirupsen/logrus/hooks/test"
|
||||||
@@ -89,6 +91,11 @@ func (m *mockDockerClient) ExecInspect(ctx context.Context, execID string, opts
|
|||||||
return args.Get(0).(mobyclient.ExecInspectResult), args.Error(1)
|
return args.Get(0).(mobyclient.ExecInspectResult), args.Error(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *mockDockerClient) ContainerAttach(ctx context.Context, containerID string, opts mobyclient.ContainerAttachOptions) (mobyclient.ContainerAttachResult, error) {
|
||||||
|
args := m.Called(ctx, containerID, opts)
|
||||||
|
return args.Get(0).(mobyclient.ContainerAttachResult), args.Error(1)
|
||||||
|
}
|
||||||
|
|
||||||
func (m *mockDockerClient) ContainerWait(ctx context.Context, containerID string, opts mobyclient.ContainerWaitOptions) mobyclient.ContainerWaitResult {
|
func (m *mockDockerClient) ContainerWait(ctx context.Context, containerID string, opts mobyclient.ContainerWaitOptions) mobyclient.ContainerWaitResult {
|
||||||
args := m.Called(ctx, containerID, opts)
|
args := m.Called(ctx, containerID, opts)
|
||||||
return args.Get(0).(mobyclient.ContainerWaitResult)
|
return args.Get(0).(mobyclient.ContainerWaitResult)
|
||||||
@@ -206,6 +213,71 @@ func TestDockerExecFailure(t *testing.T) {
|
|||||||
client.AssertExpectations(t)
|
client.AssertExpectations(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// stdcopyFrame wraps payload in a single Docker multiplexed-stream frame, the
|
||||||
|
// format StdCopy expects: an 8-byte header (stream type + 4-byte big-endian
|
||||||
|
// length) followed by the payload.
|
||||||
|
func stdcopyFrame(stream stdcopy.StdType, payload string) []byte {
|
||||||
|
b := make([]byte, 8+len(payload))
|
||||||
|
b[0] = byte(stream)
|
||||||
|
binary.BigEndian.PutUint32(b[4:8], uint32(len(payload)))
|
||||||
|
copy(b[8:], payload)
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDockerAttachFlushesTrailingLine verifies that wait() blocks until the
|
||||||
|
// attach() streaming goroutine has drained and flushed the container's output,
|
||||||
|
// so a final line without a trailing newline is not lost.
|
||||||
|
func TestDockerAttachFlushesTrailingLine(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
framed := bytes.NewBuffer(stdcopyFrame(stdcopy.Stdout, "line one\nlast line without newline"))
|
||||||
|
|
||||||
|
var lines []string
|
||||||
|
logWriter := common.NewLineWriter(func(s string) bool {
|
||||||
|
lines = append(lines, s)
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
|
||||||
|
client := &mockDockerClient{}
|
||||||
|
client.On("ContainerAttach", ctx, "123", mock.AnythingOfType("client.ContainerAttachOptions")).
|
||||||
|
Return(mobyclient.ContainerAttachResult{
|
||||||
|
HijackedResponse: mobyclient.HijackedResponse{
|
||||||
|
Conn: &mockConn{},
|
||||||
|
Reader: bufio.NewReader(framed),
|
||||||
|
},
|
||||||
|
}, nil)
|
||||||
|
|
||||||
|
statusCh := make(chan container.WaitResponse, 1)
|
||||||
|
statusCh <- container.WaitResponse{StatusCode: 0}
|
||||||
|
errCh := make(chan error, 1)
|
||||||
|
client.On("ContainerWait", ctx, "123", mobyclient.ContainerWaitOptions{Condition: container.WaitConditionNotRunning}).
|
||||||
|
Return(mobyclient.ContainerWaitResult{
|
||||||
|
Result: (<-chan container.WaitResponse)(statusCh),
|
||||||
|
Error: (<-chan error)(errCh),
|
||||||
|
})
|
||||||
|
|
||||||
|
cr := &containerReference{
|
||||||
|
id: "123",
|
||||||
|
cli: client,
|
||||||
|
input: &NewContainerInput{
|
||||||
|
Image: "image",
|
||||||
|
Stdout: logWriter,
|
||||||
|
Stderr: logWriter,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NoError(t, cr.attach()(ctx))
|
||||||
|
require.NoError(t, cr.wait()(ctx))
|
||||||
|
|
||||||
|
// wait() must have blocked until the goroutine drained AND flushed; the
|
||||||
|
// trailing, non-newline-terminated line must therefore be present. Reading
|
||||||
|
// lines here is race-free because wait() synchronizes on attachDone, which
|
||||||
|
// the goroutine closes after the final append.
|
||||||
|
assert.Equal(t, []string{"line one\n", "last line without newline"}, lines)
|
||||||
|
|
||||||
|
client.AssertExpectations(t)
|
||||||
|
}
|
||||||
|
|
||||||
func TestDockerWaitFailure(t *testing.T) {
|
func TestDockerWaitFailure(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ func NewDockerVolumeRemoveExecutor(volume string, force bool) common.Executor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewDockerNetworkCreateExecutor(name string) common.Executor {
|
func NewDockerNetworkCreateExecutor(name string, opts NewDockerNetworkCreateExecutorInput) common.Executor {
|
||||||
return func(ctx context.Context) error {
|
return func(ctx context.Context) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -323,15 +323,15 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
|||||||
cmd.Dir = wd
|
cmd.Dir = wd
|
||||||
cmd.SysProcAttr = getSysProcAttr(cmdline, false)
|
cmd.SysProcAttr = getSysProcAttr(cmdline, false)
|
||||||
|
|
||||||
// On Windows a step often launches a process tree (a shell that starts a
|
// A step often launches a process tree (a shell that starts a child which
|
||||||
// child which spawns further GUI or background processes). The default
|
// spawns further background or GUI processes). The default context
|
||||||
// context cancellation only kills the direct child, leaving the rest of the
|
// cancellation only kills the direct child, leaving the rest of the tree
|
||||||
// tree running; and because the orphans inherit cmd's stdout/stderr pipe,
|
// running; and because the orphans inherit cmd's stdout/stderr pipe,
|
||||||
// cmd.Wait() would block forever, hanging the runner. Kill the whole tree
|
// cmd.Wait() would block forever, hanging the runner. Kill the whole tree on
|
||||||
// via a Job Object on cancellation, and bound the wait so a leftover pipe
|
// cancellation — via a Job Object on Windows and the process group on Unix
|
||||||
// writer can never hang Wait indefinitely.
|
// (see processKiller) — and bound the wait so a leftover pipe writer can
|
||||||
|
// never hang Wait indefinitely.
|
||||||
var killer atomic.Pointer[processKiller]
|
var killer atomic.Pointer[processKiller]
|
||||||
if runtime.GOOS == "windows" {
|
|
||||||
cmd.Cancel = func() error {
|
cmd.Cancel = func() error {
|
||||||
if k := killer.Load(); k != nil {
|
if k := killer.Load(); k != nil {
|
||||||
return k.Kill()
|
return k.Kill()
|
||||||
@@ -341,10 +341,10 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Once the step process has exited, give its I/O pipes at most this long
|
// Once the step process has exited, give its I/O pipes at most this long to
|
||||||
// to drain before Wait force-closes them and returns (Go's WaitDelay).
|
// drain before Wait force-closes them and returns (Go's WaitDelay). This
|
||||||
|
// also covers a step that backgrounds a process holding the pipe open.
|
||||||
cmd.WaitDelay = 10 * time.Second
|
cmd.WaitDelay = 10 * time.Second
|
||||||
}
|
|
||||||
|
|
||||||
var ppty *os.File
|
var ppty *os.File
|
||||||
var tty *os.File
|
var tty *os.File
|
||||||
@@ -375,18 +375,17 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
|||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if runtime.GOOS == "windows" {
|
// Capture the started process for tree-kill on cancellation: a Job Object on
|
||||||
// Assign the started process to a Job Object so cmd.Cancel can kill the
|
// Windows (children spawned afterwards are auto-included) and the process
|
||||||
// whole descendant tree. Children spawned afterwards are auto-included.
|
// group on Unix. On failure (e.g. Windows nested-job restrictions) we fall
|
||||||
// On failure (e.g. nested-job restrictions) we fall back to the default
|
// back to the default single-process kill; WaitDelay + end-of-job cleanup
|
||||||
// single-process kill; WaitDelay + end-of-job cleanup still apply.
|
// still apply.
|
||||||
if k, kerr := newProcessKiller(cmd.Process); kerr != nil {
|
if k, kerr := newProcessKiller(cmd.Process); kerr != nil {
|
||||||
common.Logger(ctx).Warnf("process tree kill setup failed, falling back to single-process kill: %v", kerr)
|
common.Logger(ctx).Warnf("process tree kill setup failed, falling back to single-process kill: %v", kerr)
|
||||||
} else {
|
} else {
|
||||||
killer.Store(k)
|
killer.Store(k)
|
||||||
defer k.Close()
|
defer k.Close()
|
||||||
}
|
}
|
||||||
}
|
|
||||||
err = cmd.Wait()
|
err = cmd.Wait()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
var exitErr *exec.ExitError
|
var exitErr *exec.ExitError
|
||||||
@@ -429,6 +428,24 @@ func (e *HostEnvironment) UpdateFromEnv(srcPath string, env *map[string]string)
|
|||||||
return parseEnvFile(e, srcPath, env)
|
return parseEnvFile(e, srcPath, env)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// removeAll is the filesystem delete used by removeAllWithContext. A package
|
||||||
|
// var so tests can substitute a blocking stub without patching os.RemoveAll.
|
||||||
|
var removeAll = os.RemoveAll
|
||||||
|
|
||||||
|
// removeAllWithContext runs removeAll in a goroutine and returns once it
|
||||||
|
// finishes or ctx is cancelled. On cancellation the goroutine is left running —
|
||||||
|
// a delete blocked inside a syscall cannot be interrupted (see runWithTimeout).
|
||||||
|
func removeAllWithContext(ctx context.Context, path string) error {
|
||||||
|
done := make(chan error, 1)
|
||||||
|
go func() { done <- removeAll(path) }()
|
||||||
|
select {
|
||||||
|
case err := <-done:
|
||||||
|
return err
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func removePathWithRetry(ctx context.Context, path string) error {
|
func removePathWithRetry(ctx context.Context, path string) error {
|
||||||
if path == "" {
|
if path == "" {
|
||||||
return nil
|
return nil
|
||||||
@@ -448,10 +465,13 @@ func removePathWithRetry(ctx context.Context, path string) error {
|
|||||||
case <-time.After(delay):
|
case <-time.After(delay):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lastErr = os.RemoveAll(path)
|
lastErr = removeAllWithContext(ctx, path)
|
||||||
if lastErr == nil {
|
if lastErr == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
if errors.Is(lastErr, context.DeadlineExceeded) {
|
||||||
|
return lastErr
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return lastErr
|
return lastErr
|
||||||
}
|
}
|
||||||
@@ -533,23 +553,61 @@ func (e *HostEnvironment) terminateRunningProcesses(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hostCleanupTimeout bounds each filesystem-teardown phase of the host
|
||||||
|
// environment so a single stalled delete cannot wedge the runner slot forever.
|
||||||
|
// A var (not const) so tests can shrink it.
|
||||||
|
var hostCleanupTimeout = 30 * time.Second
|
||||||
|
|
||||||
|
// runWithTimeout runs fn in a goroutine and returns once it finishes or timeout
|
||||||
|
// elapses, whichever comes first. On timeout the goroutine is left running — an
|
||||||
|
// os.RemoveAll blocked inside a delete syscall (AV/EDR filter drivers, an
|
||||||
|
// unresponsive network mount, a dying disk) cannot be interrupted — and
|
||||||
|
// context.DeadlineExceeded is returned. Leaking the goroutine and the scratch
|
||||||
|
// state it was deleting is strictly better than blocking the caller forever and
|
||||||
|
// permanently losing the runner's capacity slot; the leaked scratch dir is
|
||||||
|
// reclaimed later by the runner's idle stale-dir sweep.
|
||||||
|
func runWithTimeout(fn func(), timeout time.Duration) error {
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
defer close(done)
|
||||||
|
fn()
|
||||||
|
}()
|
||||||
|
timer := time.NewTimer(timeout)
|
||||||
|
defer timer.Stop()
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
return nil
|
||||||
|
case <-timer.C:
|
||||||
|
return context.DeadlineExceeded
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (e *HostEnvironment) Remove() common.Executor {
|
func (e *HostEnvironment) Remove() common.Executor {
|
||||||
return func(ctx context.Context) error {
|
return func(ctx context.Context) error {
|
||||||
|
logger := common.Logger(ctx)
|
||||||
|
|
||||||
// Ensure any lingering child processes are ended before attempting
|
// Ensure any lingering child processes are ended before attempting
|
||||||
// to remove the workspace (Windows file locks otherwise prevent cleanup).
|
// to remove the workspace (Windows file locks otherwise prevent cleanup).
|
||||||
e.terminateRunningProcesses(ctx)
|
e.terminateRunningProcesses(ctx)
|
||||||
|
|
||||||
// Only removes per-job misc state. Must not remove the cache/toolcache root.
|
// Only removes per-job misc state. Must not remove the cache/toolcache root.
|
||||||
|
// Bound it: CleanUp is a caller-supplied, typically unbounded os.RemoveAll,
|
||||||
|
// and a delete stalled by a filesystem filter driver would otherwise hang
|
||||||
|
// the job forever at "Cleaning up container" and hold the capacity slot.
|
||||||
if e.CleanUp != nil {
|
if e.CleanUp != nil {
|
||||||
e.CleanUp()
|
logger.Debugf("running host environment cleanup callback")
|
||||||
|
if err := runWithTimeout(e.CleanUp, hostCleanupTimeout); err != nil {
|
||||||
|
logger.Warnf("host environment cleanup did not finish within %s; continuing job completion, scratch state may be leaked and is reclaimed by the idle stale-dir sweep", hostCleanupTimeout)
|
||||||
|
} else {
|
||||||
|
logger.Debugf("host environment cleanup callback finished")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detach: a cancelled ctx would skip removePathWithRetry's retries,
|
// Detach: a cancelled ctx would skip removePathWithRetry's retries,
|
||||||
// which absorb Windows file-handle release lag after the kill above.
|
// which absorb Windows file-handle release lag after the kill above.
|
||||||
rmCtx, rmCancel := context.WithTimeout(context.Background(), 30*time.Second)
|
rmCtx, rmCancel := context.WithTimeout(context.Background(), hostCleanupTimeout)
|
||||||
defer rmCancel()
|
defer rmCancel()
|
||||||
|
|
||||||
logger := common.Logger(ctx)
|
|
||||||
var errs []error
|
var errs []error
|
||||||
if err := removePathWithRetry(rmCtx, e.Path); err != nil {
|
if err := removePathWithRetry(rmCtx, e.Path); err != nil {
|
||||||
logger.Warnf("failed to remove host misc state %s: %v", e.Path, err)
|
logger.Warnf("failed to remove host misc state %s: %v", e.Path, err)
|
||||||
@@ -561,8 +619,15 @@ func (e *HostEnvironment) Remove() common.Executor {
|
|||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for _, err := range errs {
|
||||||
|
if !errors.Is(err, context.DeadlineExceeded) {
|
||||||
return errors.Join(errs...)
|
return errors.Join(errs...)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// Bounded teardown timed out; warnings already logged above. Do not
|
||||||
|
// fail job completion — leaked scratch is reclaimed by the idle sweep.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *HostEnvironment) ToContainerPath(path string) string {
|
func (e *HostEnvironment) ToContainerPath(path string) string {
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"gitea.com/gitea/runner/act/common"
|
"gitea.com/gitea/runner/act/common"
|
||||||
|
|
||||||
@@ -188,6 +189,118 @@ func TestHostEnvironmentRemoveCleansWorkdirWhenOwned(t *testing.T) {
|
|||||||
assert.ErrorIs(t, err, os.ErrNotExist)
|
assert.ErrorIs(t, err, os.ErrNotExist)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRemoveAllWithContextDoesNotHangOnStuckDelete(t *testing.T) {
|
||||||
|
release := make(chan struct{})
|
||||||
|
stubDone := make(chan struct{})
|
||||||
|
|
||||||
|
orig := removeAll
|
||||||
|
removeAll = func(string) error {
|
||||||
|
defer close(stubDone)
|
||||||
|
<-release
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// removeAllWithContext intentionally leaks the delete goroutine on timeout,
|
||||||
|
// and that goroutine still references removeAll. Unblock it and wait for it
|
||||||
|
// to return before restoring the var, so the restore can't race the read.
|
||||||
|
t.Cleanup(func() {
|
||||||
|
close(release)
|
||||||
|
<-stubDone
|
||||||
|
removeAll = orig
|
||||||
|
})
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
err := removeAllWithContext(ctx, t.TempDir())
|
||||||
|
require.ErrorIs(t, err, context.DeadlineExceeded)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHostEnvironmentRemoveDoesNotHangOnStuckCleanUp guards against a stalled
|
||||||
|
// CleanUp callback (e.g. an os.RemoveAll blocked by an AV/EDR filter driver or
|
||||||
|
// an unresponsive mount) wedging the runner slot forever at "Cleaning up
|
||||||
|
// container". Remove must time out the callback and complete job teardown.
|
||||||
|
func TestHostEnvironmentRemoveDoesNotHangOnStuckCleanUp(t *testing.T) {
|
||||||
|
// Keep the suite fast: shrink the per-phase teardown timeout for this test.
|
||||||
|
orig := hostCleanupTimeout
|
||||||
|
hostCleanupTimeout = 100 * time.Millisecond
|
||||||
|
t.Cleanup(func() { hostCleanupTimeout = orig })
|
||||||
|
|
||||||
|
logger := logrus.New()
|
||||||
|
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
|
||||||
|
base := t.TempDir()
|
||||||
|
path := filepath.Join(base, "misc", "hostexecutor")
|
||||||
|
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||||
|
|
||||||
|
release := make(chan struct{})
|
||||||
|
t.Cleanup(func() { close(release) }) // unblock the leaked goroutine at test end
|
||||||
|
|
||||||
|
e := &HostEnvironment{
|
||||||
|
Path: path,
|
||||||
|
CleanUp: func() {
|
||||||
|
<-release // simulate a delete syscall stuck indefinitely
|
||||||
|
},
|
||||||
|
StdOut: os.Stdout,
|
||||||
|
}
|
||||||
|
|
||||||
|
done := make(chan error, 1)
|
||||||
|
go func() { done <- e.Remove()(ctx) }()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case err := <-done:
|
||||||
|
require.NoError(t, err)
|
||||||
|
case <-time.After(10 * time.Second):
|
||||||
|
t.Fatal("Remove() hung on a stuck CleanUp callback")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHostEnvironmentRemoveDoesNotHangOnStuckPathRemoval guards against a
|
||||||
|
// stalled os.RemoveAll on the misc/workspace paths (same AV/EDR wedge as
|
||||||
|
// #1023) wedging job completion after the CleanUp callback has already timed
|
||||||
|
// out or finished.
|
||||||
|
func TestHostEnvironmentRemoveDoesNotHangOnStuckPathRemoval(t *testing.T) {
|
||||||
|
origTimeout := hostCleanupTimeout
|
||||||
|
hostCleanupTimeout = 100 * time.Millisecond
|
||||||
|
t.Cleanup(func() { hostCleanupTimeout = origTimeout })
|
||||||
|
|
||||||
|
release := make(chan struct{})
|
||||||
|
stubDone := make(chan struct{})
|
||||||
|
|
||||||
|
origRemoveAll := removeAll
|
||||||
|
removeAll = func(string) error {
|
||||||
|
defer close(stubDone)
|
||||||
|
<-release
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// The stuck delete goroutine outlives the timed-out Remove and still reads
|
||||||
|
// removeAll; unblock it and wait before restoring to avoid a restore/read race.
|
||||||
|
t.Cleanup(func() {
|
||||||
|
close(release)
|
||||||
|
<-stubDone
|
||||||
|
removeAll = origRemoveAll
|
||||||
|
})
|
||||||
|
|
||||||
|
logger := logrus.New()
|
||||||
|
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
|
||||||
|
base := t.TempDir()
|
||||||
|
path := filepath.Join(base, "misc", "hostexecutor")
|
||||||
|
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||||
|
|
||||||
|
e := &HostEnvironment{
|
||||||
|
Path: path,
|
||||||
|
StdOut: os.Stdout,
|
||||||
|
}
|
||||||
|
|
||||||
|
done := make(chan error, 1)
|
||||||
|
go func() { done <- e.Remove()(ctx) }()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case err := <-done:
|
||||||
|
require.NoError(t, err)
|
||||||
|
case <-time.After(10 * time.Second):
|
||||||
|
t.Fatal("Remove() hung on a stuck path removal")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestBuildWindowsWorkspaceKillScript(t *testing.T) {
|
func TestBuildWindowsWorkspaceKillScript(t *testing.T) {
|
||||||
t.Run("single dir", func(t *testing.T) {
|
t.Run("single dir", func(t *testing.T) {
|
||||||
s := buildWindowsWorkspaceKillScript([]string{`C:\workspace\job1`})
|
s := buildWindowsWorkspaceKillScript([]string{`C:\workspace\job1`})
|
||||||
|
|||||||
@@ -1,19 +1,29 @@
|
|||||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
//go:build !windows
|
//go:build plan9
|
||||||
|
|
||||||
package container
|
package container
|
||||||
|
|
||||||
import "os"
|
import "os"
|
||||||
|
|
||||||
// processKiller is a no-op on non-Windows platforms. The Job Object based
|
// processKiller falls back to single-process termination on platforms without
|
||||||
// tree-kill is only wired in on Windows (see exec()); elsewhere the default
|
// a process-group / Job Object tree-kill. The Job Object (Windows) and process
|
||||||
// exec.CommandContext cancellation and Setpgid handling apply.
|
// group (Unix) based tree-kills live in process_windows.go / process_unix.go;
|
||||||
type processKiller struct{}
|
// here we just kill the direct child, matching the previous default behaviour.
|
||||||
|
type processKiller struct {
|
||||||
|
p *os.Process
|
||||||
|
}
|
||||||
|
|
||||||
func newProcessKiller(_ *os.Process) (*processKiller, error) { return &processKiller{}, nil }
|
func newProcessKiller(p *os.Process) (*processKiller, error) {
|
||||||
|
return &processKiller{p: p}, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (k *processKiller) Kill() error { return nil }
|
func (k *processKiller) Kill() error {
|
||||||
|
if k == nil || k.p == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return k.p.Kill()
|
||||||
|
}
|
||||||
|
|
||||||
func (k *processKiller) Close() error { return nil }
|
func (k *processKiller) Close() error { return nil }
|
||||||
|
|||||||
56
act/container/process_unix.go
Normal file
56
act/container/process_unix.go
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
//go:build !windows && !plan9
|
||||||
|
|
||||||
|
package container
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// processKiller terminates a step process together with its whole process
|
||||||
|
// group, which is the Unix counterpart of the Windows Job Object tree-kill.
|
||||||
|
//
|
||||||
|
// Background: a step often launches a process tree (a shell that starts a child
|
||||||
|
// which in turn spawns further background processes). The default
|
||||||
|
// exec.CommandContext cancellation only kills the direct child, so cancelling a
|
||||||
|
// job left the rest of the tree running. Because those orphans inherited the
|
||||||
|
// step's stdout/stderr pipe, cmd.Wait() also blocked forever and the runner
|
||||||
|
// hung.
|
||||||
|
//
|
||||||
|
// Steps are started with Setpgid (or Setsid for the PTY path, see
|
||||||
|
// getSysProcAttr), which makes the step process the leader of a new process
|
||||||
|
// group whose ID equals its PID. Signalling the negative PID delivers to every
|
||||||
|
// process still in that group, so we can tear down the whole tree atomically on
|
||||||
|
// cancellation, which also closes the inherited pipe handles so cmd.Wait() can
|
||||||
|
// return.
|
||||||
|
type processKiller struct {
|
||||||
|
pgid int
|
||||||
|
}
|
||||||
|
|
||||||
|
// newProcessKiller captures the process group of p (an already-started
|
||||||
|
// process). Because the step is launched with Setpgid/Setsid, p is a group
|
||||||
|
// leader and its PGID equals its PID; children spawned afterwards stay in the
|
||||||
|
// same group unless they explicitly create their own.
|
||||||
|
func newProcessKiller(p *os.Process) (*processKiller, error) {
|
||||||
|
return &processKiller{pgid: p.Pid}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kill sends SIGKILL to the entire process group (the step process and every
|
||||||
|
// descendant that stayed in the group). A missing group (ESRCH) means the
|
||||||
|
// processes already exited and is not treated as an error.
|
||||||
|
func (k *processKiller) Kill() error {
|
||||||
|
if k == nil || k.pgid <= 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := syscall.Kill(-k.pgid, syscall.SIGKILL); err != nil && !errors.Is(err, syscall.ESRCH) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close is a no-op on Unix; there is no job handle to release.
|
||||||
|
func (k *processKiller) Close() error { return nil }
|
||||||
100
act/container/process_unix_test.go
Normal file
100
act/container/process_unix_test.go
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
//go:build !windows && !plan9
|
||||||
|
|
||||||
|
package container
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// processAlive reports whether pid refers to a still-running process. Signal 0
|
||||||
|
// performs error checking without delivering a signal: a nil error (or EPERM)
|
||||||
|
// means the process exists, ESRCH means it is gone.
|
||||||
|
//
|
||||||
|
// On Linux, zombie processes (state Z in /proc/<pid>/stat) appear alive to
|
||||||
|
// kill(0) but have already terminated — their corpse lingers until the parent
|
||||||
|
// calls wait(). In a Docker container the child may be reparented to a PID 1
|
||||||
|
// that does not reap promptly, so we treat zombies as not alive.
|
||||||
|
func processAlive(pid int) bool {
|
||||||
|
err := syscall.Kill(pid, 0)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// On Linux /proc is available; check whether the process is a zombie.
|
||||||
|
if b, readErr := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid)); readErr == nil {
|
||||||
|
// Format: "pid (comm) state ..." — state follows the closing ')' of the
|
||||||
|
// command name (which may itself contain spaces and parens).
|
||||||
|
rest := string(b)
|
||||||
|
if idx := strings.LastIndex(rest, ") "); idx >= 0 {
|
||||||
|
fields := strings.Fields(rest[idx+2:])
|
||||||
|
if len(fields) > 0 && fields[0] == "Z" {
|
||||||
|
return false // zombie: terminated but not yet reaped
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestProcessKillerKillsTree verifies that a process group captured by the
|
||||||
|
// killer is terminated together with a child the step spawns afterwards. This
|
||||||
|
// mirrors a step that launches a child which spawns further processes, where
|
||||||
|
// cancelling the job must take down the whole tree, not just the direct child.
|
||||||
|
func TestProcessKillerKillsTree(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
pidFile := filepath.Join(dir, "child.pid")
|
||||||
|
|
||||||
|
// Parent shell backgrounds a long-lived child (writing its PID to a file)
|
||||||
|
// and then sleeps. With job control off (non-interactive sh) the backgrounded
|
||||||
|
// child stays in the parent's process group, so the group kill must reach it.
|
||||||
|
script := fmt.Sprintf(`sleep 600 & echo $! > %q; sleep 600`, pidFile)
|
||||||
|
cmd := exec.Command("/bin/sh", "-c", script)
|
||||||
|
// Launch as its own process-group leader, exactly like a real step does (see
|
||||||
|
// getSysProcAttr), so the killer's PGID == the process PID.
|
||||||
|
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||||
|
require.NoError(t, cmd.Start())
|
||||||
|
t.Cleanup(func() {
|
||||||
|
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
|
||||||
|
_ = cmd.Wait()
|
||||||
|
})
|
||||||
|
|
||||||
|
killer, err := newProcessKiller(cmd.Process)
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer killer.Close()
|
||||||
|
|
||||||
|
// Wait for the backgrounded child PID to be reported.
|
||||||
|
var childPID int
|
||||||
|
require.Eventually(t, func() bool {
|
||||||
|
b, e := os.ReadFile(pidFile)
|
||||||
|
if e != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
s := strings.TrimSpace(string(b))
|
||||||
|
if s == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
childPID, _ = strconv.Atoi(s)
|
||||||
|
return childPID > 0 && processAlive(childPID)
|
||||||
|
}, 20*time.Second, 100*time.Millisecond, "child process should start")
|
||||||
|
|
||||||
|
// Killing the group must terminate both the parent and the backgrounded child.
|
||||||
|
require.NoError(t, killer.Kill())
|
||||||
|
// Reap the parent so it does not linger as a zombie (which would still report
|
||||||
|
// as alive); SIGKILL makes Wait return promptly.
|
||||||
|
_ = cmd.Wait()
|
||||||
|
|
||||||
|
require.Eventually(t, func() bool {
|
||||||
|
return !processAlive(childPID)
|
||||||
|
}, 20*time.Second, 100*time.Millisecond, "backgrounded child should be terminated")
|
||||||
|
}
|
||||||
@@ -48,8 +48,11 @@ func (rc *RunContext) commandHandler(ctx context.Context) common.LineHandler {
|
|||||||
if resumeCommand != "" && command != resumeCommand {
|
if resumeCommand != "" && command != resumeCommand {
|
||||||
// There should not be any emojis in the log output for Gitea.
|
// There should not be any emojis in the log output for Gitea.
|
||||||
// The code in the switch statement is the same.
|
// The code in the switch statement is the same.
|
||||||
|
// Return true (not false) so the line still reaches the raw_output
|
||||||
|
// log handler; otherwise everything between ::stop-commands:: and
|
||||||
|
// its end token is silently dropped from the step log.
|
||||||
logger.Infof("%s", line)
|
logger.Infof("%s", line)
|
||||||
return false
|
return true
|
||||||
}
|
}
|
||||||
arg = UnescapeCommandData(arg)
|
arg = UnescapeCommandData(arg)
|
||||||
kvPairs = unescapeKvPairs(kvPairs)
|
kvPairs = unescapeKvPairs(kvPairs)
|
||||||
|
|||||||
@@ -28,6 +28,29 @@ func TestSetEnv(t *testing.T) {
|
|||||||
a.Equal("valz", rc.Env["x"])
|
a.Equal("valz", rc.Env["x"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStopCommandsKeepsSuppressedLinesInLog(t *testing.T) {
|
||||||
|
a := assert.New(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
rc := new(RunContext)
|
||||||
|
handler := rc.commandHandler(ctx)
|
||||||
|
|
||||||
|
// Stop command processing until the matching end token is seen.
|
||||||
|
a.True(handler("::stop-commands::my-end-token\n"))
|
||||||
|
|
||||||
|
// A command-shaped line while stopped must not be executed (env unchanged),
|
||||||
|
// but must still return true so it reaches the raw_output log handler and is
|
||||||
|
// not dropped from the step log.
|
||||||
|
a.True(handler("::set-env name=x::valz\n"))
|
||||||
|
a.NotContains(rc.Env, "x")
|
||||||
|
|
||||||
|
// The matching end token resumes command processing.
|
||||||
|
a.True(handler("::my-end-token::\n"))
|
||||||
|
|
||||||
|
// Commands are processed again after resuming.
|
||||||
|
a.True(handler("::set-env name=y::valy\n"))
|
||||||
|
a.Equal("valy", rc.Env["y"])
|
||||||
|
}
|
||||||
|
|
||||||
func TestSetOutput(t *testing.T) {
|
func TestSetOutput(t *testing.T) {
|
||||||
a := assert.New(t)
|
a := assert.New(t)
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|||||||
@@ -462,6 +462,11 @@ func useStepLogger(rc *RunContext, stepModel *model.Step, stage stepStage, execu
|
|||||||
oldout, olderr := rc.JobContainer.ReplaceLogWriter(logWriter, logWriter)
|
oldout, olderr := rc.JobContainer.ReplaceLogWriter(logWriter, logWriter)
|
||||||
defer rc.JobContainer.ReplaceLogWriter(oldout, olderr)
|
defer rc.JobContainer.ReplaceLogWriter(oldout, olderr)
|
||||||
|
|
||||||
|
// Flush any buffered, not-yet-newline-terminated trailing line once the
|
||||||
|
// step has finished, so the final line of the step's output is not lost
|
||||||
|
// when it is not newline-terminated.
|
||||||
|
defer common.FlushWriter(logWriter)
|
||||||
|
|
||||||
return executor(ctx)
|
return executor(ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -471,7 +471,8 @@ func (rc *RunContext) startJobContainer() common.Executor {
|
|||||||
rc.pullServicesImages(rc.Config.ForcePull),
|
rc.pullServicesImages(rc.Config.ForcePull),
|
||||||
rc.JobContainer.Pull(rc.Config.ForcePull),
|
rc.JobContainer.Pull(rc.Config.ForcePull),
|
||||||
rc.stopJobContainer(),
|
rc.stopJobContainer(),
|
||||||
container.NewDockerNetworkCreateExecutor(networkName).IfBool(createAndDeleteNetwork),
|
container.NewDockerNetworkCreateExecutor(networkName, rc.Config.ContainerNetworkCreateOptions).
|
||||||
|
IfBool(createAndDeleteNetwork),
|
||||||
rc.startServiceContainers(networkName),
|
rc.startServiceContainers(networkName),
|
||||||
rc.JobContainer.Create(rc.Config.ContainerCapAdd, rc.Config.ContainerCapDrop),
|
rc.JobContainer.Create(rc.Config.ContainerCapAdd, rc.Config.ContainerCapDrop),
|
||||||
rc.JobContainer.Start(false),
|
rc.JobContainer.Start(false),
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.com/gitea/runner/act/common"
|
"gitea.com/gitea/runner/act/common"
|
||||||
|
"gitea.com/gitea/runner/act/container"
|
||||||
"gitea.com/gitea/runner/act/model"
|
"gitea.com/gitea/runner/act/model"
|
||||||
|
|
||||||
docker_container "github.com/moby/moby/api/types/container"
|
docker_container "github.com/moby/moby/api/types/container"
|
||||||
@@ -68,6 +69,7 @@ type Config struct {
|
|||||||
ReplaceGheActionTokenWithGithubCom string // Token of private action repo on GitHub.
|
ReplaceGheActionTokenWithGithubCom string // Token of private action repo on GitHub.
|
||||||
Matrix map[string]map[string]bool // Matrix config to run
|
Matrix map[string]map[string]bool // Matrix config to run
|
||||||
ContainerNetworkMode docker_container.NetworkMode // the network mode of job containers (the value of --network)
|
ContainerNetworkMode docker_container.NetworkMode // the network mode of job containers (the value of --network)
|
||||||
|
ContainerNetworkCreateOptions container.NewDockerNetworkCreateExecutorInput // the default network create options
|
||||||
ActionCache ActionCache // Use a custom ActionCache Implementation
|
ActionCache ActionCache // Use a custom ActionCache Implementation
|
||||||
|
|
||||||
PresetGitHubContext *model.GithubContext // the preset github context, overrides some fields like DefaultBranch, Env, Secrets etc.
|
PresetGitHubContext *model.GithubContext // the preset github context, overrides some fields like DefaultBranch, Env, Secrets etc.
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -36,7 +36,7 @@ require (
|
|||||||
github.com/stretchr/testify v1.11.1
|
github.com/stretchr/testify v1.11.1
|
||||||
github.com/timshannon/bolthold v0.0.0-20240314194003-30aac6950928
|
github.com/timshannon/bolthold v0.0.0-20240314194003-30aac6950928
|
||||||
go.etcd.io/bbolt v1.4.3
|
go.etcd.io/bbolt v1.4.3
|
||||||
go.yaml.in/yaml/v4 v4.0.0-rc.5
|
go.yaml.in/yaml/v4 v4.0.0-rc.3
|
||||||
golang.org/x/sys v0.46.0
|
golang.org/x/sys v0.46.0
|
||||||
golang.org/x/term v0.44.0
|
golang.org/x/term v0.44.0
|
||||||
google.golang.org/protobuf v1.36.11
|
google.golang.org/protobuf v1.36.11
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -232,8 +232,6 @@ go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
|||||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||||
go.yaml.in/yaml/v4 v4.0.0-rc.3 h1:3h1fjsh1CTAPjW7q/EMe+C8shx5d8ctzZTrLcs/j8Go=
|
go.yaml.in/yaml/v4 v4.0.0-rc.3 h1:3h1fjsh1CTAPjW7q/EMe+C8shx5d8ctzZTrLcs/j8Go=
|
||||||
go.yaml.in/yaml/v4 v4.0.0-rc.3/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0=
|
go.yaml.in/yaml/v4 v4.0.0-rc.3/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0=
|
||||||
go.yaml.in/yaml/v4 v4.0.0-rc.5 h1:JVliQq9EGOYaTgMi+k8BhUJyqcGk4ZqeuiN1Cirba9c=
|
|
||||||
go.yaml.in/yaml/v4 v4.0.0-rc.5/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0=
|
|
||||||
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||||
golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988=
|
golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988=
|
||||||
golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc=
|
golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc=
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import (
|
|||||||
|
|
||||||
"gitea.com/gitea/runner/act/artifactcache"
|
"gitea.com/gitea/runner/act/artifactcache"
|
||||||
"gitea.com/gitea/runner/act/common"
|
"gitea.com/gitea/runner/act/common"
|
||||||
|
"gitea.com/gitea/runner/act/container"
|
||||||
"gitea.com/gitea/runner/act/model"
|
"gitea.com/gitea/runner/act/model"
|
||||||
"gitea.com/gitea/runner/act/runner"
|
"gitea.com/gitea/runner/act/runner"
|
||||||
"gitea.com/gitea/runner/internal/pkg/client"
|
"gitea.com/gitea/runner/internal/pkg/client"
|
||||||
@@ -33,7 +34,7 @@ import (
|
|||||||
|
|
||||||
"connectrpc.com/connect"
|
"connectrpc.com/connect"
|
||||||
runnerv1 "gitea.dev/actions-proto-go/runner/v1"
|
runnerv1 "gitea.dev/actions-proto-go/runner/v1"
|
||||||
"github.com/moby/moby/api/types/container"
|
docker_container "github.com/moby/moby/api/types/container"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -127,15 +128,22 @@ func (r *Runner) OnIdle(ctx context.Context) {
|
|||||||
if !r.shouldRunIdleCleanup() {
|
if !r.shouldRunIdleCleanup() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// Bind-workdir mode: reclaim stale per-task workspace dirs (numeric task IDs).
|
||||||
|
if r.cfg.Container.BindWorkdir {
|
||||||
workdirParent := strings.TrimLeft(r.cfg.Container.WorkdirParent, "/")
|
workdirParent := strings.TrimLeft(r.cfg.Container.WorkdirParent, "/")
|
||||||
workdirRoot := filepath.FromSlash("/" + workdirParent)
|
workdirRoot := filepath.FromSlash("/" + workdirParent)
|
||||||
r.cleanupStaleTaskDirs(ctx, workdirRoot)
|
r.cleanupStaleDirs(ctx, workdirRoot, isTaskIDDir)
|
||||||
|
}
|
||||||
|
// Host mode: reclaim per-job scratch dirs left behind when HostEnvironment
|
||||||
|
// cleanup timed out (e.g. a delete stalled by an AV/EDR filter driver). They
|
||||||
|
// sit under the host workdir parent alongside the shared tool_cache, which
|
||||||
|
// the name match leaves untouched. No-op when no host-mode job ever ran.
|
||||||
|
if hostRoot := filepath.FromSlash(r.cfg.Host.WorkdirParent); hostRoot != "" {
|
||||||
|
r.cleanupStaleDirs(ctx, hostRoot, isHostScratchDir)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Runner) shouldRunIdleCleanup() bool {
|
func (r *Runner) shouldRunIdleCleanup() bool {
|
||||||
if !r.cfg.Container.BindWorkdir {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if r.cfg.Runner.WorkdirCleanupAge <= 0 || r.cfg.Runner.IdleCleanupInterval <= 0 {
|
if r.cfg.Runner.WorkdirCleanupAge <= 0 || r.cfg.Runner.IdleCleanupInterval <= 0 {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -155,18 +163,52 @@ func (r *Runner) shouldRunIdleCleanup() bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cleanupStaleTaskDirs reclaims stale bind-workdir per-task directories under
|
||||||
|
// workdirRoot. Retained as a thin wrapper so existing callers and tests keep a
|
||||||
|
// stable entry point.
|
||||||
func (r *Runner) cleanupStaleTaskDirs(ctx context.Context, workdirRoot string) {
|
func (r *Runner) cleanupStaleTaskDirs(ctx context.Context, workdirRoot string) {
|
||||||
entries, err := os.ReadDir(workdirRoot)
|
r.cleanupStaleDirs(ctx, workdirRoot, isTaskIDDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isTaskIDDir reports whether name is a per-task workspace dir (numeric task
|
||||||
|
// ID). Any other directory is skipped to avoid deleting operator-managed data
|
||||||
|
// under workdir_root.
|
||||||
|
func isTaskIDDir(name string) bool {
|
||||||
|
_, err := strconv.ParseUint(name, 10, 64)
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isHostScratchDir reports whether name is a per-job host-mode scratch dir:
|
||||||
|
// hex.EncodeToString of 8 random bytes, i.e. exactly 16 lowercase hex chars
|
||||||
|
// (see startHostEnvironment in act/runner/run_context.go). The narrow match
|
||||||
|
// leaves the sibling shared "tool_cache" dir and any operator data untouched.
|
||||||
|
func isHostScratchDir(name string) bool {
|
||||||
|
if len(name) != 16 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, c := range name {
|
||||||
|
if (c < '0' || c > '9') && (c < 'a' || c > 'f') {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanupStaleDirs removes immediate child directories of root that match and
|
||||||
|
// whose mtime is older than WorkdirCleanupAge. It is a no-op when root does not
|
||||||
|
// exist yet (the runner has never written there).
|
||||||
|
func (r *Runner) cleanupStaleDirs(ctx context.Context, root string, match func(name string) bool) {
|
||||||
|
entries, err := os.ReadDir(root)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, os.ErrNotExist) {
|
if errors.Is(err, os.ErrNotExist) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
log.Warnf("failed to list task workspace root %s for stale cleanup: %v", workdirRoot, err)
|
log.Warnf("failed to list directory %s for stale cleanup: %v", root, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// A task may begin between shouldRunIdleCleanup's running-count check and
|
// A task may begin between shouldRunIdleCleanup's running-count check and
|
||||||
// the loop below. That is safe because new task dirs are created with the
|
// the loop below. That is safe because new dirs are created with the
|
||||||
// current mtime and therefore fall on the keep side of cutoff.
|
// current mtime and therefore fall on the keep side of cutoff.
|
||||||
cutoff := r.now().Add(-r.cfg.Runner.WorkdirCleanupAge)
|
cutoff := r.now().Add(-r.cfg.Runner.WorkdirCleanupAge)
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
@@ -176,25 +218,23 @@ func (r *Runner) cleanupStaleTaskDirs(ctx context.Context, workdirRoot string) {
|
|||||||
if !entry.IsDir() {
|
if !entry.IsDir() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Task workspaces are indexed by numeric task IDs; skip any other
|
if !match(entry.Name()) {
|
||||||
// directories to avoid deleting operator-managed data under workdir_root.
|
|
||||||
if _, err := strconv.ParseUint(entry.Name(), 10, 64); err != nil {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
info, err := entry.Info()
|
info, err := entry.Info()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("failed to stat task workspace %s: %v", filepath.Join(workdirRoot, entry.Name()), err)
|
log.Warnf("failed to stat %s: %v", filepath.Join(root, entry.Name()), err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if info.ModTime().After(cutoff) {
|
if info.ModTime().After(cutoff) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
taskDir := filepath.Join(workdirRoot, entry.Name())
|
dir := filepath.Join(root, entry.Name())
|
||||||
if err := os.RemoveAll(taskDir); err != nil {
|
if err := os.RemoveAll(dir); err != nil {
|
||||||
log.Warnf("failed to clean stale task workspace %s: %v", taskDir, err)
|
log.Warnf("failed to clean stale directory %s: %v", dir, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Infof("cleaned stale task workspace %s", taskDir)
|
log.Infof("cleaned stale directory %s", dir)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -394,7 +434,11 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report.
|
|||||||
ContainerNamePrefix: fmt.Sprintf("GITEA-ACTIONS-TASK-%d", task.Id),
|
ContainerNamePrefix: fmt.Sprintf("GITEA-ACTIONS-TASK-%d", task.Id),
|
||||||
ContainerMaxLifetime: maxLifetime,
|
ContainerMaxLifetime: maxLifetime,
|
||||||
CleanWorkdir: true,
|
CleanWorkdir: true,
|
||||||
ContainerNetworkMode: container.NetworkMode(r.cfg.Container.Network),
|
ContainerNetworkMode: docker_container.NetworkMode(r.cfg.Container.Network),
|
||||||
|
ContainerNetworkCreateOptions: container.NewDockerNetworkCreateExecutorInput{
|
||||||
|
EnableIPv4: r.cfg.Container.NetworkCreateOptions.EnableIPv4,
|
||||||
|
EnableIPv6: r.cfg.Container.NetworkCreateOptions.EnableIPv6,
|
||||||
|
},
|
||||||
ContainerOptions: r.cfg.Container.Options,
|
ContainerOptions: r.cfg.Container.Options,
|
||||||
ContainerDaemonSocket: r.cfg.Container.DockerHost,
|
ContainerDaemonSocket: r.cfg.Container.DockerHost,
|
||||||
Privileged: r.cfg.Container.Privileged,
|
Privileged: r.cfg.Container.Privileged,
|
||||||
|
|||||||
@@ -52,6 +52,55 @@ func TestRunnerCleanupStaleTaskDirs(t *testing.T) {
|
|||||||
assert.DirExists(t, alphaNumericTask)
|
assert.DirExists(t, alphaNumericTask)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestRunnerOnIdleCleansStaleHostScratchDirs covers the host-mode leak path:
|
||||||
|
// a per-job scratch dir (16 hex chars) left behind by a timed-out cleanup must
|
||||||
|
// be reclaimed, while the shared tool_cache and operator data are preserved.
|
||||||
|
func TestRunnerOnIdleCleansStaleHostScratchDirs(t *testing.T) {
|
||||||
|
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
|
||||||
|
hostRoot := filepath.Join(t.TempDir(), "act")
|
||||||
|
require.NoError(t, os.MkdirAll(hostRoot, 0o700))
|
||||||
|
|
||||||
|
staleScratch := filepath.Join(hostRoot, "0123456789abcdef") // 16 hex
|
||||||
|
freshScratch := filepath.Join(hostRoot, "fedcba9876543210")
|
||||||
|
toolCache := filepath.Join(hostRoot, "tool_cache")
|
||||||
|
operatorData := filepath.Join(hostRoot, "keep-me")
|
||||||
|
for _, path := range []string{staleScratch, freshScratch, toolCache, operatorData} {
|
||||||
|
require.NoError(t, os.MkdirAll(path, 0o700))
|
||||||
|
}
|
||||||
|
require.NoError(t, os.Chtimes(staleScratch, now.Add(-48*time.Hour), now.Add(-48*time.Hour)))
|
||||||
|
require.NoError(t, os.Chtimes(freshScratch, now.Add(-10*time.Minute), now.Add(-10*time.Minute)))
|
||||||
|
require.NoError(t, os.Chtimes(toolCache, now.Add(-72*time.Hour), now.Add(-72*time.Hour)))
|
||||||
|
require.NoError(t, os.Chtimes(operatorData, now.Add(-72*time.Hour), now.Add(-72*time.Hour)))
|
||||||
|
|
||||||
|
r := &Runner{
|
||||||
|
cfg: &config.Config{
|
||||||
|
Host: config.Host{WorkdirParent: hostRoot},
|
||||||
|
Runner: config.Runner{
|
||||||
|
WorkdirCleanupAge: 24 * time.Hour,
|
||||||
|
IdleCleanupInterval: time.Minute,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
now: func() time.Time { return now },
|
||||||
|
}
|
||||||
|
|
||||||
|
r.OnIdle(context.Background())
|
||||||
|
|
||||||
|
assert.NoDirExists(t, staleScratch) // stale scratch reclaimed
|
||||||
|
assert.DirExists(t, freshScratch) // within cleanup age, kept
|
||||||
|
assert.DirExists(t, toolCache) // shared cache, never a scratch match
|
||||||
|
assert.DirExists(t, operatorData) // non-hex name, untouched
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsHostScratchDir(t *testing.T) {
|
||||||
|
assert.True(t, isHostScratchDir("0123456789abcdef"))
|
||||||
|
assert.True(t, isHostScratchDir("ffffffffffffffff"))
|
||||||
|
assert.False(t, isHostScratchDir("tool_cache"))
|
||||||
|
assert.False(t, isHostScratchDir("0123456789ABCDEF")) // hex.EncodeToString is lowercase
|
||||||
|
assert.False(t, isHostScratchDir("0123456789abcde")) // 15 chars
|
||||||
|
assert.False(t, isHostScratchDir("0123456789abcdef0")) // 17 chars
|
||||||
|
assert.False(t, isHostScratchDir("123"))
|
||||||
|
}
|
||||||
|
|
||||||
func TestRunnerCleanupStaleTaskDirsMissingRoot(t *testing.T) {
|
func TestRunnerCleanupStaleTaskDirsMissingRoot(t *testing.T) {
|
||||||
r := &Runner{
|
r := &Runner{
|
||||||
cfg: &config.Config{
|
cfg: &config.Config{
|
||||||
@@ -135,7 +184,10 @@ func TestRunnerShouldRunIdleCleanupSkipsWhenJobRunning(t *testing.T) {
|
|||||||
assert.False(t, r.shouldRunIdleCleanup())
|
assert.False(t, r.shouldRunIdleCleanup())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRunnerShouldRunIdleCleanupSkipsWhenBindWorkdirDisabled(t *testing.T) {
|
// Idle cleanup runs regardless of bind_workdir: host mode (bind_workdir off)
|
||||||
|
// still leaves per-job scratch dirs that the sweep must reclaim.
|
||||||
|
func TestRunnerShouldRunIdleCleanupRunsWithoutBindWorkdir(t *testing.T) {
|
||||||
|
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
|
||||||
r := &Runner{
|
r := &Runner{
|
||||||
cfg: &config.Config{
|
cfg: &config.Config{
|
||||||
Runner: config.Runner{
|
Runner: config.Runner{
|
||||||
@@ -143,10 +195,10 @@ func TestRunnerShouldRunIdleCleanupSkipsWhenBindWorkdirDisabled(t *testing.T) {
|
|||||||
IdleCleanupInterval: time.Minute,
|
IdleCleanupInterval: time.Minute,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
now: time.Now,
|
now: func() time.Time { return now },
|
||||||
}
|
}
|
||||||
|
|
||||||
assert.False(t, r.shouldRunIdleCleanup())
|
assert.True(t, r.shouldRunIdleCleanup())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRunnerShouldRunIdleCleanupSkipsWhenDisabled(t *testing.T) {
|
func TestRunnerShouldRunIdleCleanupSkipsWhenDisabled(t *testing.T) {
|
||||||
|
|||||||
@@ -40,11 +40,12 @@ runner:
|
|||||||
# The runner uses exponential backoff when idle, increasing the interval up to this maximum.
|
# The runner uses exponential backoff when idle, increasing the interval up to this maximum.
|
||||||
# Set to 0 or same as fetch_interval to disable backoff.
|
# Set to 0 or same as fetch_interval to disable backoff.
|
||||||
fetch_interval_max: 5s
|
fetch_interval_max: 5s
|
||||||
# While idle, remove stale bind-workdir task directories older than this duration.
|
# While idle, remove stale bind-workdir task directories and orphaned host-mode
|
||||||
# Setting either workdir_cleanup_age or idle_cleanup_interval to 0 (or any
|
# scratch directories (left behind when a host cleanup delete stalls) older than
|
||||||
# non-positive value) disables workdir cleanup entirely.
|
# this duration. Setting either workdir_cleanup_age or idle_cleanup_interval to 0
|
||||||
|
# (or any non-positive value) disables stale-directory cleanup entirely.
|
||||||
workdir_cleanup_age: 24h
|
workdir_cleanup_age: 24h
|
||||||
# Cadence for the idle stale bind-workdir cleanup pass.
|
# Cadence for the idle stale-directory cleanup pass.
|
||||||
idle_cleanup_interval: 10m
|
idle_cleanup_interval: 10m
|
||||||
# The base interval for periodic log flush to the Gitea instance.
|
# The base interval for periodic log flush to the Gitea instance.
|
||||||
# Logs may be sent earlier if the buffer reaches log_report_batch_size
|
# Logs may be sent earlier if the buffer reaches log_report_batch_size
|
||||||
@@ -115,6 +116,13 @@ container:
|
|||||||
# If it's empty, runner will create a network automatically.
|
# If it's empty, runner will create a network automatically.
|
||||||
# Deprecated: `network_mode` is still accepted for old configs; use `network` instead.
|
# Deprecated: `network_mode` is still accepted for old configs; use `network` instead.
|
||||||
network: ""
|
network: ""
|
||||||
|
# network_create_options only apply when `network` is left empty and the runner
|
||||||
|
# auto-creates a per-job network that does not already exist. They have no effect
|
||||||
|
# when a custom `network` name is set, because that network is used as-is and never
|
||||||
|
# created by the runner. Omit the entire block to use Docker's defaults.
|
||||||
|
network_create_options:
|
||||||
|
enable_ipv4: true # Omit to use Docker's default (IPv4 enabled). Set false to disable IPv4.
|
||||||
|
enable_ipv6: false # Omit to use Docker's default (IPv6 disabled). Enabling it requires dockerd started with --ipv6.
|
||||||
# Whether to use privileged mode or not when launching task containers (privileged mode is required for Docker-in-Docker).
|
# Whether to use privileged mode or not when launching task containers (privileged mode is required for Docker-in-Docker).
|
||||||
privileged: false
|
privileged: false
|
||||||
# Any other options to be used when the container is started (e.g., --add-host=my.gitea.url:host-gateway).
|
# Any other options to be used when the container is started (e.g., --add-host=my.gitea.url:host-gateway).
|
||||||
|
|||||||
@@ -33,8 +33,8 @@ type Runner struct {
|
|||||||
FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources.
|
FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources.
|
||||||
FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources.
|
FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources.
|
||||||
FetchIntervalMax time.Duration `yaml:"fetch_interval_max"` // FetchIntervalMax specifies the maximum backoff interval when idle.
|
FetchIntervalMax time.Duration `yaml:"fetch_interval_max"` // FetchIntervalMax specifies the maximum backoff interval when idle.
|
||||||
WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories older than this duration during idle cleanup.
|
WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories and orphaned host-mode scratch dirs older than this duration during idle cleanup.
|
||||||
IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale bind-workdir cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence.
|
IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale-directory cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence.
|
||||||
LogReportInterval time.Duration `yaml:"log_report_interval"` // LogReportInterval specifies the base interval for periodic log flush.
|
LogReportInterval time.Duration `yaml:"log_report_interval"` // LogReportInterval specifies the base interval for periodic log flush.
|
||||||
LogReportMaxLatency time.Duration `yaml:"log_report_max_latency"` // LogReportMaxLatency specifies the max time a log row can wait before being sent.
|
LogReportMaxLatency time.Duration `yaml:"log_report_max_latency"` // LogReportMaxLatency specifies the max time a log row can wait before being sent.
|
||||||
LogReportBatchSize int `yaml:"log_report_batch_size"` // LogReportBatchSize triggers immediate log flush when buffer reaches this size.
|
LogReportBatchSize int `yaml:"log_report_batch_size"` // LogReportBatchSize triggers immediate log flush when buffer reaches this size.
|
||||||
@@ -59,6 +59,7 @@ type Cache struct {
|
|||||||
// Container represents the configuration for the container.
|
// Container represents the configuration for the container.
|
||||||
type Container struct {
|
type Container struct {
|
||||||
Network string `yaml:"network"` // Network specifies the network for the container.
|
Network string `yaml:"network"` // Network specifies the network for the container.
|
||||||
|
NetworkCreateOptions ContainerNetworkCreateOptions `yaml:"network_create_options"` // Add options when the network need to be created by the runner
|
||||||
NetworkMode string `yaml:"network_mode"` // Deprecated: use Network instead. Could be removed after Gitea 1.20
|
NetworkMode string `yaml:"network_mode"` // Deprecated: use Network instead. Could be removed after Gitea 1.20
|
||||||
Privileged bool `yaml:"privileged"` // Privileged indicates whether the container runs in privileged mode.
|
Privileged bool `yaml:"privileged"` // Privileged indicates whether the container runs in privileged mode.
|
||||||
Options string `yaml:"options"` // Options specifies additional options for the container.
|
Options string `yaml:"options"` // Options specifies additional options for the container.
|
||||||
@@ -72,6 +73,11 @@ type Container struct {
|
|||||||
BindWorkdir bool `yaml:"bind_workdir"` // BindWorkdir binds the workspace to the host filesystem instead of using Docker volumes. Required for DinD when jobs use docker compose with bind mounts.
|
BindWorkdir bool `yaml:"bind_workdir"` // BindWorkdir binds the workspace to the host filesystem instead of using Docker volumes. Required for DinD when jobs use docker compose with bind mounts.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ContainerNetworkCreateOptions struct {
|
||||||
|
EnableIPv4 *bool `yaml:"enable_ipv4"` // Enable or disable IPv4 for the network (true for docker by default)
|
||||||
|
EnableIPv6 *bool `yaml:"enable_ipv6"` // Enable or disable IPv6 for the network (false for docker by default)
|
||||||
|
}
|
||||||
|
|
||||||
// Host represents the configuration for the host.
|
// Host represents the configuration for the host.
|
||||||
type Host struct {
|
type Host struct {
|
||||||
WorkdirParent string `yaml:"workdir_parent"` // WorkdirParent specifies the parent directory for the host's working directory.
|
WorkdirParent string `yaml:"workdir_parent"` // WorkdirParent specifies the parent directory for the host's working directory.
|
||||||
|
|||||||
@@ -117,3 +117,50 @@ func TestLoadDefault_MalformedYAMLReturnsParseError(t *testing.T) {
|
|||||||
assert.Contains(t, err.Error(), "parse config file")
|
assert.Contains(t, err.Error(), "parse config file")
|
||||||
assert.NotContains(t, err.Error(), "defaults metadata")
|
assert.NotContains(t, err.Error(), "defaults metadata")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestContainerNetworkCreateOptions(t *testing.T) {
|
||||||
|
// Verify that the enable_ipv4/enable_ipv6 YAML keys unmarshal into the *bool fields,
|
||||||
|
// distinguishing an explicit true/false from an omitted key (nil). A nil here is
|
||||||
|
// forwarded as-is to Docker, which applies its own default.
|
||||||
|
loadOptions := func(t *testing.T, yaml string) ContainerNetworkCreateOptions {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte(yaml), 0o600))
|
||||||
|
|
||||||
|
cfg, err := LoadDefault(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return cfg.Container.NetworkCreateOptions
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("enable_ipv6 true unmarshals to non-nil true", func(t *testing.T) {
|
||||||
|
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv6: true\n")
|
||||||
|
require.NotNil(t, opts.EnableIPv6)
|
||||||
|
assert.True(t, *opts.EnableIPv6)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("enable_ipv6 false unmarshals to non-nil false", func(t *testing.T) {
|
||||||
|
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv6: false\n")
|
||||||
|
require.NotNil(t, opts.EnableIPv6)
|
||||||
|
assert.False(t, *opts.EnableIPv6)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("enable_ipv4 false unmarshals to non-nil false", func(t *testing.T) {
|
||||||
|
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv4: false\n")
|
||||||
|
require.NotNil(t, opts.EnableIPv4)
|
||||||
|
assert.False(t, *opts.EnableIPv4)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("omitted keys stay nil", func(t *testing.T) {
|
||||||
|
opts := loadOptions(t, "container:\n network_create_options:\n enable_ipv4: true\n")
|
||||||
|
require.NotNil(t, opts.EnableIPv4)
|
||||||
|
assert.True(t, *opts.EnableIPv4)
|
||||||
|
assert.Nil(t, opts.EnableIPv6, "an omitted enable_ipv6 must remain nil so Docker's default applies")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("omitted block leaves both nil", func(t *testing.T) {
|
||||||
|
opts := loadOptions(t, "container:\n network: \"\"\n")
|
||||||
|
assert.Nil(t, opts.EnableIPv4)
|
||||||
|
assert.Nil(t, opts.EnableIPv6)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user