mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-21 17:24:23 +02:00
feat: Add optional runner.post_task_script hook after task cleanup (#1026)
- Adds `runner.post_task_script` and `runner.post_task_script_timeout` (default `5m`) to run a host executable after each task’s built-in cleanup (post-steps, container teardown, bind-workdir removal). - Stops task heartbeats via `Reporter.StopHeartbeats()` while the script runs so Gitea won’t assign overlapping work; the final task acknowledgement still happens in `reporter.Close()`. - Script output goes to the runner process log; non-zero exits are warned only and do not change the job result. - Documents lifecycle, offline behavior, timeouts, and Windows limits (`.ps1` not supported yet) in `docs/post-task-script.md`. Reviewed-on: https://gitea.com/gitea/runner/pulls/1026 Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
This commit is contained in:
10
README.md
10
README.md
@@ -209,6 +209,16 @@ When `container.bind_workdir` is enabled, stale task workspace directories can b
|
|||||||
- only purely numeric subdirectories under `container.workdir_parent` are treated as task workspaces and may be removed
|
- only purely numeric subdirectories under `container.workdir_parent` are treated as task workspaces and may be removed
|
||||||
- cleanup assumes `container.workdir_parent` is not shared across multiple runners
|
- cleanup assumes `container.workdir_parent` is not shared across multiple runners
|
||||||
|
|
||||||
|
#### Post-task script (`runner.post_task_script`)
|
||||||
|
|
||||||
|
Optional host script that runs **after** each task's built-in cleanup (post-steps, container teardown, bind-workdir removal). Use it for extra machine housekeeping — Docker pruning, disk cleanup, and similar.
|
||||||
|
|
||||||
|
**While the script runs, the runner stops task heartbeats and stays offline from Gitea's perspective until the script exits (or hits `runner.post_task_script_timeout`, default `5m`).** A script that blocks without exiting keeps the runner from taking new work for up to that timeout. Script output goes to the runner log, not the job log; a non-zero exit is warned but does not change the job result.
|
||||||
|
|
||||||
|
On Windows, use `.exe`, `.bat`, or `.cmd` paths; **PowerShell (`.ps1`) is not supported yet** as the configured path — wrap commands in a `.cmd` file instead.
|
||||||
|
|
||||||
|
See **[docs/post-task-script.md](docs/post-task-script.md)** for lifecycle details, environment variables, timeout interaction, and platform notes.
|
||||||
|
|
||||||
### Example Deployments
|
### Example Deployments
|
||||||
|
|
||||||
Check out the [examples](examples) directory for sample deployment types.
|
Check out the [examples](examples) directory for sample deployment types.
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import (
|
|||||||
"gitea.com/gitea/runner/act/common"
|
"gitea.com/gitea/runner/act/common"
|
||||||
"gitea.com/gitea/runner/act/filecollector"
|
"gitea.com/gitea/runner/act/filecollector"
|
||||||
"gitea.com/gitea/runner/act/lookpath"
|
"gitea.com/gitea/runner/act/lookpath"
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/process"
|
||||||
|
|
||||||
"github.com/go-git/go-billy/v5/helper/polyfill"
|
"github.com/go-git/go-billy/v5/helper/polyfill"
|
||||||
"github.com/go-git/go-billy/v5/osfs"
|
"github.com/go-git/go-billy/v5/osfs"
|
||||||
@@ -261,7 +262,7 @@ func setupPty(cmd *exec.Cmd, cmdline string) (*os.File, *os.File, error) {
|
|||||||
cmd.Stdin = tty
|
cmd.Stdin = tty
|
||||||
cmd.Stdout = tty
|
cmd.Stdout = tty
|
||||||
cmd.Stderr = tty
|
cmd.Stderr = tty
|
||||||
cmd.SysProcAttr = getSysProcAttr(cmdline, true)
|
cmd.SysProcAttr = process.SysProcAttr(cmdline, true)
|
||||||
return ppty, tty, nil
|
return ppty, tty, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -321,30 +322,14 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
|||||||
cmd.Env = envList
|
cmd.Env = envList
|
||||||
cmd.Stderr = e.StdOut
|
cmd.Stderr = e.StdOut
|
||||||
cmd.Dir = wd
|
cmd.Dir = wd
|
||||||
cmd.SysProcAttr = getSysProcAttr(cmdline, false)
|
cmd.SysProcAttr = process.SysProcAttr(cmdline, false)
|
||||||
|
|
||||||
// A step often launches a process tree (a shell that starts a child which
|
// Kill the step's whole process tree on cancellation (a step often launches a
|
||||||
// spawns further background or GUI processes). The default context
|
// shell that spawns further background or GUI children) and bound the post-exit
|
||||||
// cancellation only kills the direct child, leaving the rest of the tree
|
// I/O wait, so an orphan inheriting cmd's stdout/stderr pipe can never hang
|
||||||
// running; and because the orphans inherit cmd's stdout/stderr pipe,
|
// cmd.Wait() and the runner. See process.TreeKill. The PTY path below may
|
||||||
// cmd.Wait() would block forever, hanging the runner. Kill the whole tree on
|
// override SysProcAttr, but never touches Cancel/WaitDelay.
|
||||||
// cancellation — via a Job Object on Windows and the process group on Unix
|
treeKill := process.NewTreeKill(cmd)
|
||||||
// (see processKiller) — and bound the wait so a leftover pipe writer can
|
|
||||||
// never hang Wait indefinitely.
|
|
||||||
var killer atomic.Pointer[processKiller]
|
|
||||||
cmd.Cancel = func() error {
|
|
||||||
if k := killer.Load(); k != nil {
|
|
||||||
return k.Kill()
|
|
||||||
}
|
|
||||||
if cmd.Process != nil {
|
|
||||||
return cmd.Process.Kill()
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// Once the step process has exited, give its I/O pipes at most this long to
|
|
||||||
// drain before Wait force-closes them and returns (Go's WaitDelay). This
|
|
||||||
// also covers a step that backgrounds a process holding the pipe open.
|
|
||||||
cmd.WaitDelay = 10 * time.Second
|
|
||||||
|
|
||||||
var ppty *os.File
|
var ppty *os.File
|
||||||
var tty *os.File
|
var tty *os.File
|
||||||
@@ -375,15 +360,9 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
|
|||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Capture the started process for tree-kill on cancellation: a Job Object on
|
if k, kerr := treeKill.Capture(cmd.Process); kerr != nil {
|
||||||
// Windows (children spawned afterwards are auto-included) and the process
|
|
||||||
// group on Unix. On failure (e.g. Windows nested-job restrictions) we fall
|
|
||||||
// back to the default single-process kill; WaitDelay + end-of-job cleanup
|
|
||||||
// still apply.
|
|
||||||
if k, kerr := newProcessKiller(cmd.Process); kerr != nil {
|
|
||||||
common.Logger(ctx).Warnf("process tree kill setup failed, falling back to single-process kill: %v", kerr)
|
common.Logger(ctx).Warnf("process tree kill setup failed, falling back to single-process kill: %v", kerr)
|
||||||
} else {
|
} else {
|
||||||
killer.Store(k)
|
|
||||||
defer k.Close()
|
defer k.Close()
|
||||||
}
|
}
|
||||||
err = cmd.Wait()
|
err = cmd.Wait()
|
||||||
|
|||||||
@@ -1,29 +0,0 @@
|
|||||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
|
|
||||||
//go:build plan9
|
|
||||||
|
|
||||||
package container
|
|
||||||
|
|
||||||
import "os"
|
|
||||||
|
|
||||||
// processKiller falls back to single-process termination on platforms without
|
|
||||||
// a process-group / Job Object tree-kill. The Job Object (Windows) and process
|
|
||||||
// group (Unix) based tree-kills live in process_windows.go / process_unix.go;
|
|
||||||
// here we just kill the direct child, matching the previous default behaviour.
|
|
||||||
type processKiller struct {
|
|
||||||
p *os.Process
|
|
||||||
}
|
|
||||||
|
|
||||||
func newProcessKiller(p *os.Process) (*processKiller, error) {
|
|
||||||
return &processKiller{p: p}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (k *processKiller) Kill() error {
|
|
||||||
if k == nil || k.p == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return k.p.Kill()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (k *processKiller) Close() error { return nil }
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
|
|
||||||
//go:build !windows && !plan9
|
|
||||||
|
|
||||||
package container
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"os"
|
|
||||||
"syscall"
|
|
||||||
)
|
|
||||||
|
|
||||||
// processKiller terminates a step process together with its whole process
|
|
||||||
// group, which is the Unix counterpart of the Windows Job Object tree-kill.
|
|
||||||
//
|
|
||||||
// Background: a step often launches a process tree (a shell that starts a child
|
|
||||||
// which in turn spawns further background processes). The default
|
|
||||||
// exec.CommandContext cancellation only kills the direct child, so cancelling a
|
|
||||||
// job left the rest of the tree running. Because those orphans inherited the
|
|
||||||
// step's stdout/stderr pipe, cmd.Wait() also blocked forever and the runner
|
|
||||||
// hung.
|
|
||||||
//
|
|
||||||
// Steps are started with Setpgid (or Setsid for the PTY path, see
|
|
||||||
// getSysProcAttr), which makes the step process the leader of a new process
|
|
||||||
// group whose ID equals its PID. Signalling the negative PID delivers to every
|
|
||||||
// process still in that group, so we can tear down the whole tree atomically on
|
|
||||||
// cancellation, which also closes the inherited pipe handles so cmd.Wait() can
|
|
||||||
// return.
|
|
||||||
type processKiller struct {
|
|
||||||
pgid int
|
|
||||||
}
|
|
||||||
|
|
||||||
// newProcessKiller captures the process group of p (an already-started
|
|
||||||
// process). Because the step is launched with Setpgid/Setsid, p is a group
|
|
||||||
// leader and its PGID equals its PID; children spawned afterwards stay in the
|
|
||||||
// same group unless they explicitly create their own.
|
|
||||||
func newProcessKiller(p *os.Process) (*processKiller, error) {
|
|
||||||
return &processKiller{pgid: p.Pid}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Kill sends SIGKILL to the entire process group (the step process and every
|
|
||||||
// descendant that stayed in the group). A missing group (ESRCH) means the
|
|
||||||
// processes already exited and is not treated as an error.
|
|
||||||
func (k *processKiller) Kill() error {
|
|
||||||
if k == nil || k.pgid <= 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if err := syscall.Kill(-k.pgid, syscall.SIGKILL); err != nil && !errors.Is(err, syscall.ESRCH) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close is a no-op on Unix; there is no job handle to release.
|
|
||||||
func (k *processKiller) Close() error { return nil }
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
|
|
||||||
package container
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"golang.org/x/sys/windows"
|
|
||||||
)
|
|
||||||
|
|
||||||
// processKiller terminates a step process together with its entire descendant
|
|
||||||
// tree via a Windows Job Object.
|
|
||||||
//
|
|
||||||
// Background: a step often launches a process tree (a shell that starts a
|
|
||||||
// child which in turn spawns further GUI or background processes). The default
|
|
||||||
// exec.CommandContext cancellation only kills the direct child, so cancelling a
|
|
||||||
// job left the rest of the tree running. Because those orphans inherited the
|
|
||||||
// step's stdout/stderr pipe, cmd.Wait() also blocked forever and the runner hung.
|
|
||||||
//
|
|
||||||
// Assigning the step process to a Job Object lets us kill the whole tree
|
|
||||||
// atomically on cancellation (TerminateJobObject), which also closes the
|
|
||||||
// inherited pipe handles so cmd.Wait() can return.
|
|
||||||
type processKiller struct {
|
|
||||||
job windows.Handle
|
|
||||||
}
|
|
||||||
|
|
||||||
// newProcessKiller creates a Job Object and assigns p (an already-started
|
|
||||||
// process) to it. Children spawned by p afterwards are automatically part of
|
|
||||||
// the job. The job does NOT use JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, so closing
|
|
||||||
// the handle on normal completion does not kill legitimate background
|
|
||||||
// processes; the tree is only torn down by an explicit Kill (cancellation).
|
|
||||||
func newProcessKiller(p *os.Process) (*processKiller, error) {
|
|
||||||
job, err := windows.CreateJobObject(nil, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
h, err := windows.OpenProcess(windows.PROCESS_SET_QUOTA|windows.PROCESS_TERMINATE, false, uint32(p.Pid))
|
|
||||||
if err != nil {
|
|
||||||
windows.CloseHandle(job)
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer windows.CloseHandle(h)
|
|
||||||
|
|
||||||
if err := windows.AssignProcessToJobObject(job, h); err != nil {
|
|
||||||
windows.CloseHandle(job)
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &processKiller{job: job}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Kill terminates every process currently assigned to the job (the step process
|
|
||||||
// and all of its descendants).
|
|
||||||
func (k *processKiller) Kill() error {
|
|
||||||
if k == nil || k.job == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return windows.TerminateJobObject(k.job, 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close releases the job handle. It does not terminate the processes.
|
|
||||||
func (k *processKiller) Close() error {
|
|
||||||
if k == nil || k.job == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
h := k.job
|
|
||||||
k.job = 0
|
|
||||||
return windows.CloseHandle(h)
|
|
||||||
}
|
|
||||||
@@ -8,23 +8,10 @@ package container
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"github.com/creack/pty"
|
"github.com/creack/pty"
|
||||||
)
|
)
|
||||||
|
|
||||||
func getSysProcAttr(_ string, tty bool) *syscall.SysProcAttr {
|
|
||||||
if tty {
|
|
||||||
return &syscall.SysProcAttr{
|
|
||||||
Setsid: true,
|
|
||||||
Setctty: true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return &syscall.SysProcAttr{
|
|
||||||
Setpgid: true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func openPty() (*os.File, *os.File, error) {
|
func openPty() (*os.File, *os.File, error) {
|
||||||
return pty.Open()
|
return pty.Open()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,15 +7,8 @@ package container
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"syscall"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func getSysProcAttr(cmdLine string, tty bool) *syscall.SysProcAttr {
|
|
||||||
return &syscall.SysProcAttr{
|
|
||||||
Setpgid: true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func openPty() (*os.File, *os.File, error) {
|
func openPty() (*os.File, *os.File, error) {
|
||||||
return nil, nil, errors.New("Unsupported")
|
return nil, nil, errors.New("Unsupported")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,15 +7,8 @@ package container
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"syscall"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func getSysProcAttr(cmdLine string, tty bool) *syscall.SysProcAttr {
|
|
||||||
return &syscall.SysProcAttr{
|
|
||||||
Rfork: syscall.RFNOTEG,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func openPty() (*os.File, *os.File, error) {
|
func openPty() (*os.File, *os.File, error) {
|
||||||
return nil, nil, errors.New("Unsupported")
|
return nil, nil, errors.New("Unsupported")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,13 +7,8 @@ package container
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"syscall"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func getSysProcAttr(cmdLine string, tty bool) *syscall.SysProcAttr {
|
|
||||||
return &syscall.SysProcAttr{CmdLine: cmdLine, CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP}
|
|
||||||
}
|
|
||||||
|
|
||||||
func openPty() (*os.File, *os.File, error) {
|
func openPty() (*os.File, *os.File, error) {
|
||||||
return nil, nil, errors.New("Unsupported")
|
return nil, nil, errors.New("Unsupported")
|
||||||
}
|
}
|
||||||
|
|||||||
155
docs/post-task-script.md
Normal file
155
docs/post-task-script.md
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
# Post-task script
|
||||||
|
|
||||||
|
The post-task script is an optional host hook that runs **once after every task**, after the runner has already finished its normal per-task cleanup. Typical uses include pruning Docker images, vacuuming ephemeral disks, or resetting VM state between jobs.
|
||||||
|
|
||||||
|
It is configured under `runner.post_task_script` in the runner YAML config (see [config.example.yaml](../internal/pkg/config/config.example.yaml)).
|
||||||
|
|
||||||
|
## When it runs
|
||||||
|
|
||||||
|
For each task, execution order is:
|
||||||
|
|
||||||
|
1. Workflow runs (steps, actions, containers).
|
||||||
|
2. In-job cleanup (action `post:` steps, container stop/remove).
|
||||||
|
3. Job outputs are reported to Gitea.
|
||||||
|
4. Bind-workdir workspace removal, when `container.bind_workdir` is enabled.
|
||||||
|
5. **Post-task script** (this hook).
|
||||||
|
6. Final task acknowledgement to Gitea (`reporter.Close()`).
|
||||||
|
|
||||||
|
The script is **additive**: it does not replace any built-in cleanup. When `container.bind_workdir` is enabled, the task workspace directory has usually already been deleted before the script starts. `GITEA_WORKSPACE` is still set to the path the job used, for reference.
|
||||||
|
|
||||||
|
## Runner stays offline until the script finishes
|
||||||
|
|
||||||
|
This is the most important operational detail.
|
||||||
|
|
||||||
|
When the post-task script starts, the runner **stops sending task heartbeats** to Gitea (the same mechanism used during cancel/cleanup). From Gitea's perspective, the runner is **not available for new work** until:
|
||||||
|
|
||||||
|
1. The script exits (success or failure), **and**
|
||||||
|
2. The runner sends the final task flush to Gitea.
|
||||||
|
|
||||||
|
While the script runs:
|
||||||
|
|
||||||
|
- **Gitea will not assign another task** to this runner for the current job slot (heartbeats are stopped).
|
||||||
|
- **The runner capacity slot stays occupied** locally — with `capacity: 1`, the poller will not start another task until the script completes.
|
||||||
|
- **Runner shutdown** (`shutdown_timeout`) counts this phase as part of the in-flight task; a long or stuck script delays graceful shutdown.
|
||||||
|
|
||||||
|
If the script **never exits**, the runner remains in this state until `runner.post_task_script_timeout` elapses (default **5 minutes** when a script is configured). The runner then kills the script process and proceeds to the final acknowledgement. Until that timeout fires, **the runner effectively stays offline**.
|
||||||
|
|
||||||
|
Set `post_task_script_timeout` to a value that matches how long your housekeeping is allowed to take — not how long you wish it could take. Prefer short, bounded scripts.
|
||||||
|
|
||||||
|
### Recommendations
|
||||||
|
|
||||||
|
- Keep scripts **fast and bounded** (seconds, not minutes).
|
||||||
|
- Avoid interactive prompts, blocking network calls without timeouts, or waiting on user input.
|
||||||
|
- Use **idempotent** operations (the script may run after success, failure, or cancellation).
|
||||||
|
- Test failure modes: hung script, non-zero exit, missing executable.
|
||||||
|
- Watch the **runner process log** for script output (it is not written to the Gitea job log).
|
||||||
|
- On shutdown, ensure scripts respond to process termination within `post_task_script_timeout`.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
runner:
|
||||||
|
# Path to an executable on the host. Empty or omitted disables the hook.
|
||||||
|
post_task_script: /usr/local/bin/gitea-post-task.sh
|
||||||
|
|
||||||
|
# Hard limit on script runtime. Default when post_task_script is set: 5m.
|
||||||
|
# If the script exceeds this, it is killed and the runner continues.
|
||||||
|
post_task_script_timeout: 2m
|
||||||
|
```
|
||||||
|
|
||||||
|
| Option | Default | Description |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `runner.post_task_script` | *(disabled)* | Host path to the script or binary. Relative paths are resolved from the runner process working directory. |
|
||||||
|
| `runner.post_task_script_timeout` | `5m` (only when script is set) | Maximum time the script may run before the runner kills it and moves on. |
|
||||||
|
|
||||||
|
The script must be **executable** on the host (shebang on Linux/macOS, or a native `.exe` / `.bat` / `.cmd` on Windows). **PowerShell (`.ps1`) is not supported yet** as the value of `post_task_script`; the runner executes the configured path directly and does not invoke `powershell.exe` for you.
|
||||||
|
|
||||||
|
`gitea-runner exec` does **not** load runner YAML and will not run this hook.
|
||||||
|
|
||||||
|
## Environment variables
|
||||||
|
|
||||||
|
The script receives `runner.envs` / `runner.env_file` values plus:
|
||||||
|
|
||||||
|
| Variable | Description |
|
||||||
|
| --- | --- |
|
||||||
|
| `GITEA_TASK_ID` | Numeric task ID. |
|
||||||
|
| `GITEA_RUN_ID` | Workflow run ID, when provided by the server. |
|
||||||
|
| `GITEA_REPOSITORY` | Repository slug (`owner/name`). |
|
||||||
|
| `GITEA_WORKSPACE` | Workspace path used for the job (may already be deleted). |
|
||||||
|
| `GITEA_JOB_RESULT` | `success`, `failure`, `cancelled`, `skipped`, or `unknown`. |
|
||||||
|
|
||||||
|
The script environment is **not** a full copy of the job container environment. System variables such as `PATH` are only present if you define them in `runner.envs` or `runner.env_file`.
|
||||||
|
|
||||||
|
## Output and errors
|
||||||
|
|
||||||
|
- **Stdout/stderr** are written to the **runner process log** (logrus), prefixed with `post-task script stdout:` / `post-task script stderr:`.
|
||||||
|
- **Non-zero exit codes** are logged as warnings only. They do **not** change the job result already reported to Gitea.
|
||||||
|
- **Timeouts and start failures** are logged as warnings; the runner still completes the task acknowledgement.
|
||||||
|
|
||||||
|
## Interaction with other timeouts
|
||||||
|
|
||||||
|
| Timeout | Effect on post-task script |
|
||||||
|
| --- | --- |
|
||||||
|
| `runner.post_task_script_timeout` | Kills the script if it runs too long. This is the **only** timeout that bounds the script. |
|
||||||
|
| `runner.timeout` | Caps the task **up to** the script. The script detaches from the task deadline, so a job near the runner timeout limit does **not** cut the script short — it still gets its full `post_task_script_timeout`. |
|
||||||
|
| `runner.shutdown_timeout` | On SIGINT/SIGTERM, bounds how long the runner waits for the **task** to finish. The post-task script detaches from cancellation, so it is **not** interrupted by this window and may extend shutdown until its own `post_task_script_timeout` elapses. |
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### Linux — prune dangling Docker resources
|
||||||
|
|
||||||
|
`/usr/local/bin/gitea-post-task.sh`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
#!/bin/sh
|
||||||
|
set -eu
|
||||||
|
docker image prune -f
|
||||||
|
docker builder prune -f --filter 'until=24h'
|
||||||
|
```
|
||||||
|
|
||||||
|
`config.yaml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
runner:
|
||||||
|
post_task_script: /usr/local/bin/gitea-post-task.sh
|
||||||
|
post_task_script_timeout: 3m
|
||||||
|
```
|
||||||
|
|
||||||
|
### Windows — batch file (`.cmd`)
|
||||||
|
|
||||||
|
Use a `.cmd` or `.bat` file. PowerShell scripts are **not supported yet** as `post_task_script`; call PowerShell from a batch wrapper if needed:
|
||||||
|
|
||||||
|
`C:\gitea-runner\scripts\post-task.cmd`:
|
||||||
|
|
||||||
|
```bat
|
||||||
|
@echo off
|
||||||
|
docker image prune -f
|
||||||
|
```
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
runner:
|
||||||
|
post_task_script: C:\gitea-runner\scripts\post-task.cmd
|
||||||
|
post_task_script_timeout: 3m
|
||||||
|
```
|
||||||
|
|
||||||
|
PowerShell workaround until native `.ps1` support exists:
|
||||||
|
|
||||||
|
`C:\gitea-runner\scripts\post-task.cmd`:
|
||||||
|
|
||||||
|
```bat
|
||||||
|
@echo off
|
||||||
|
powershell.exe -NoProfile -NonInteractive -ExecutionPolicy Bypass -File "%~dp0post-task.ps1"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Windows notes
|
||||||
|
|
||||||
|
- Supported as `post_task_script`: `.exe`, `.bat`, `.cmd`.
|
||||||
|
- **Not supported yet:** `.ps1` as the configured path (use a `.cmd` wrapper; see above).
|
||||||
|
- `.sh` files require a Unix shell on the PATH unless you point `post_task_script` at the interpreter.
|
||||||
|
- Use backslashes or forward slashes in YAML paths; both work in Go on Windows.
|
||||||
|
|
||||||
|
## See also
|
||||||
|
|
||||||
|
- [Configuration](../README.md#configuration) — generating and loading `config.yaml`
|
||||||
|
- [config.example.yaml](../internal/pkg/config/config.example.yaml) — all runner options
|
||||||
|
- Bind-workdir idle cleanup (`runner.workdir_cleanup_age`) — separate from this hook; runs only when the runner is idle
|
||||||
2
go.sum
2
go.sum
@@ -253,8 +253,6 @@ golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
|||||||
golang.org/x/sys v0.46.0 h1:noSf2Fq6F8DBgS+LysIkx7rIExoNHJsxOAtPp4rthXw=
|
golang.org/x/sys v0.46.0 h1:noSf2Fq6F8DBgS+LysIkx7rIExoNHJsxOAtPp4rthXw=
|
||||||
golang.org/x/sys v0.46.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
golang.org/x/sys v0.46.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4=
|
|
||||||
golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk=
|
|
||||||
golang.org/x/term v0.44.0 h1:0rLvDRCtNj0gZkyIXhCyOb2OAzEhLVqc4B+hrsBhrmc=
|
golang.org/x/term v0.44.0 h1:0rLvDRCtNj0gZkyIXhCyOb2OAzEhLVqc4B+hrsBhrmc=
|
||||||
golang.org/x/term v0.44.0/go.mod h1:7ze4MdzUzLXpSAoFP1H0bOI9aXDqveSvatT5vKcFh2Y=
|
golang.org/x/term v0.44.0/go.mod h1:7ze4MdzUzLXpSAoFP1H0bOI9aXDqveSvatT5vKcFh2Y=
|
||||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
|||||||
132
internal/app/run/post_task_script.go
Normal file
132
internal/app/run/post_task_script.go
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package run
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.com/gitea/runner/act/common"
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/config"
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/metrics"
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/process"
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/report"
|
||||||
|
|
||||||
|
runnerv1 "gitea.dev/actions-proto-go/runner/v1"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (r *Runner) runPostTaskScript(ctx context.Context, reporter *report.Reporter, task *runnerv1.Task, workdir string) {
|
||||||
|
script := r.cfg.Runner.PostTaskScript
|
||||||
|
if script == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
timeout := r.cfg.Runner.PostTaskScriptTimeout
|
||||||
|
if timeout <= 0 {
|
||||||
|
timeout = config.DefaultPostTaskScriptTimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
scriptCtx, cancel := postTaskScriptContext(ctx, timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
env := r.postTaskScriptEnv(reporter, task, workdir)
|
||||||
|
log.Infof("running post-task script %q for task %d", script, task.Id)
|
||||||
|
|
||||||
|
cmd := exec.CommandContext(scriptCtx, script)
|
||||||
|
cmd.Env = envListFromMap(env)
|
||||||
|
cmd.SysProcAttr = process.SysProcAttr(script, false)
|
||||||
|
|
||||||
|
stdout := postTaskScriptLogWriter("stdout")
|
||||||
|
stderr := postTaskScriptLogWriter("stderr")
|
||||||
|
cmd.Stdout = stdout
|
||||||
|
cmd.Stderr = stderr
|
||||||
|
|
||||||
|
// Kill the script's whole process tree on cancellation and bound the post-exit
|
||||||
|
// I/O wait, so a backgrounded child inheriting cmd's stdout/stderr pipe can
|
||||||
|
// never hang cmd.Wait() and the runner. See process.TreeKill.
|
||||||
|
treeKill := process.NewTreeKill(cmd)
|
||||||
|
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
log.Warnf("post-task script %q for task %d: %v", script, task.Id, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if k, kerr := treeKill.Capture(cmd.Process); kerr != nil {
|
||||||
|
log.Warnf("post-task script %q for task %d: process tree kill setup failed, falling back to single-process kill: %v", script, task.Id, kerr)
|
||||||
|
} else {
|
||||||
|
defer k.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
err := cmd.Wait()
|
||||||
|
// Flush any trailing, not-yet-newline-terminated output now that the I/O
|
||||||
|
// copiers have finished (cmd.Wait, bounded by WaitDelay above, guarantees it).
|
||||||
|
common.FlushWriter(stdout)
|
||||||
|
common.FlushWriter(stderr)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
log.Warnf("post-task script %q for task %d: %v", script, task.Id, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var exitErr *exec.ExitError
|
||||||
|
if errors.As(err, &exitErr) {
|
||||||
|
log.Warnf("post-task script %q for task %d exited with code %d", script, task.Id, exitErr.ExitCode())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Warnf("post-task script %q for task %d: %v", script, task.Id, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func postTaskScriptContext(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
|
||||||
|
// Detach from the task context's deadline and cancellation: the task has
|
||||||
|
// already finished by the time the post-task script runs, so the script must
|
||||||
|
// get its full configured timeout. Inheriting the task deadline would silently
|
||||||
|
// truncate that budget when the job completed close to its own timeout (and an
|
||||||
|
// already-cancelled task context would skip the script entirely).
|
||||||
|
// context.WithoutCancel keeps the context values while dropping the deadline.
|
||||||
|
return context.WithTimeout(context.WithoutCancel(ctx), timeout)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Runner) postTaskScriptEnv(reporter *report.Reporter, task *runnerv1.Task, workdir string) map[string]string {
|
||||||
|
env := r.cloneEnvs()
|
||||||
|
env["GITEA_TASK_ID"] = strconv.FormatInt(task.Id, 10)
|
||||||
|
env["GITEA_WORKSPACE"] = workdir
|
||||||
|
// GITEA_JOB_RESULT shares the runner's canonical result vocabulary
|
||||||
|
// (success/failure/cancelled/skipped/unknown), the same strings the reporter
|
||||||
|
// parses and the metrics labels use.
|
||||||
|
env["GITEA_JOB_RESULT"] = metrics.ResultToStatusLabel(reporter.Result())
|
||||||
|
if v := task.Context.Fields["run_id"].GetStringValue(); v != "" {
|
||||||
|
env["GITEA_RUN_ID"] = v
|
||||||
|
}
|
||||||
|
if v := task.Context.Fields["repository"].GetStringValue(); v != "" {
|
||||||
|
env["GITEA_REPOSITORY"] = v
|
||||||
|
}
|
||||||
|
return env
|
||||||
|
}
|
||||||
|
|
||||||
|
func envListFromMap(env map[string]string) []string {
|
||||||
|
envList := make([]string, 0, len(env))
|
||||||
|
for k, v := range env {
|
||||||
|
envList = append(envList, fmt.Sprintf("%s=%s", k, v))
|
||||||
|
}
|
||||||
|
return envList
|
||||||
|
}
|
||||||
|
|
||||||
|
// postTaskScriptLogWriter returns an io.Writer that logs the script's output one
|
||||||
|
// line at a time, tagged with the stream name. It is passed as cmd.Stdout/Stderr
|
||||||
|
// (rather than a StdoutPipe) so that cmd.WaitDelay governs the copying goroutine:
|
||||||
|
// a backgrounded process holding the pipe open can never block cmd.Wait()
|
||||||
|
// indefinitely. Flush any trailing partial line with common.FlushWriter after
|
||||||
|
// cmd.Wait() returns.
|
||||||
|
func postTaskScriptLogWriter(stream string) io.Writer {
|
||||||
|
return common.NewLineWriter(func(line string) bool {
|
||||||
|
log.Infof("post-task script %s: %s", stream, strings.TrimRight(line, "\r\n"))
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
157
internal/app/run/post_task_script_test.go
Normal file
157
internal/app/run/post_task_script_test.go
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package run
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/config"
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/metrics"
|
||||||
|
"gitea.com/gitea/runner/internal/pkg/report"
|
||||||
|
|
||||||
|
runnerv1 "gitea.dev/actions-proto-go/runner/v1"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"google.golang.org/protobuf/types/known/structpb"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRunPostTaskScriptSkippedWhenEmpty(t *testing.T) {
|
||||||
|
r := &Runner{
|
||||||
|
cfg: &config.Config{},
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
taskCtx, err := structpb.NewStruct(map[string]any{})
|
||||||
|
require.NoError(t, err)
|
||||||
|
task := &runnerv1.Task{Id: 1, Context: taskCtx}
|
||||||
|
reporter := report.NewReporter(ctx, cancel, nil, task, r.cfg)
|
||||||
|
|
||||||
|
require.NotPanics(t, func() {
|
||||||
|
r.runPostTaskScript(ctx, reporter, task, "/workspace/owner/repo")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunPostTaskScriptNonZeroExitDoesNotPanic(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
scriptPath := filepath.Join(dir, "fail.sh")
|
||||||
|
require.NoError(t, os.WriteFile(scriptPath, []byte("#!/bin/sh\nexit 2\n"), 0o700))
|
||||||
|
|
||||||
|
cfg, err := config.LoadDefault("")
|
||||||
|
require.NoError(t, err)
|
||||||
|
cfg.Runner.PostTaskScript = scriptPath
|
||||||
|
|
||||||
|
r := &Runner{cfg: cfg}
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
taskCtx, err := structpb.NewStruct(map[string]any{})
|
||||||
|
require.NoError(t, err)
|
||||||
|
task := &runnerv1.Task{Id: 1, Context: taskCtx}
|
||||||
|
reporter := report.NewReporter(ctx, cancel, nil, task, cfg)
|
||||||
|
|
||||||
|
require.NotPanics(t, func() {
|
||||||
|
r.runPostTaskScript(ctx, reporter, task, "/workspace/owner/repo")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPostTaskScriptContextUsesFullTimeout(t *testing.T) {
|
||||||
|
const timeout = 5 * time.Minute
|
||||||
|
|
||||||
|
// A task context that finished close to its own deadline must not truncate the
|
||||||
|
// script's budget: the script should still get its full configured timeout.
|
||||||
|
near, cancelNear := context.WithTimeout(context.Background(), time.Second)
|
||||||
|
defer cancelNear()
|
||||||
|
scriptCtx, cancel := postTaskScriptContext(near, timeout)
|
||||||
|
defer cancel()
|
||||||
|
deadline, ok := scriptCtx.Deadline()
|
||||||
|
require.True(t, ok)
|
||||||
|
assert.Greater(t, time.Until(deadline), time.Minute, "script timeout truncated to task deadline")
|
||||||
|
|
||||||
|
// An already-cancelled task context must not cancel the script either.
|
||||||
|
cancelledCtx, cancelIt := context.WithCancel(context.Background())
|
||||||
|
cancelIt()
|
||||||
|
scriptCtx2, cancel2 := postTaskScriptContext(cancelledCtx, timeout)
|
||||||
|
defer cancel2()
|
||||||
|
assert.NoError(t, scriptCtx2.Err(), "script context inherited the cancelled task context")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPostTaskScriptEnv(t *testing.T) {
|
||||||
|
cfg, err := config.LoadDefault("")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
r := &Runner{
|
||||||
|
cfg: cfg,
|
||||||
|
envs: map[string]string{"BASE": "1"},
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
taskCtx, err := structpb.NewStruct(map[string]any{
|
||||||
|
"run_id": "99",
|
||||||
|
"repository": "acme/widget",
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
task := &runnerv1.Task{Id: 3, Context: taskCtx}
|
||||||
|
reporter := report.NewReporter(ctx, cancel, nil, task, cfg)
|
||||||
|
setReporterJobResult(t, reporter, runnerv1.Result_RESULT_FAILURE)
|
||||||
|
|
||||||
|
env := r.postTaskScriptEnv(reporter, task, "/tmp/workspace")
|
||||||
|
assert.Equal(t, "1", env["BASE"])
|
||||||
|
assert.Equal(t, "3", env["GITEA_TASK_ID"])
|
||||||
|
assert.Equal(t, "99", env["GITEA_RUN_ID"])
|
||||||
|
assert.Equal(t, "acme/widget", env["GITEA_REPOSITORY"])
|
||||||
|
assert.Equal(t, "/tmp/workspace", env["GITEA_WORKSPACE"])
|
||||||
|
assert.Equal(t, "failure", env["GITEA_JOB_RESULT"])
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunPostTaskScriptIntegration(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
outFile := filepath.Join(dir, "out.txt")
|
||||||
|
scriptPath := filepath.Join(dir, "post-task.sh")
|
||||||
|
script := "#!/bin/sh\nprintf '%s %s %s' \"$GITEA_TASK_ID\" \"$GITEA_JOB_RESULT\" \"$CUSTOM\" > \"" + outFile + "\"\n"
|
||||||
|
require.NoError(t, os.WriteFile(scriptPath, []byte(script), 0o700))
|
||||||
|
|
||||||
|
cfg, err := config.LoadDefault("")
|
||||||
|
require.NoError(t, err)
|
||||||
|
cfg.Runner.PostTaskScript = scriptPath
|
||||||
|
|
||||||
|
r := &Runner{
|
||||||
|
cfg: cfg,
|
||||||
|
envs: map[string]string{"CUSTOM": "runner-env"},
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
taskCtx, err := structpb.NewStruct(map[string]any{})
|
||||||
|
require.NoError(t, err)
|
||||||
|
task := &runnerv1.Task{Id: 11, Context: taskCtx}
|
||||||
|
reporter := report.NewReporter(ctx, cancel, nil, task, cfg)
|
||||||
|
setReporterJobResult(t, reporter, runnerv1.Result_RESULT_SUCCESS)
|
||||||
|
|
||||||
|
r.runPostTaskScript(ctx, reporter, task, "/workspace/acme/repo")
|
||||||
|
|
||||||
|
content, err := os.ReadFile(outFile)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "11 success runner-env", string(content))
|
||||||
|
}
|
||||||
|
|
||||||
|
func setReporterJobResult(t *testing.T, reporter *report.Reporter, result runnerv1.Result) {
|
||||||
|
t.Helper()
|
||||||
|
require.NoError(t, reporter.Fire(&log.Entry{
|
||||||
|
Time: time.Now(),
|
||||||
|
Message: "job finished",
|
||||||
|
Data: log.Fields{
|
||||||
|
"stage": "Post",
|
||||||
|
"jobResult": metrics.ResultToStatusLabel(result),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
}
|
||||||
@@ -475,6 +475,9 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report.
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reporter.StopHeartbeats()
|
||||||
|
r.runPostTaskScript(ctx, reporter, task, workdir)
|
||||||
|
|
||||||
return execErr
|
return execErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -83,6 +83,21 @@ runner:
|
|||||||
# terminal; tools like `docker build` emit redrawing progress frames into the captured log
|
# terminal; tools like `docker build` emit redrawing progress frames into the captured log
|
||||||
# when a TTY is present.
|
# when a TTY is present.
|
||||||
allocate_pty: false
|
allocate_pty: false
|
||||||
|
# Optional executable on the host, run once after each task's built-in cleanup
|
||||||
|
# (post-steps, container teardown, bind-workdir removal). Additive only.
|
||||||
|
#
|
||||||
|
# IMPORTANT: While this script runs the runner stops task heartbeats and stays
|
||||||
|
# offline from Gitea's perspective until the script exits. A script that never
|
||||||
|
# returns blocks new work until post_task_script_timeout kills it (default 5m).
|
||||||
|
# Keep scripts short; set post_task_script_timeout to a safe upper bound.
|
||||||
|
#
|
||||||
|
# Output -> runner process log (not the job log). Non-zero exit -> warning only.
|
||||||
|
# Windows: use .exe, .bat, or .cmd. PowerShell (.ps1) is not supported yet as
|
||||||
|
# the configured path; wrap PowerShell commands in a .cmd file instead.
|
||||||
|
# Full guide: docs/post-task-script.md
|
||||||
|
post_task_script: ''
|
||||||
|
# Hard limit on post_task_script runtime. Default if omitted: 5m.
|
||||||
|
post_task_script_timeout: 5m
|
||||||
|
|
||||||
cache:
|
cache:
|
||||||
# Enable the built-in cache server (used by actions/cache and similar actions).
|
# Enable the built-in cache server (used by actions/cache and similar actions).
|
||||||
|
|||||||
@@ -16,6 +16,12 @@ import (
|
|||||||
"go.yaml.in/yaml/v4"
|
"go.yaml.in/yaml/v4"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// DefaultPostTaskScriptTimeout is the fallback cap on how long the post-task
|
||||||
|
// script may run when post_task_script is set without an explicit timeout. It is
|
||||||
|
// applied both at config load (for a configured script) and at the point of use
|
||||||
|
// (so a programmatically built config still gets a sane bound).
|
||||||
|
const DefaultPostTaskScriptTimeout = 5 * time.Minute
|
||||||
|
|
||||||
// Log represents the configuration for logging.
|
// Log represents the configuration for logging.
|
||||||
type Log struct {
|
type Log struct {
|
||||||
Level string `yaml:"level"` // Level indicates the logging level.
|
Level string `yaml:"level"` // Level indicates the logging level.
|
||||||
@@ -23,26 +29,28 @@ type Log struct {
|
|||||||
|
|
||||||
// Runner represents the configuration for the runner.
|
// Runner represents the configuration for the runner.
|
||||||
type Runner struct {
|
type Runner struct {
|
||||||
File string `yaml:"file"` // File specifies the file path for the runner.
|
File string `yaml:"file"` // File specifies the file path for the runner.
|
||||||
Capacity int `yaml:"capacity"` // Capacity specifies the capacity of the runner.
|
Capacity int `yaml:"capacity"` // Capacity specifies the capacity of the runner.
|
||||||
Envs map[string]string `yaml:"envs"` // Envs stores environment variables for the runner.
|
Envs map[string]string `yaml:"envs"` // Envs stores environment variables for the runner.
|
||||||
EnvFile string `yaml:"env_file"` // EnvFile specifies the path to the file containing environment variables for the runner.
|
EnvFile string `yaml:"env_file"` // EnvFile specifies the path to the file containing environment variables for the runner.
|
||||||
Timeout time.Duration `yaml:"timeout"` // Timeout specifies the duration for runner timeout.
|
Timeout time.Duration `yaml:"timeout"` // Timeout specifies the duration for runner timeout.
|
||||||
ShutdownTimeout time.Duration `yaml:"shutdown_timeout"` // ShutdownTimeout specifies the duration to wait for running jobs to complete during a shutdown of the runner.
|
ShutdownTimeout time.Duration `yaml:"shutdown_timeout"` // ShutdownTimeout specifies the duration to wait for running jobs to complete during a shutdown of the runner.
|
||||||
Insecure bool `yaml:"insecure"` // Insecure indicates whether the runner operates in an insecure mode.
|
Insecure bool `yaml:"insecure"` // Insecure indicates whether the runner operates in an insecure mode.
|
||||||
FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources.
|
FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources.
|
||||||
FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources.
|
FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources.
|
||||||
FetchIntervalMax time.Duration `yaml:"fetch_interval_max"` // FetchIntervalMax specifies the maximum backoff interval when idle.
|
FetchIntervalMax time.Duration `yaml:"fetch_interval_max"` // FetchIntervalMax specifies the maximum backoff interval when idle.
|
||||||
WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories and orphaned host-mode scratch dirs older than this duration during idle cleanup.
|
WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories and orphaned host-mode scratch dirs older than this duration during idle cleanup.
|
||||||
IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale-directory cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence.
|
IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale-directory cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence.
|
||||||
LogReportInterval time.Duration `yaml:"log_report_interval"` // LogReportInterval specifies the base interval for periodic log flush.
|
LogReportInterval time.Duration `yaml:"log_report_interval"` // LogReportInterval specifies the base interval for periodic log flush.
|
||||||
LogReportMaxLatency time.Duration `yaml:"log_report_max_latency"` // LogReportMaxLatency specifies the max time a log row can wait before being sent.
|
LogReportMaxLatency time.Duration `yaml:"log_report_max_latency"` // LogReportMaxLatency specifies the max time a log row can wait before being sent.
|
||||||
LogReportBatchSize int `yaml:"log_report_batch_size"` // LogReportBatchSize triggers immediate log flush when buffer reaches this size.
|
LogReportBatchSize int `yaml:"log_report_batch_size"` // LogReportBatchSize triggers immediate log flush when buffer reaches this size.
|
||||||
StateReportInterval time.Duration `yaml:"state_report_interval"` // StateReportInterval specifies the interval for state reporting.
|
StateReportInterval time.Duration `yaml:"state_report_interval"` // StateReportInterval specifies the interval for state reporting.
|
||||||
ReportCloseTimeout time.Duration `yaml:"report_close_timeout"` // ReportCloseTimeout caps each RPC attempt when flushing the final logs and task state at job completion, on a detached context so a server cancel can't block the acknowledgement.
|
ReportCloseTimeout time.Duration `yaml:"report_close_timeout"` // ReportCloseTimeout caps each RPC attempt when flushing the final logs and task state at job completion, on a detached context so a server cancel can't block the acknowledgement.
|
||||||
Labels []string `yaml:"labels"` // Labels specify the labels of the runner. Labels are declared on each startup
|
Labels []string `yaml:"labels"` // Labels specify the labels of the runner. Labels are declared on each startup
|
||||||
GithubMirror string `yaml:"github_mirror"` // GithubMirror defines what mirrors should be used when using github
|
GithubMirror string `yaml:"github_mirror"` // GithubMirror defines what mirrors should be used when using github
|
||||||
AllocatePTY bool `yaml:"allocate_pty"` // AllocatePTY allocates a pseudo-TTY for each step's process. Default is false, matching GitHub's actions/runner. Enable only for jobs that need an interactive terminal; tools like docker build emit redrawing progress frames into the captured log when a TTY is present. Applies to both host and docker backends.
|
AllocatePTY bool `yaml:"allocate_pty"` // AllocatePTY allocates a pseudo-TTY for each step's process. Default is false, matching GitHub's actions/runner. Enable only for jobs that need an interactive terminal; tools like docker build emit redrawing progress frames into the captured log when a TTY is present. Applies to both host and docker backends.
|
||||||
|
PostTaskScript string `yaml:"post_task_script"` // PostTaskScript is the path to an executable script run on the host after each task's cleanup completes. Empty disables the hook. On Windows use .exe/.bat/.cmd; PowerShell (.ps1) is not supported yet as the configured path.
|
||||||
|
PostTaskScriptTimeout time.Duration `yaml:"post_task_script_timeout"` // PostTaskScriptTimeout caps how long the post-task script may run. Default is 5m when post_task_script is set.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cache represents the configuration for caching.
|
// Cache represents the configuration for caching.
|
||||||
@@ -193,6 +201,9 @@ func LoadDefault(file string) (*Config, error) {
|
|||||||
if cfg.Runner.ReportCloseTimeout <= 0 {
|
if cfg.Runner.ReportCloseTimeout <= 0 {
|
||||||
cfg.Runner.ReportCloseTimeout = 10 * time.Second
|
cfg.Runner.ReportCloseTimeout = 10 * time.Second
|
||||||
}
|
}
|
||||||
|
if cfg.Runner.PostTaskScript != "" && cfg.Runner.PostTaskScriptTimeout <= 0 {
|
||||||
|
cfg.Runner.PostTaskScriptTimeout = DefaultPostTaskScriptTimeout
|
||||||
|
}
|
||||||
if cfg.Metrics.Addr == "" {
|
if cfg.Metrics.Addr == "" {
|
||||||
cfg.Metrics.Addr = "127.0.0.1:9101"
|
cfg.Metrics.Addr = "127.0.0.1:9101"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -107,6 +107,34 @@ runner:
|
|||||||
// TestLoadDefault_MalformedYAMLReturnsParseError pins the error surfaced for
|
// TestLoadDefault_MalformedYAMLReturnsParseError pins the error surfaced for
|
||||||
// invalid YAML to the canonical "parse config file" message rather than the
|
// invalid YAML to the canonical "parse config file" message rather than the
|
||||||
// "for defaults metadata" variant — i.e. the main yaml.Unmarshal runs first.
|
// "for defaults metadata" variant — i.e. the main yaml.Unmarshal runs first.
|
||||||
|
func TestLoadDefault_LoadsPostTaskScript(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte(`
|
||||||
|
runner:
|
||||||
|
post_task_script: /usr/local/bin/post-task.sh
|
||||||
|
post_task_script_timeout: 2m
|
||||||
|
`), 0o600))
|
||||||
|
|
||||||
|
cfg, err := LoadDefault(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "/usr/local/bin/post-task.sh", cfg.Runner.PostTaskScript)
|
||||||
|
assert.Equal(t, 2*time.Minute, cfg.Runner.PostTaskScriptTimeout)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadDefault_DefaultsPostTaskScriptTimeout(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte(`
|
||||||
|
runner:
|
||||||
|
post_task_script: /usr/local/bin/post-task.sh
|
||||||
|
`), 0o600))
|
||||||
|
|
||||||
|
cfg, err := LoadDefault(path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, 5*time.Minute, cfg.Runner.PostTaskScriptTimeout)
|
||||||
|
}
|
||||||
|
|
||||||
func TestLoadDefault_MalformedYAMLReturnsParseError(t *testing.T) {
|
func TestLoadDefault_MalformedYAMLReturnsParseError(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "config.yaml")
|
path := filepath.Join(dir, "config.yaml")
|
||||||
|
|||||||
29
internal/pkg/process/killer_plan9.go
Normal file
29
internal/pkg/process/killer_plan9.go
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
//go:build plan9
|
||||||
|
|
||||||
|
package process
|
||||||
|
|
||||||
|
import "os"
|
||||||
|
|
||||||
|
// Killer falls back to single-process termination on platforms without a
|
||||||
|
// process-group / Job Object tree-kill. The Job Object (Windows) and process
|
||||||
|
// group (Unix) based tree-kills live in killer_windows.go / killer_unix.go;
|
||||||
|
// here we just kill the direct child, matching the previous default behaviour.
|
||||||
|
type Killer struct {
|
||||||
|
p *os.Process
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewKiller(p *os.Process) (*Killer, error) {
|
||||||
|
return &Killer{p: p}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *Killer) Kill() error {
|
||||||
|
if k == nil || k.p == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return k.p.Kill()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *Killer) Close() error { return nil }
|
||||||
56
internal/pkg/process/killer_unix.go
Normal file
56
internal/pkg/process/killer_unix.go
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
//go:build !windows && !plan9
|
||||||
|
|
||||||
|
package process
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Killer terminates a started process together with its whole process group,
|
||||||
|
// which is the Unix counterpart of the Windows Job Object tree-kill.
|
||||||
|
//
|
||||||
|
// Background: a process (a step or a post-task script) often launches a process
|
||||||
|
// tree (a shell that starts a child which in turn spawns further background
|
||||||
|
// processes). The default exec.CommandContext cancellation only kills the
|
||||||
|
// direct child, so cancelling left the rest of the tree running. Because those
|
||||||
|
// orphans inherited the parent's stdout/stderr pipe, cmd.Wait() also blocked
|
||||||
|
// forever and the runner hung.
|
||||||
|
//
|
||||||
|
// Processes are started with Setpgid (or Setsid for the PTY path, see
|
||||||
|
// SysProcAttr), which makes the process the leader of a new process group whose
|
||||||
|
// ID equals its PID. Signalling the negative PID delivers to every process
|
||||||
|
// still in that group, so we can tear down the whole tree atomically on
|
||||||
|
// cancellation, which also closes the inherited pipe handles so cmd.Wait() can
|
||||||
|
// return.
|
||||||
|
type Killer struct {
|
||||||
|
pgid int
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewKiller captures the process group of p (an already-started process).
|
||||||
|
// Because the process is launched with Setpgid/Setsid, p is a group leader and
|
||||||
|
// its PGID equals its PID; children spawned afterwards stay in the same group
|
||||||
|
// unless they explicitly create their own.
|
||||||
|
func NewKiller(p *os.Process) (*Killer, error) {
|
||||||
|
return &Killer{pgid: p.Pid}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kill sends SIGKILL to the entire process group (the process and every
|
||||||
|
// descendant that stayed in the group). A missing group (ESRCH) means the
|
||||||
|
// processes already exited and is not treated as an error.
|
||||||
|
func (k *Killer) Kill() error {
|
||||||
|
if k == nil || k.pgid <= 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := syscall.Kill(-k.pgid, syscall.SIGKILL); err != nil && !errors.Is(err, syscall.ESRCH) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close is a no-op on Unix; there is no job handle to release.
|
||||||
|
func (k *Killer) Close() error { return nil }
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
//go:build !windows && !plan9
|
//go:build !windows && !plan9
|
||||||
|
|
||||||
package container
|
package process
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -47,11 +47,12 @@ func processAlive(pid int) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestProcessKillerKillsTree verifies that a process group captured by the
|
// TestKillerKillsTree verifies that a process group captured by the killer is
|
||||||
// killer is terminated together with a child the step spawns afterwards. This
|
// terminated together with a child the process spawns afterwards. This mirrors
|
||||||
// mirrors a step that launches a child which spawns further processes, where
|
// a step or post-task script that launches a child which spawns further
|
||||||
// cancelling the job must take down the whole tree, not just the direct child.
|
// processes, where cancelling must take down the whole tree, not just the
|
||||||
func TestProcessKillerKillsTree(t *testing.T) {
|
// direct child.
|
||||||
|
func TestKillerKillsTree(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
pidFile := filepath.Join(dir, "child.pid")
|
pidFile := filepath.Join(dir, "child.pid")
|
||||||
|
|
||||||
@@ -60,8 +61,8 @@ func TestProcessKillerKillsTree(t *testing.T) {
|
|||||||
// child stays in the parent's process group, so the group kill must reach it.
|
// child stays in the parent's process group, so the group kill must reach it.
|
||||||
script := fmt.Sprintf(`sleep 600 & echo $! > %q; sleep 600`, pidFile)
|
script := fmt.Sprintf(`sleep 600 & echo $! > %q; sleep 600`, pidFile)
|
||||||
cmd := exec.Command("/bin/sh", "-c", script)
|
cmd := exec.Command("/bin/sh", "-c", script)
|
||||||
// Launch as its own process-group leader, exactly like a real step does (see
|
// Launch as its own process-group leader, exactly like a real process does
|
||||||
// getSysProcAttr), so the killer's PGID == the process PID.
|
// (see SysProcAttr), so the killer's PGID == the process PID.
|
||||||
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||||
require.NoError(t, cmd.Start())
|
require.NoError(t, cmd.Start())
|
||||||
t.Cleanup(func() {
|
t.Cleanup(func() {
|
||||||
@@ -69,7 +70,7 @@ func TestProcessKillerKillsTree(t *testing.T) {
|
|||||||
_ = cmd.Wait()
|
_ = cmd.Wait()
|
||||||
})
|
})
|
||||||
|
|
||||||
killer, err := newProcessKiller(cmd.Process)
|
killer, err := NewKiller(cmd.Process)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
defer killer.Close()
|
defer killer.Close()
|
||||||
|
|
||||||
72
internal/pkg/process/killer_windows.go
Normal file
72
internal/pkg/process/killer_windows.go
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package process
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"golang.org/x/sys/windows"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Killer terminates a started process together with its entire descendant tree
|
||||||
|
// via a Windows Job Object.
|
||||||
|
//
|
||||||
|
// Background: a process (a step or a post-task script) often launches a process
|
||||||
|
// tree (a shell that starts a child which in turn spawns further GUI or
|
||||||
|
// background processes). The default exec.CommandContext cancellation only kills
|
||||||
|
// the direct child, so cancelling left the rest of the tree running. Because
|
||||||
|
// those orphans inherited the parent's stdout/stderr pipe, cmd.Wait() also
|
||||||
|
// blocked forever and the runner hung.
|
||||||
|
//
|
||||||
|
// Assigning the process to a Job Object lets us kill the whole tree atomically
|
||||||
|
// on cancellation (TerminateJobObject), which also closes the inherited pipe
|
||||||
|
// handles so cmd.Wait() can return.
|
||||||
|
type Killer struct {
|
||||||
|
job windows.Handle
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewKiller creates a Job Object and assigns p (an already-started process) to
|
||||||
|
// it. Children spawned by p afterwards are automatically part of the job. The
|
||||||
|
// job does NOT use JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, so closing the handle on
|
||||||
|
// normal completion does not kill legitimate background processes; the tree is
|
||||||
|
// only torn down by an explicit Kill (cancellation).
|
||||||
|
func NewKiller(p *os.Process) (*Killer, error) {
|
||||||
|
job, err := windows.CreateJobObject(nil, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
h, err := windows.OpenProcess(windows.PROCESS_SET_QUOTA|windows.PROCESS_TERMINATE, false, uint32(p.Pid))
|
||||||
|
if err != nil {
|
||||||
|
windows.CloseHandle(job)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer windows.CloseHandle(h)
|
||||||
|
|
||||||
|
if err := windows.AssignProcessToJobObject(job, h); err != nil {
|
||||||
|
windows.CloseHandle(job)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Killer{job: job}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kill terminates every process currently assigned to the job (the process and
|
||||||
|
// all of its descendants).
|
||||||
|
func (k *Killer) Kill() error {
|
||||||
|
if k == nil || k.job == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return windows.TerminateJobObject(k.job, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close releases the job handle. It does not terminate the processes.
|
||||||
|
func (k *Killer) Close() error {
|
||||||
|
if k == nil || k.job == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
h := k.job
|
||||||
|
k.job = 0
|
||||||
|
return windows.CloseHandle(h)
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
package container
|
package process
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -32,11 +32,11 @@ func processAlive(pid int) bool {
|
|||||||
return code == stillActive
|
return code == stillActive
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestProcessKillerKillsTree verifies that a process assigned to the Job Object
|
// TestKillerKillsTree verifies that a process assigned to the Job Object is
|
||||||
// is terminated together with a child it spawns afterwards. This mirrors a step
|
// terminated together with a child it spawns afterwards. This mirrors a step or
|
||||||
// that launches a child which spawns further processes, where cancelling the
|
// post-task script that launches a child which spawns further processes, where
|
||||||
// job must take down the whole tree, not just the direct child.
|
// cancelling must take down the whole tree, not just the direct child.
|
||||||
func TestProcessKillerKillsTree(t *testing.T) {
|
func TestKillerKillsTree(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
pidFile := filepath.Join(dir, "child.pid")
|
pidFile := filepath.Join(dir, "child.pid")
|
||||||
|
|
||||||
@@ -50,7 +50,7 @@ func TestProcessKillerKillsTree(t *testing.T) {
|
|||||||
require.NoError(t, cmd.Start())
|
require.NoError(t, cmd.Start())
|
||||||
t.Cleanup(func() { _ = cmd.Process.Kill() })
|
t.Cleanup(func() { _ = cmd.Process.Kill() })
|
||||||
|
|
||||||
killer, err := newProcessKiller(cmd.Process)
|
killer, err := NewKiller(cmd.Process)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
defer killer.Close()
|
defer killer.Close()
|
||||||
|
|
||||||
17
internal/pkg/process/sysprocattr_plan9.go
Normal file
17
internal/pkg/process/sysprocattr_plan9.go
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
//go:build plan9
|
||||||
|
|
||||||
|
package process
|
||||||
|
|
||||||
|
import "syscall"
|
||||||
|
|
||||||
|
// SysProcAttr returns the platform attributes used to start a process. Plan 9
|
||||||
|
// has no process-group tree-kill (see Killer), so we only request a new rfork
|
||||||
|
// note group here.
|
||||||
|
func SysProcAttr(cmdLine string, tty bool) *syscall.SysProcAttr {
|
||||||
|
return &syscall.SysProcAttr{
|
||||||
|
Rfork: syscall.RFNOTEG,
|
||||||
|
}
|
||||||
|
}
|
||||||
24
internal/pkg/process/sysprocattr_unix.go
Normal file
24
internal/pkg/process/sysprocattr_unix.go
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
//go:build !windows && !plan9
|
||||||
|
|
||||||
|
package process
|
||||||
|
|
||||||
|
import "syscall"
|
||||||
|
|
||||||
|
// SysProcAttr returns the platform attributes used to start a process so that a
|
||||||
|
// Killer can later tear down its whole process tree. On Unix the process becomes
|
||||||
|
// the leader of a new process group (or session, for the PTY path), so a
|
||||||
|
// signal to the negative PID reaches every descendant that stayed in the group.
|
||||||
|
func SysProcAttr(_ string, tty bool) *syscall.SysProcAttr {
|
||||||
|
if tty {
|
||||||
|
return &syscall.SysProcAttr{
|
||||||
|
Setsid: true,
|
||||||
|
Setctty: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &syscall.SysProcAttr{
|
||||||
|
Setpgid: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
14
internal/pkg/process/sysprocattr_windows.go
Normal file
14
internal/pkg/process/sysprocattr_windows.go
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package process
|
||||||
|
|
||||||
|
import "syscall"
|
||||||
|
|
||||||
|
// SysProcAttr returns the platform attributes used to start a process so that a
|
||||||
|
// Killer can later tear down its whole process tree. On Windows the process is
|
||||||
|
// placed in a new process group; the descendant tree is reclaimed via the Job
|
||||||
|
// Object set up by NewKiller.
|
||||||
|
func SysProcAttr(cmdLine string, tty bool) *syscall.SysProcAttr {
|
||||||
|
return &syscall.SysProcAttr{CmdLine: cmdLine, CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP}
|
||||||
|
}
|
||||||
66
internal/pkg/process/treekill.go
Normal file
66
internal/pkg/process/treekill.go
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package process
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// treeKillWaitDelay bounds how long Wait lingers for the command's I/O pipes to
|
||||||
|
// drain after the process exits before force-closing them and returning. It also
|
||||||
|
// covers a command that backgrounds a process holding a pipe open after a clean
|
||||||
|
// exit.
|
||||||
|
const treeKillWaitDelay = 10 * time.Second
|
||||||
|
|
||||||
|
// TreeKill wires an exec.Cmd so that cancelling it tears down the command's
|
||||||
|
// whole process tree (see Killer) rather than only the direct child, and bounds
|
||||||
|
// the post-exit I/O wait so a leftover pipe writer can never hang cmd.Wait.
|
||||||
|
//
|
||||||
|
// Background: a command often launches a process tree (a shell that starts a
|
||||||
|
// child which spawns further background processes). The default
|
||||||
|
// exec.CommandContext cancellation only kills the direct child, leaving the rest
|
||||||
|
// of the tree running; and because the orphans inherit cmd's stdout/stderr pipe,
|
||||||
|
// cmd.Wait() would block forever, hanging the caller.
|
||||||
|
//
|
||||||
|
// Callers still set cmd.SysProcAttr (via SysProcAttr) themselves, because the
|
||||||
|
// value differs between the plain and PTY execution paths.
|
||||||
|
type TreeKill struct {
|
||||||
|
killer atomic.Pointer[Killer]
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTreeKill sets cmd.Cancel and cmd.WaitDelay. Call it before cmd.Start, then
|
||||||
|
// call Capture once after a successful Start.
|
||||||
|
func NewTreeKill(cmd *exec.Cmd) *TreeKill {
|
||||||
|
t := &TreeKill{}
|
||||||
|
cmd.Cancel = func() error {
|
||||||
|
if k := t.killer.Load(); k != nil {
|
||||||
|
return k.Kill()
|
||||||
|
}
|
||||||
|
if cmd.Process != nil {
|
||||||
|
return cmd.Process.Kill()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
cmd.WaitDelay = treeKillWaitDelay
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capture assigns the started process (and the descendants it spawns) to a
|
||||||
|
// Killer so cancellation can reach the whole tree — a Job Object on Windows
|
||||||
|
// (children spawned afterwards are auto-included) and the process group on Unix.
|
||||||
|
// Call it once after cmd.Start. On failure the command falls back to the default
|
||||||
|
// single-process kill and the returned error is for logging only; WaitDelay
|
||||||
|
// still bounds the wait. The returned Killer should be closed when the command
|
||||||
|
// finishes (Close is nil-safe).
|
||||||
|
func (t *TreeKill) Capture(p *os.Process) (*Killer, error) {
|
||||||
|
k, err := NewKiller(p)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
t.killer.Store(k)
|
||||||
|
return k, nil
|
||||||
|
}
|
||||||
@@ -44,11 +44,13 @@ type Reporter struct {
|
|||||||
// so the gauge skips no-op Set calls when the buffer size is unchanged.
|
// so the gauge skips no-op Set calls when the buffer size is unchanged.
|
||||||
lastLogBufferRows int
|
lastLogBufferRows int
|
||||||
|
|
||||||
state *runnerv1.TaskState
|
state *runnerv1.TaskState
|
||||||
stateChanged bool
|
stateChanged bool
|
||||||
stateMu sync.RWMutex
|
stateMu sync.RWMutex
|
||||||
outputs sync.Map
|
outputs sync.Map
|
||||||
daemon chan struct{}
|
daemon chan struct{}
|
||||||
|
heartbeatStop chan struct{}
|
||||||
|
heartbeatStopOnce sync.Once
|
||||||
|
|
||||||
// Unix-nanos of the last successful UpdateTask. Atomic so the heartbeat
|
// Unix-nanos of the last successful UpdateTask. Atomic so the heartbeat
|
||||||
// guard in ReportState reads it without contending stateMu.
|
// guard in ReportState reads it without contending stateMu.
|
||||||
@@ -99,7 +101,8 @@ func NewReporter(ctx context.Context, cancel context.CancelFunc, client client.C
|
|||||||
state: &runnerv1.TaskState{
|
state: &runnerv1.TaskState{
|
||||||
Id: task.Id,
|
Id: task.Id,
|
||||||
},
|
},
|
||||||
daemon: make(chan struct{}),
|
daemon: make(chan struct{}),
|
||||||
|
heartbeatStop: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
if task.Secrets["ACTIONS_STEP_DEBUG"] == "true" {
|
if task.Secrets["ACTIONS_STEP_DEBUG"] == "true" {
|
||||||
@@ -273,6 +276,15 @@ func (r *Reporter) RunDaemon() {
|
|||||||
go r.runDaemonLoop()
|
go r.runDaemonLoop()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StopHeartbeats stops periodic UpdateTask heartbeats without cancelling the
|
||||||
|
// task context. Close() still delivers the final flush. Safe to call multiple
|
||||||
|
// times and when the context is already cancelled.
|
||||||
|
func (r *Reporter) StopHeartbeats() {
|
||||||
|
r.heartbeatStopOnce.Do(func() {
|
||||||
|
close(r.heartbeatStop)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (r *Reporter) stopLatencyTimer(active *bool, timer *time.Timer) {
|
func (r *Reporter) stopLatencyTimer(active *bool, timer *time.Timer) {
|
||||||
if *active {
|
if *active {
|
||||||
if !timer.Stop() {
|
if !timer.Stop() {
|
||||||
@@ -339,6 +351,12 @@ func (r *Reporter) runDaemonLoop() {
|
|||||||
// delivers the final flush on a detached context (flushFinal).
|
// delivers the final flush on a detached context (flushFinal).
|
||||||
close(r.daemon)
|
close(r.daemon)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
case <-r.heartbeatStop:
|
||||||
|
// Stop heartbeating during post-task script execution. Close() still
|
||||||
|
// delivers the final flush on a detached context (flushFinal).
|
||||||
|
close(r.daemon)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
r.stateMu.RLock()
|
r.stateMu.RLock()
|
||||||
|
|||||||
@@ -921,3 +921,65 @@ func TestReporter_CloseReportsCancelledOnCanceledCtx(t *testing.T) {
|
|||||||
assert.True(t, foundCancelled, "final log must contain a 'Cancelled' row")
|
assert.True(t, foundCancelled, "final log must contain a 'Cancelled' row")
|
||||||
assert.False(t, foundEarlyTermination, "final log must not contain 'Early termination' on the cancel path")
|
assert.False(t, foundEarlyTermination, "final log must not contain 'Early termination' on the cancel path")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestReporter_StopHeartbeats verifies that StopHeartbeats ends periodic
|
||||||
|
// UpdateTask heartbeats while Close() still flushes the final state.
|
||||||
|
func TestReporter_StopHeartbeats(t *testing.T) {
|
||||||
|
var updateTaskCalls atomic.Int64
|
||||||
|
|
||||||
|
client := mocks.NewClient(t)
|
||||||
|
client.On("UpdateLog", mock.Anything, mock.Anything).Maybe().Return(
|
||||||
|
func(_ context.Context, req *connect_go.Request[runnerv1.UpdateLogRequest]) (*connect_go.Response[runnerv1.UpdateLogResponse], error) {
|
||||||
|
return connect_go.NewResponse(&runnerv1.UpdateLogResponse{
|
||||||
|
AckIndex: req.Msg.Index + int64(len(req.Msg.Rows)),
|
||||||
|
}), nil
|
||||||
|
},
|
||||||
|
)
|
||||||
|
client.On("UpdateTask", mock.Anything, mock.Anything).Return(
|
||||||
|
func(_ context.Context, _ *connect_go.Request[runnerv1.UpdateTaskRequest]) (*connect_go.Response[runnerv1.UpdateTaskResponse], error) {
|
||||||
|
updateTaskCalls.Add(1)
|
||||||
|
return connect_go.NewResponse(&runnerv1.UpdateTaskResponse{}), nil
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
taskCtx, err := structpb.NewStruct(map[string]any{})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
cfg, err := config.LoadDefault("")
|
||||||
|
require.NoError(t, err)
|
||||||
|
cfg.Runner.StateReportInterval = 20 * time.Millisecond
|
||||||
|
cfg.Runner.LogReportInterval = time.Hour
|
||||||
|
|
||||||
|
reporter := NewReporter(ctx, cancel, client, &runnerv1.Task{Context: taskCtx}, cfg)
|
||||||
|
reporter.ResetSteps(1)
|
||||||
|
reporter.RunDaemon()
|
||||||
|
|
||||||
|
reporter.stateMu.Lock()
|
||||||
|
reporter.stateChanged = true
|
||||||
|
reporter.state.Result = runnerv1.Result_RESULT_SUCCESS
|
||||||
|
reporter.state.StoppedAt = timestamppb.Now()
|
||||||
|
reporter.stateMu.Unlock()
|
||||||
|
|
||||||
|
require.Eventually(t, func() bool {
|
||||||
|
return updateTaskCalls.Load() >= 1
|
||||||
|
}, time.Second, 5*time.Millisecond, "daemon must send at least one UpdateTask before StopHeartbeats")
|
||||||
|
|
||||||
|
beforeStop := updateTaskCalls.Load()
|
||||||
|
reporter.StopHeartbeats()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-reporter.daemon:
|
||||||
|
case <-time.After(time.Second):
|
||||||
|
t.Fatal("StopHeartbeats must stop the daemon loop")
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(3 * cfg.Runner.StateReportInterval)
|
||||||
|
assert.Equal(t, beforeStop, updateTaskCalls.Load(),
|
||||||
|
"UpdateTask must not be called after StopHeartbeats")
|
||||||
|
|
||||||
|
require.NoError(t, reporter.Close(""))
|
||||||
|
assert.Greater(t, updateTaskCalls.Load(), beforeStop,
|
||||||
|
"Close() must still send a final UpdateTask after StopHeartbeats")
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user