feat: complete runner-side cancellation handling (#1016)

Completes the runner side of the cancellation flow, superseding #825. Two parts:

### 1. Report cancellations correctly (`fix`)
When `Reporter.Close` ran with the state still `UNSPECIFIED` and the reporter's
context had been cancelled, the synthesised final state attributed the job to
`RESULT_FAILURE` with an "Early termination" log row — misreporting a
cancellation as a generic failure. `Close` now detects the cancelled context
and finalizes the task as `RESULT_CANCELLED`.

### 2. Advertise the `cancelling` capability (`feat`)
[actions-proto-go v0.6.0](https://gitea.com/gitea/actions-proto-go) adds a
`capabilities` field to `RegisterRequest`/`DeclareRequest`, so the runner can
now tell the server it understands the transitional cancelling state:

- Bumps `gitea.dev/actions-proto-go` to `v0.6.0`.
- Adds a single `RunnerCapabilities()` source of truth exposing
  `CapabilityCancelling`.
- Sends `Capabilities` on both register and declare.

With this the server records `HasCancellingSupport` and can rely on the runner
running post-step cleanup before a task is finalized as `RESULT_CANCELLED`.

## Compatibility

Wire-compatible against older servers: the new field uses a previously unused
field number (8 on `RegisterRequest`, 3 on `DeclareRequest`) and the client uses
the binary protobuf codec, so a server predating the field silently ignores it —
registration and declaration succeed and the feature simply stays off. It
activates only once both runner and server are on v0.6.0.

## Server side

The matching Gitea change (read `GetCapabilities()`, persist
`HasCancellingSupport`) is a separate PR against `gitea/gitea`.

Supersedes #825.

Reviewed-on: https://gitea.com/gitea/runner/pulls/1016
Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
Reviewed-by: wxiaoguang <29147+wxiaoguang@noreply.gitea.com>
This commit is contained in:
Nicolas
2026-06-11 09:00:31 +00:00
parent 526c46b485
commit 822af5029f
6 changed files with 111 additions and 20 deletions

View File

@@ -14,6 +14,7 @@ import (
"strings"
"time"
"gitea.com/gitea/runner/internal/app/run"
"gitea.com/gitea/runner/internal/pkg/client"
"gitea.com/gitea/runner/internal/pkg/config"
"gitea.com/gitea/runner/internal/pkg/labels"
@@ -365,11 +366,12 @@ func doRegister(ctx context.Context, cfg *config.Config, inputs *registerInputs)
}
// register new runner.
resp, err := cli.Register(ctx, connect.NewRequest(&runnerv1.RegisterRequest{
Name: reg.Name,
Token: reg.Token,
Version: ver.Version(),
Labels: ls,
Ephemeral: reg.Ephemeral,
Name: reg.Name,
Token: reg.Token,
Version: ver.Version(),
Labels: ls,
Ephemeral: reg.Ephemeral,
Capabilities: run.RunnerCapabilities(),
}))
if err != nil {
log.WithError(err).Error("poller: cannot register new runner")

View File

@@ -37,6 +37,18 @@ import (
log "github.com/sirupsen/logrus"
)
// CapabilityCancelling tells the server this runner understands the
// transitional cancelling state and will run post-step cleanup before
// finalizing a task as RESULT_CANCELLED.
const CapabilityCancelling = "cancelling"
// RunnerCapabilities are the capability flags this runner advertises to the
// server during registration and declaration. The server uses them to enable
// transitional features that require runner-side support.
func RunnerCapabilities() []string {
return []string{CapabilityCancelling}
}
// Runner runs the pipeline.
type Runner struct {
name string
@@ -504,7 +516,8 @@ func (r *Runner) RunningCount() int64 {
func (r *Runner) Declare(ctx context.Context, labels []string) (*connect.Response[runnerv1.DeclareResponse], error) {
return r.client.Declare(ctx, connect.NewRequest(&runnerv1.DeclareRequest{
Version: ver.Version(),
Labels: labels,
Version: ver.Version(),
Labels: labels,
Capabilities: RunnerCapabilities(),
}))
}

View File

@@ -391,15 +391,28 @@ func (r *Reporter) Close(lastWords string) error {
r.stateMu.Lock()
r.closed = true
if r.state.Result == runnerv1.Result_RESULT_UNSPECIFIED {
// When r.ctx has been cancelled (server returned RESULT_CANCELLED via
// rpcCtx/ReportState, see line 590) the job is being torn down on the
// cancellation path: surface that explicitly instead of attributing it
// to a generic failure.
cancelled := errors.Is(r.ctx.Err(), context.Canceled)
if lastWords == "" {
lastWords = "Early termination"
if cancelled {
lastWords = "Cancelled"
} else {
lastWords = "Early termination"
}
}
for _, v := range r.state.Steps {
if v.Result == runnerv1.Result_RESULT_UNSPECIFIED {
v.Result = runnerv1.Result_RESULT_CANCELLED
}
}
r.state.Result = runnerv1.Result_RESULT_FAILURE
if cancelled {
r.state.Result = runnerv1.Result_RESULT_CANCELLED
} else {
r.state.Result = runnerv1.Result_RESULT_FAILURE
}
r.logRows = append(r.logRows, &runnerv1.LogRow{
Time: timestamppb.Now(),
Content: lastWords,

View File

@@ -850,3 +850,74 @@ func TestReporter_ServerCancelStillFlushesFinal(t *testing.T) {
assert.True(t, finalLogNoMoreSeen.Load(), "Close() must send a final UpdateLog{NoMore:true} even after server-side cancellation")
assert.True(t, finalTaskStateSeen.Load(), "Close() must send a final UpdateTask with the populated final state even after server-side cancellation")
}
// TestReporter_CloseReportsCancelledOnCanceledCtx asserts that when Close()
// runs on a reporter whose state has not been finalised AND whose context has
// been cancelled, the synthesised final state carries RESULT_CANCELLED and
// the appended log row reads "Cancelled" — not RESULT_FAILURE / "Early
// termination". This is the runner-side half of the Running -> Cancelling ->
// Cancelled flow: it gives Gitea an explicit cancel acknowledgement rather
// than a generic failure when the job is torn down on the cancel path.
func TestReporter_CloseReportsCancelledOnCanceledCtx(t *testing.T) {
var finalState atomic.Pointer[runnerv1.TaskState]
var finalLogRows atomic.Pointer[[]*runnerv1.LogRow]
client := mocks.NewClient(t)
client.On("UpdateLog", mock.Anything, mock.Anything).Return(
func(_ context.Context, req *connect_go.Request[runnerv1.UpdateLogRequest]) (*connect_go.Response[runnerv1.UpdateLogResponse], error) {
if req.Msg.NoMore {
rows := append([]*runnerv1.LogRow(nil), req.Msg.Rows...)
finalLogRows.Store(&rows)
}
return connect_go.NewResponse(&runnerv1.UpdateLogResponse{
AckIndex: req.Msg.Index + int64(len(req.Msg.Rows)),
}), nil
},
)
client.On("UpdateTask", mock.Anything, mock.Anything).Return(
func(_ context.Context, req *connect_go.Request[runnerv1.UpdateTaskRequest]) (*connect_go.Response[runnerv1.UpdateTaskResponse], error) {
if req.Msg.State != nil && req.Msg.State.Result != runnerv1.Result_RESULT_UNSPECIFIED {
finalState.Store(req.Msg.State)
}
return connect_go.NewResponse(&runnerv1.UpdateTaskResponse{}), nil
},
)
ctx, cancel := context.WithCancel(context.Background())
taskCtx, err := structpb.NewStruct(map[string]any{})
require.NoError(t, err)
cfg, _ := config.LoadDefault("")
reporter := NewReporter(ctx, cancel, client, &runnerv1.Task{Context: taskCtx}, cfg)
reporter.ResetSteps(1)
// Simulate the cancellation path: r.ctx is cancelled before Close() runs.
cancel()
// Skip the daemon wait inside Close().
close(reporter.daemon)
// Empty lastWords so Close() picks the synthesised value.
require.NoError(t, reporter.Close(""))
got := finalState.Load()
require.NotNil(t, got, "Close() must send a final UpdateTask")
assert.Equal(t, runnerv1.Result_RESULT_CANCELLED, got.Result,
"final Result must be RESULT_CANCELLED when r.ctx is cancelled, not RESULT_FAILURE")
require.Len(t, got.Steps, 1)
assert.Equal(t, runnerv1.Result_RESULT_CANCELLED, got.Steps[0].Result,
"unfinished steps must be marked RESULT_CANCELLED")
rows := finalLogRows.Load()
require.NotNil(t, rows, "Close() must send a final UpdateLog{NoMore:true}")
var foundCancelled, foundEarlyTermination bool
for _, r := range *rows {
if r.Content == "Cancelled" {
foundCancelled = true
}
if r.Content == "Early termination" {
foundEarlyTermination = true
}
}
assert.True(t, foundCancelled, "final log must contain a 'Cancelled' row")
assert.False(t, foundEarlyTermination, "final log must not contain 'Early termination' on the cancel path")
}