From 3e139b7f091d48b6c599442cfc3c0e411295ddf3 Mon Sep 17 00:00:00 2001
From: silverwind
Date: Thu, 19 Feb 2026 04:05:54 +0100
Subject: [PATCH] fix: prevent RunDaemon from sending completed state before
 Close sends final logs

Split ReportState into a public method that skips when closed (used by
RunDaemon) and a private reportState that always sends (used by Close).
This prevents the server from deleting ephemeral runners before final
logs are uploaded.

Add test reproducing the exact interleaving from #793.

Co-Authored-By: Claude Opus 4.6
---
 internal/pkg/report/reporter.go      | 13 ++++-
 internal/pkg/report/reporter_test.go | 79 ++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/internal/pkg/report/reporter.go b/internal/pkg/report/reporter.go
index 81bea3f..b1ef369 100644
--- a/internal/pkg/report/reporter.go
+++ b/internal/pkg/report/reporter.go
@@ -261,7 +261,7 @@ func (r *Reporter) Close(lastWords string) error {
 		if err := r.ReportLog(true); err != nil {
 			return err
 		}
-		return r.ReportState()
+		return r.reportState()
 	}, retry.Context(r.ctx))
 }
 
@@ -300,7 +300,18 @@ func (r *Reporter) ReportLog(noMore bool) error {
 	return nil
 }
 
+// ReportState reports the current task state unless the reporter is closed.
 func (r *Reporter) ReportState() error {
+	r.stateMu.RLock()
+	closed := r.closed
+	r.stateMu.RUnlock()
+	if closed {
+		return nil
+	}
+	return r.reportState()
+}
+
+func (r *Reporter) reportState() error {
 	r.clientM.Lock()
 	defer r.clientM.Unlock()
 
diff --git a/internal/pkg/report/reporter_test.go b/internal/pkg/report/reporter_test.go
index a7f0714..4d16e64 100644
--- a/internal/pkg/report/reporter_test.go
+++ b/internal/pkg/report/reporter_test.go
@@ -5,6 +5,7 @@ package report
 
 import (
 	"context"
+	"fmt"
 	"strings"
 	"sync"
 	"testing"
@@ -17,6 +18,7 @@ import (
 	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/require"
 	"google.golang.org/protobuf/types/known/structpb"
+	"google.golang.org/protobuf/types/known/timestamppb"
 
 	"gitea.com/gitea/act_runner/internal/pkg/client/mocks"
 )
@@ -198,6 +200,83 @@ func TestReporter_Fire(t *testing.T) {
 	})
 }
 
+// TestReporter_EphemeralRunnerDeletion reproduces the exact scenario from
+// https://gitea.com/gitea/act_runner/issues/793:
+//
+//  1. RunDaemon calls ReportLog(false) — runner is still alive
+//  2. Close() updates state to Result=FAILURE (between RunDaemon's ReportLog and ReportState)
+//  3. RunDaemon's ReportState() would clone the completed state and send it,
+//     but the fix makes ReportState return early when closed, preventing this
+//  4. Close's ReportLog(true) succeeds because the runner was not deleted
+func TestReporter_EphemeralRunnerDeletion(t *testing.T) {
+	runnerDeleted := false
+
+	client := mocks.NewClient(t)
+	client.On("UpdateLog", mock.Anything, mock.Anything).Return(
+		func(_ context.Context, req *connect_go.Request[runnerv1.UpdateLogRequest]) (*connect_go.Response[runnerv1.UpdateLogResponse], error) {
+			if runnerDeleted {
+				return nil, fmt.Errorf("runner has been deleted")
+			}
+			return connect_go.NewResponse(&runnerv1.UpdateLogResponse{
+				AckIndex: req.Msg.Index + int64(len(req.Msg.Rows)),
+			}), nil
+		},
+	)
+	client.On("UpdateTask", mock.Anything, mock.Anything).Maybe().Return(
+		func(_ context.Context, req *connect_go.Request[runnerv1.UpdateTaskRequest]) (*connect_go.Response[runnerv1.UpdateTaskResponse], error) {
+			// Server deletes ephemeral runner when it receives a completed state
+			if req.Msg.State != nil && req.Msg.State.Result != runnerv1.Result_RESULT_UNSPECIFIED {
+				runnerDeleted = true
+			}
+			return connect_go.NewResponse(&runnerv1.UpdateTaskResponse{}), nil
+		},
+	)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	taskCtx, err := structpb.NewStruct(map[string]interface{}{})
+	require.NoError(t, err)
+	reporter := NewReporter(ctx, cancel, client, &runnerv1.Task{Context: taskCtx})
+	reporter.ResetSteps(1)
+
+	// Fire a log entry to create pending data
+	assert.NoError(t, reporter.Fire(&log.Entry{
+		Message: "build output",
+		Data:    log.Fields{"stage": "Main", "stepNumber": 0, "raw_output": true},
+	}))
+
+	// Step 1: RunDaemon calls ReportLog(false) — runner is still alive
+	assert.NoError(t, reporter.ReportLog(false))
+
+	// Step 2: Close() updates state — sets Result=FAILURE and marks steps cancelled.
+	// In the real race, this happens while RunDaemon is between ReportLog and ReportState.
+	reporter.stateMu.Lock()
+	reporter.closed = true
+	for _, v := range reporter.state.Steps {
+		if v.Result == runnerv1.Result_RESULT_UNSPECIFIED {
+			v.Result = runnerv1.Result_RESULT_CANCELLED
+		}
+	}
+	reporter.state.Result = runnerv1.Result_RESULT_FAILURE
+	reporter.logRows = append(reporter.logRows, &runnerv1.LogRow{
+		Time:    timestamppb.Now(),
+		Content: "Early termination",
+	})
+	reporter.state.StoppedAt = timestamppb.Now()
+	reporter.stateMu.Unlock()
+
+	// Step 3: RunDaemon's ReportState() — with the fix, this returns early
+	// because closed=true, preventing the server from deleting the runner.
+	assert.NoError(t, reporter.ReportState())
+	assert.False(t, runnerDeleted, "runner must not be deleted by RunDaemon's ReportState")
+
+	// Step 4: Close's final log upload succeeds because the runner is still alive.
+	// Flush pending rows first, then send the noMore signal (matching Close's retry behavior).
+	assert.NoError(t, reporter.ReportLog(false))
+	err = reporter.ReportLog(true)
+	assert.NoError(t, err, "final log upload must not fail: runner should not be deleted before Close finishes sending logs")
+}
+
 func TestReporter_RunDaemonClose_Race(t *testing.T) {
 	client := mocks.NewClient(t)
 	client.On("UpdateLog", mock.Anything, mock.Anything).Return(