fix: prevent loss of step log output at end of step (#1028)

## Problem

Several runner code paths could drop the **tail** of a step's log output, so a
failing (or cancelled) step would show output that is missing its last line(s).
This was observed in practice and traced to four independent issues.

## Root causes & fixes

### 1. Trailing line without a newline was never flushed
`common.lineWriter` buffers output until it sees a `\n`. A final line **without**
a trailing newline (e.g. an error message printed right before a process exits,
a panic, `printf` without `\n`) stayed in the internal buffer and was never
emitted — the writer exposed no flush at all.

- Added `lineWriter.Flush()` (idempotent), a `Flusher` interface, and a
  `FlushWriter(io.Writer)` helper.
- Flush at every stream EOF: the exec copy goroutine, the container `attach()`
  streaming goroutine, and at step end (`useStepLogger`).

### 2. Cancellation/timeout truncated output
`waitForCommand` returned immediately on `ctx.Done()` and abandoned the
output-copy goroutine, losing output the command had already produced. It now
drains with a bounded grace period before returning. The response channel is
buffered so the goroutine can't leak if the drain times out.

### 3. `attach()` raced the final bytes
Container output was streamed in a fire-and-forget goroutine that `wait()` did
not synchronize with, so the step could proceed before the last bytes were
written. `wait()` now blocks on the streaming goroutine (bounded) so output is
fully drained and flushed first.

### 4. `::stop-commands::` silently dropped lines from the step log
Lines between `::stop-commands::<token>` and its end token were echoed without
the `raw_output` field **and** short-circuited the handler chain (`return false`),
so they never reached the step log (non-raw entries aren't appended while a step
is running). Now returns `true` so they are still captured.

Reviewed-on: https://gitea.com/gitea/runner/pulls/1028
Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
This commit is contained in:
Nicolas
2026-06-14 20:43:19 +00:00
parent 33e6d1d8ff
commit 205af7cd01
7 changed files with 216 additions and 6 deletions

View File

@@ -20,6 +20,7 @@ import (
"slices"
"strconv"
"strings"
"time"
"gitea.com/gitea/runner/act/common"
"gitea.com/gitea/runner/act/filecollector"
@@ -45,6 +46,13 @@ import (
"github.com/spf13/pflag"
)
// drainGracePeriod bounds how long we wait for an output-copy goroutine to
// finish draining a container's output before returning, so that neither a
// cancellation (waitForCommand) nor a normal container exit (wait) truncates
// the tail of the log. It is a safety bound: in the common case the stream
// reaches EOF and the goroutine returns well before this elapses.
const drainGracePeriod = 2 * time.Second
// NewContainer creates a reference to a container
func NewContainer(input *NewContainerInput) ExecutionsEnvironment {
cr := new(containerReference)
@@ -229,6 +237,10 @@ type containerReference struct {
input *NewContainerInput
UID int
GID int
// attachDone is closed by the attach() streaming goroutine once it has
// drained and flushed the container's output. wait() blocks on it so the
// tail of the log lands before the step proceeds.
attachDone chan struct{}
LinuxContainerEnvironmentExtensions
}
@@ -730,7 +742,9 @@ func (cr *containerReference) tryReadGID() common.Executor {
func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal bool, resp client.HijackedResponse, _ client.ExecCreateResult, _, _ string) error {
logger := common.Logger(ctx)
cmdResponse := make(chan error)
// Buffered so the copy goroutine never blocks on send if the grace-period
// drain below times out and no one is left to receive.
cmdResponse := make(chan error, 1)
go func() {
var outWriter io.Writer
@@ -749,6 +763,11 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
} else {
_, err = io.Copy(outWriter, resp.Reader)
}
// Flush any buffered, not-yet-newline-terminated trailing line so the
// final line of a command's output is not lost (e.g. an error message
// printed without a trailing newline before the process exits).
common.FlushWriter(outWriter)
common.FlushWriter(errWriter)
cmdResponse <- err
}()
@@ -760,6 +779,16 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
logger.Warnf("Failed to send CTRL+C: %+s", err)
}
// Give the copy goroutine a brief grace period to drain output already
// produced by the command before we return, so cancellation does not
// truncate the tail of the log. The goroutine exits once the hijacked
// stream is closed by resp.Close() in the caller's defer.
select {
case <-cmdResponse:
case <-time.After(drainGracePeriod):
logger.Warn("Timed out draining command output after cancellation")
}
// we return the context canceled error to prevent other steps
// from executing
return ctx.Err()
@@ -945,14 +974,23 @@ func (cr *containerReference) attach() common.Executor {
if errWriter == nil {
errWriter = os.Stderr
}
done := make(chan struct{})
cr.attachDone = done
go func() {
defer close(done)
var copyErr error
if !isTerminal || os.Getenv("NORAW") != "" {
_, err = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
_, copyErr = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
} else {
_, err = io.Copy(outWriter, out.Reader)
_, copyErr = io.Copy(outWriter, out.Reader)
}
if err != nil {
common.Logger(ctx).Error(err)
// Flush any buffered, not-yet-newline-terminated trailing line once
// the stream reaches EOF, so the final line of the container's
// output is not lost when it is not newline-terminated.
common.FlushWriter(outWriter)
common.FlushWriter(errWriter)
if copyErr != nil {
common.Logger(ctx).Error(copyErr)
}
}()
return nil
@@ -991,6 +1029,18 @@ func (cr *containerReference) wait() common.Executor {
logger.Debugf("Return status: %v", statusCode)
// The container has exited; wait for the attach() streaming goroutine to
// finish draining and flushing its output before returning, so the tail
// of the log is not lost. Bounded so a stuck stream cannot hang the step.
if cr.attachDone != nil {
select {
case <-cr.attachDone:
case <-time.After(drainGracePeriod):
logger.Warn("Timed out draining container output")
}
cr.attachDone = nil
}
if statusCode == 0 {
return nil
}