mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-15 14:24:22 +02:00
fix: prevent loss of step log output at end of step (#1028)
## Problem Several runner code paths could drop the **tail** of a step's log output, so a failing (or cancelled) step would show output that is missing its last line(s). This was observed in practice and traced to four independent issues. ## Root causes & fixes ### 1. Trailing line without a newline was never flushed `common.lineWriter` buffers output until it sees a `\n`. A final line **without** a trailing newline (e.g. an error message printed right before a process exits, a panic, `printf` without `\n`) stayed in the internal buffer and was never emitted — the writer exposed no flush at all. - Added `lineWriter.Flush()` (idempotent), a `Flusher` interface, and a `FlushWriter(io.Writer)` helper. - Flush at every stream EOF: the exec copy goroutine, the container `attach()` streaming goroutine, and at step end (`useStepLogger`). ### 2. Cancellation/timeout truncated output `waitForCommand` returned immediately on `ctx.Done()` and abandoned the output-copy goroutine, losing output the command had already produced. It now drains with a bounded grace period before returning. The response channel is buffered so the goroutine can't leak if the drain times out. ### 3. `attach()` raced the final bytes Container output was streamed in a fire-and-forget goroutine that `wait()` did not synchronize with, so the step could proceed before the last bytes were written. `wait()` now blocks on the streaming goroutine (bounded) so output is fully drained and flushed first. ### 4. `::stop-commands::` silently dropped lines from the step log Lines between `::stop-commands::<token>` and its end token were echoed without the `raw_output` field **and** short-circuited the handler chain (`return false`), so they never reached the step log (non-raw entries aren't appended while a step is running). Now returns `true` so they are still captured. Reviewed-on: https://gitea.com/gitea/runner/pulls/1028 Reviewed-by: Zettat123 <39446+zettat123@noreply.gitea.com>
This commit is contained in:
@@ -20,6 +20,7 @@ import (
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
"gitea.com/gitea/runner/act/filecollector"
|
||||
@@ -45,6 +46,13 @@ import (
|
||||
"github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
// drainGracePeriod bounds how long we wait for an output-copy goroutine to
|
||||
// finish draining a container's output before returning, so that neither a
|
||||
// cancellation (waitForCommand) nor a normal container exit (wait) truncates
|
||||
// the tail of the log. It is a safety bound: in the common case the stream
|
||||
// reaches EOF and the goroutine returns well before this elapses.
|
||||
const drainGracePeriod = 2 * time.Second
|
||||
|
||||
// NewContainer creates a reference to a container
|
||||
func NewContainer(input *NewContainerInput) ExecutionsEnvironment {
|
||||
cr := new(containerReference)
|
||||
@@ -229,6 +237,10 @@ type containerReference struct {
|
||||
input *NewContainerInput
|
||||
UID int
|
||||
GID int
|
||||
// attachDone is closed by the attach() streaming goroutine once it has
|
||||
// drained and flushed the container's output. wait() blocks on it so the
|
||||
// tail of the log lands before the step proceeds.
|
||||
attachDone chan struct{}
|
||||
LinuxContainerEnvironmentExtensions
|
||||
}
|
||||
|
||||
@@ -730,7 +742,9 @@ func (cr *containerReference) tryReadGID() common.Executor {
|
||||
func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal bool, resp client.HijackedResponse, _ client.ExecCreateResult, _, _ string) error {
|
||||
logger := common.Logger(ctx)
|
||||
|
||||
cmdResponse := make(chan error)
|
||||
// Buffered so the copy goroutine never blocks on send if the grace-period
|
||||
// drain below times out and no one is left to receive.
|
||||
cmdResponse := make(chan error, 1)
|
||||
|
||||
go func() {
|
||||
var outWriter io.Writer
|
||||
@@ -749,6 +763,11 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
|
||||
} else {
|
||||
_, err = io.Copy(outWriter, resp.Reader)
|
||||
}
|
||||
// Flush any buffered, not-yet-newline-terminated trailing line so the
|
||||
// final line of a command's output is not lost (e.g. an error message
|
||||
// printed without a trailing newline before the process exits).
|
||||
common.FlushWriter(outWriter)
|
||||
common.FlushWriter(errWriter)
|
||||
cmdResponse <- err
|
||||
}()
|
||||
|
||||
@@ -760,6 +779,16 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
|
||||
logger.Warnf("Failed to send CTRL+C: %+s", err)
|
||||
}
|
||||
|
||||
// Give the copy goroutine a brief grace period to drain output already
|
||||
// produced by the command before we return, so cancellation does not
|
||||
// truncate the tail of the log. The goroutine exits once the hijacked
|
||||
// stream is closed by resp.Close() in the caller's defer.
|
||||
select {
|
||||
case <-cmdResponse:
|
||||
case <-time.After(drainGracePeriod):
|
||||
logger.Warn("Timed out draining command output after cancellation")
|
||||
}
|
||||
|
||||
// we return the context canceled error to prevent other steps
|
||||
// from executing
|
||||
return ctx.Err()
|
||||
@@ -945,14 +974,23 @@ func (cr *containerReference) attach() common.Executor {
|
||||
if errWriter == nil {
|
||||
errWriter = os.Stderr
|
||||
}
|
||||
done := make(chan struct{})
|
||||
cr.attachDone = done
|
||||
go func() {
|
||||
defer close(done)
|
||||
var copyErr error
|
||||
if !isTerminal || os.Getenv("NORAW") != "" {
|
||||
_, err = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
|
||||
_, copyErr = stdcopy.StdCopy(outWriter, errWriter, out.Reader)
|
||||
} else {
|
||||
_, err = io.Copy(outWriter, out.Reader)
|
||||
_, copyErr = io.Copy(outWriter, out.Reader)
|
||||
}
|
||||
if err != nil {
|
||||
common.Logger(ctx).Error(err)
|
||||
// Flush any buffered, not-yet-newline-terminated trailing line once
|
||||
// the stream reaches EOF, so the final line of the container's
|
||||
// output is not lost when it is not newline-terminated.
|
||||
common.FlushWriter(outWriter)
|
||||
common.FlushWriter(errWriter)
|
||||
if copyErr != nil {
|
||||
common.Logger(ctx).Error(copyErr)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
@@ -991,6 +1029,18 @@ func (cr *containerReference) wait() common.Executor {
|
||||
|
||||
logger.Debugf("Return status: %v", statusCode)
|
||||
|
||||
// The container has exited; wait for the attach() streaming goroutine to
|
||||
// finish draining and flushing its output before returning, so the tail
|
||||
// of the log is not lost. Bounded so a stuck stream cannot hang the step.
|
||||
if cr.attachDone != nil {
|
||||
select {
|
||||
case <-cr.attachDone:
|
||||
case <-time.After(drainGracePeriod):
|
||||
logger.Warn("Timed out draining container output")
|
||||
}
|
||||
cr.attachDone = nil
|
||||
}
|
||||
|
||||
if statusCode == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user