mirror of
https://gitea.com/gitea/act_runner.git
synced 2026-06-09 18:44:23 +02:00
fix(container): re-validate cached container id before reuse (#1003)
`containerReference.id` was cached from `Create()` and never re-validated, so a container torn down out-of-band (AutoRemove on an unexpected exit, daemon-side cleanup, sibling-job race in a parallel matrix) left a stale id behind. The next `Copy`/`Exec` then hit the daemon with that dead id and failed the otherwise-successful job with `Could not find the file /var/run/act/ in container <id>`. `find()` now `ContainerInspect`s the cached id and clears it only on a definitive `NotFound`; transient errors trust the cache so cleanup pipelines don't abort on a daemon blip. Operations that need a live container (`copyContent`/`copyDir`/`CopyTarStream`/`exec`/`GetContainerArchive`) fail fast with a clear `container "<name>" does not exist` instead of the daemon's generic empty-id error. --- This PR was written with the help of Claude Opus 4.7 --------- Co-authored-by: Nicolas <bircni@icloud.com> Reviewed-on: https://gitea.com/gitea/runner/pulls/1003 Reviewed-by: Nicolas <bircni@icloud.com> Co-authored-by: silverwind <2021+silverwind@noreply.gitea.com> Co-committed-by: silverwind <2021+silverwind@noreply.gitea.com>
This commit is contained in:
@@ -26,6 +26,7 @@ import (
|
||||
|
||||
"dario.cat/mergo"
|
||||
"github.com/Masterminds/semver"
|
||||
cerrdefs "github.com/containerd/errdefs"
|
||||
"github.com/docker/cli/cli/compose/loader"
|
||||
"github.com/docker/cli/cli/connhelper"
|
||||
"github.com/go-git/go-billy/v5/helper/polyfill"
|
||||
@@ -152,6 +153,8 @@ func (cr *containerReference) Copy(destPath string, files ...*FileEntry) common.
|
||||
func (cr *containerReference) CopyDir(destPath, srcPath string, useGitIgnore bool) common.Executor {
|
||||
return common.NewPipelineExecutor(
|
||||
common.NewInfoExecutor("docker cp src=%s dst=%s", srcPath, destPath),
|
||||
cr.connect(),
|
||||
cr.find(),
|
||||
cr.copyDir(destPath, srcPath, useGitIgnore),
|
||||
func(ctx context.Context) error {
|
||||
// If this fails, then folders have wrong permissions on non root container
|
||||
@@ -167,6 +170,16 @@ func (cr *containerReference) GetContainerArchive(ctx context.Context, srcPath s
|
||||
if common.Dryrun(ctx) {
|
||||
return nil, errors.New("DRYRUN is not supported in GetContainerArchive")
|
||||
}
|
||||
// Direct entry point (no pipeline) — revalidate cr.id ourselves.
|
||||
if err := cr.connect()(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := cr.find()(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if cr.id == "" {
|
||||
return nil, cr.missingContainerError("get archive %s", srcPath)
|
||||
}
|
||||
result, err := cr.cli.CopyFromContainer(ctx, cr.id, client.CopyFromContainerOptions{SourcePath: srcPath})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -314,10 +327,22 @@ func (cr *containerReference) Close() common.Executor {
|
||||
}
|
||||
}
|
||||
|
||||
// missingContainerError is the shared "container X does not exist" error
|
||||
// used by ops that need a live cr.id.
|
||||
func (cr *containerReference) missingContainerError(format string, args ...any) error {
|
||||
return fmt.Errorf("container %q does not exist; cannot "+format, append([]any{cr.input.Name}, args...)...)
|
||||
}
|
||||
|
||||
func (cr *containerReference) find() common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
if cr.id != "" {
|
||||
return nil
|
||||
// Validate cached id; clear only on definitive NotFound so a
|
||||
// transient daemon error doesn't abort cleanup pipelines.
|
||||
_, err := cr.cli.ContainerInspect(ctx, cr.id, client.ContainerInspectOptions{})
|
||||
if !cerrdefs.IsNotFound(err) {
|
||||
return nil
|
||||
}
|
||||
cr.id = ""
|
||||
}
|
||||
containers, err := cr.cli.ContainerList(ctx, client.ContainerListOptions{
|
||||
All: true,
|
||||
@@ -335,7 +360,6 @@ func (cr *containerReference) find() common.Executor {
|
||||
}
|
||||
}
|
||||
|
||||
cr.id = ""
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -592,6 +616,9 @@ func (cr *containerReference) extractFromImageEnv(env *map[string]string) common
|
||||
|
||||
func (cr *containerReference) exec(cmd []string, env map[string]string, user, workdir string) common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
if cr.id == "" {
|
||||
return cr.missingContainerError("exec %v", cmd)
|
||||
}
|
||||
logger := common.Logger(ctx)
|
||||
// Fix slashes when running on Windows
|
||||
if runtime.GOOS == "windows" {
|
||||
@@ -746,6 +773,9 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo
|
||||
}
|
||||
|
||||
func (cr *containerReference) CopyTarStream(ctx context.Context, destPath string, tarStream io.Reader) error {
|
||||
if cr.id == "" {
|
||||
return cr.missingContainerError("copy to %s", destPath)
|
||||
}
|
||||
// Mkdir
|
||||
buf := &bytes.Buffer{}
|
||||
tw := tar.NewWriter(buf)
|
||||
@@ -779,6 +809,9 @@ func (cr *containerReference) CopyTarStream(ctx context.Context, destPath string
|
||||
|
||||
func (cr *containerReference) copyDir(dstPath, srcPath string, useGitIgnore bool) common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
if cr.id == "" {
|
||||
return cr.missingContainerError("copy directory to %s", dstPath)
|
||||
}
|
||||
logger := common.Logger(ctx)
|
||||
tarFile, err := os.CreateTemp("", "act")
|
||||
if err != nil {
|
||||
@@ -853,6 +886,9 @@ func (cr *containerReference) copyDir(dstPath, srcPath string, useGitIgnore bool
|
||||
|
||||
func (cr *containerReference) copyContent(dstPath string, files ...*FileEntry) common.Executor {
|
||||
return func(ctx context.Context) error {
|
||||
if cr.id == "" {
|
||||
return cr.missingContainerError("copy to %s", dstPath)
|
||||
}
|
||||
logger := common.Logger(ctx)
|
||||
var buf bytes.Buffer
|
||||
tw := tar.NewWriter(&buf)
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
|
||||
"gitea.com/gitea/runner/act/common"
|
||||
|
||||
cerrdefs "github.com/containerd/errdefs"
|
||||
"github.com/moby/moby/api/types/container"
|
||||
mobyclient "github.com/moby/moby/client"
|
||||
"github.com/sirupsen/logrus/hooks/test"
|
||||
@@ -98,6 +99,16 @@ func (m *mockDockerClient) CopyToContainer(ctx context.Context, id string, optio
|
||||
return args.Get(0).(mobyclient.CopyToContainerResult), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *mockDockerClient) ContainerInspect(ctx context.Context, id string, opts mobyclient.ContainerInspectOptions) (mobyclient.ContainerInspectResult, error) {
|
||||
args := m.Called(ctx, id, opts)
|
||||
return args.Get(0).(mobyclient.ContainerInspectResult), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *mockDockerClient) ContainerList(ctx context.Context, opts mobyclient.ContainerListOptions) (mobyclient.ContainerListResult, error) {
|
||||
args := m.Called(ctx, opts)
|
||||
return args.Get(0).(mobyclient.ContainerListResult), args.Error(1)
|
||||
}
|
||||
|
||||
type endlessReader struct {
|
||||
io.Reader
|
||||
}
|
||||
@@ -298,6 +309,105 @@ func TestDockerCopyTarStreamErrorInMkdir(t *testing.T) {
|
||||
client.AssertExpectations(t)
|
||||
}
|
||||
|
||||
// find() must drop a stale cached id so later Copy/Exec don't hit the
|
||||
// daemon with a torn-down container.
|
||||
func TestFindRevalidatesStaleID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
notFound := cerrdefs.ErrNotFound.WithMessage("No such container")
|
||||
boom := errors.New("daemon unreachable")
|
||||
newCR := func(id string) (*containerReference, *mockDockerClient) {
|
||||
client := &mockDockerClient{}
|
||||
return &containerReference{id: id, cli: client, input: &NewContainerInput{Name: "job-1"}}, client
|
||||
}
|
||||
listOpts := mobyclient.ContainerListOptions{All: true}
|
||||
inspectOpts := mobyclient.ContainerInspectOptions{}
|
||||
|
||||
t.Run("stale id cleared, name lookup empty", func(t *testing.T) {
|
||||
cr, client := newCR("stale")
|
||||
client.On("ContainerInspect", ctx, "stale", inspectOpts).Return(mobyclient.ContainerInspectResult{}, notFound)
|
||||
client.On("ContainerList", ctx, listOpts).Return(mobyclient.ContainerListResult{}, nil)
|
||||
require.NoError(t, cr.find()(ctx))
|
||||
assert.Empty(t, cr.id)
|
||||
client.AssertExpectations(t)
|
||||
})
|
||||
|
||||
t.Run("stale id cleared, name lookup repopulates", func(t *testing.T) {
|
||||
cr, client := newCR("stale")
|
||||
client.On("ContainerInspect", ctx, "stale", inspectOpts).Return(mobyclient.ContainerInspectResult{}, notFound)
|
||||
client.On("ContainerList", ctx, listOpts).Return(mobyclient.ContainerListResult{Items: []container.Summary{
|
||||
{ID: "other", Names: []string{"/somebody-else"}},
|
||||
{ID: "fresh", Names: []string{"/job-1"}},
|
||||
}}, nil)
|
||||
require.NoError(t, cr.find()(ctx))
|
||||
assert.Equal(t, "fresh", cr.id)
|
||||
client.AssertExpectations(t)
|
||||
})
|
||||
|
||||
t.Run("live id kept", func(t *testing.T) {
|
||||
cr, client := newCR("live")
|
||||
client.On("ContainerInspect", ctx, "live", inspectOpts).Return(mobyclient.ContainerInspectResult{}, nil)
|
||||
require.NoError(t, cr.find()(ctx))
|
||||
assert.Equal(t, "live", cr.id)
|
||||
client.AssertExpectations(t)
|
||||
})
|
||||
|
||||
t.Run("transient inspect error trusts cache", func(t *testing.T) {
|
||||
cr, client := newCR("live")
|
||||
client.On("ContainerInspect", ctx, "live", inspectOpts).Return(mobyclient.ContainerInspectResult{}, boom)
|
||||
require.NoError(t, cr.find()(ctx))
|
||||
assert.Equal(t, "live", cr.id)
|
||||
client.AssertExpectations(t)
|
||||
})
|
||||
|
||||
t.Run("list error propagates", func(t *testing.T) {
|
||||
cr, client := newCR("")
|
||||
client.On("ContainerList", ctx, listOpts).Return(mobyclient.ContainerListResult{}, boom)
|
||||
require.ErrorIs(t, cr.find()(ctx), boom)
|
||||
client.AssertExpectations(t)
|
||||
})
|
||||
}
|
||||
|
||||
// Every daemon entry point fails fast with a clear, container-named
|
||||
// error when no live cr.id is known.
|
||||
func TestRejectsMissingContainer(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
client := &mockDockerClient{}
|
||||
client.On("ContainerList", ctx, mobyclient.ContainerListOptions{All: true}).Return(mobyclient.ContainerListResult{}, nil)
|
||||
cr := &containerReference{cli: client, input: &NewContainerInput{Name: "job-1"}}
|
||||
check := func(op string, err error) {
|
||||
t.Helper()
|
||||
require.Error(t, err, op)
|
||||
assert.Contains(t, err.Error(), `container "job-1" does not exist`, op)
|
||||
}
|
||||
check("copyContent", cr.copyContent("/var/run/act", &FileEntry{Name: "x", Mode: 0o644})(ctx))
|
||||
check("copyDir", cr.copyDir("/var/run/act", "/src", false)(ctx))
|
||||
check("CopyTarStream", cr.CopyTarStream(ctx, "/var/run/act", &bytes.Buffer{}))
|
||||
check("exec", cr.exec([]string{"echo"}, nil, "", "")(ctx))
|
||||
_, err := cr.GetContainerArchive(ctx, "/var/run/act/x")
|
||||
check("GetContainerArchive", err)
|
||||
}
|
||||
|
||||
// End-to-end: a stale cr.id is cleared, repopulated from name lookup,
|
||||
// and the Copy completes against the fresh id.
|
||||
func TestPublicCopyPipelineHandlesStaleID(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
client := &mockDockerClient{}
|
||||
client.On("ContainerInspect", ctx, "stale", mobyclient.ContainerInspectOptions{}).
|
||||
Return(mobyclient.ContainerInspectResult{}, cerrdefs.ErrNotFound.WithMessage("gone"))
|
||||
client.On("ContainerList", ctx, mobyclient.ContainerListOptions{All: true}).
|
||||
Return(mobyclient.ContainerListResult{Items: []container.Summary{
|
||||
{ID: "fresh", Names: []string{"/job-1"}},
|
||||
}}, nil)
|
||||
client.On("CopyToContainer", ctx, "fresh", mock.MatchedBy(func(opts mobyclient.CopyToContainerOptions) bool {
|
||||
return opts.DestinationPath == "/var/run/act"
|
||||
})).Return(mobyclient.CopyToContainerResult{}, nil)
|
||||
|
||||
cr := &containerReference{id: "stale", cli: client, input: &NewContainerInput{Name: "job-1"}}
|
||||
require.NoError(t, cr.Copy("/var/run/act", &FileEntry{Name: "x", Mode: 0o644})(ctx))
|
||||
assert.Equal(t, "fresh", cr.id)
|
||||
client.AssertExpectations(t)
|
||||
}
|
||||
|
||||
// TestDockerCopyToSymlinkPath is a regression test for gitea/runner#981. Most base images
|
||||
// symlink /var/run to /run, so copying into /var/run/act traverses that symlink. The broken
|
||||
// docker 29.5.1 daemon fails the extraction with "mkdirat var/run: file exists" (fixed in
|
||||
|
||||
Reference in New Issue
Block a user