From 92188e112bba3308a7ab45bf2a9fed083086f811 Mon Sep 17 00:00:00 2001 From: apostasie Date: Tue, 27 Aug 2024 17:49:44 -0700 Subject: [PATCH 1/3] Add generic filestore Signed-off-by: apostasie --- pkg/store/filestore.go | 384 +++++++++++++++++++++++++++++++++ pkg/store/filestore_test.go | 220 +++++++++++++++++++ pkg/store/filestore_unix.go | 43 ++++ pkg/store/filestore_windows.go | 45 ++++ pkg/store/store.go | 98 +++++++++ 5 files changed, 790 insertions(+) create mode 100644 pkg/store/filestore.go create mode 100644 pkg/store/filestore_test.go create mode 100644 pkg/store/filestore_unix.go create mode 100644 pkg/store/filestore_windows.go create mode 100644 pkg/store/store.go diff --git a/pkg/store/filestore.go b/pkg/store/filestore.go new file mode 100644 index 00000000000..ec0d98b3585 --- /dev/null +++ b/pkg/store/filestore.go @@ -0,0 +1,384 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package store + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/containerd/nerdctl/v2/pkg/lockutil" +) + +// TODO: implement a read-lock in lockutil, in addition to the current exclusive write-lock +// This might improve performance in case of (mostly read) massively parallel concurrent scenarios + +const ( + // Default filesystem permissions to use when creating dir or files + defaultFilePerm = 0o600 + defaultDirPerm = 0o700 +) + +// New returns a filesystem based Store implementation that satisfies both Manager and Locker +// Note that atomicity is "guaranteed" by `os.Rename`, which arguably is not *always* atomic. +// In particular, operating-system crashes may break that promise, and windows behavior is probably questionable. +// That being said, this is still a much better solution than writing directly to the destination file. +func New(rootPath string, dirPerm os.FileMode, filePerm os.FileMode) (Store, error) { + if rootPath == "" { + return nil, errors.Join(ErrInvalidArgument, fmt.Errorf("FileStore rootPath cannot be empty")) + } + + if dirPerm == 0 { + dirPerm = defaultDirPerm + } + + if filePerm == 0 { + filePerm = defaultFilePerm + } + + if err := os.MkdirAll(rootPath, dirPerm); err != nil { + return nil, errors.Join(ErrSystemFailure, err) + } + + return &fileStore{ + dir: rootPath, + dirPerm: dirPerm, + filePerm: filePerm, + }, nil +} + +type fileStore struct { + mutex sync.RWMutex + dir string + locked *os.File + dirPerm os.FileMode + filePerm os.FileMode +} + +func (vs *fileStore) Lock() error { + vs.mutex.Lock() + + dirFile, err := lockutil.Lock(vs.dir) + if err != nil { + return errors.Join(ErrLockFailure, err) + } + + vs.locked = dirFile + + return nil +} + +func (vs *fileStore) Release() error { + if vs.locked == nil { + return errors.Join(ErrFaultyImplementation, fmt.Errorf("cannot unlock already unlocked volume store %q", vs.dir)) + } + + defer vs.mutex.Unlock() + + defer func() { + vs.locked = nil + }() + + if err := lockutil.Unlock(vs.locked); err != nil { + return errors.Join(ErrLockFailure, err) + } + + return nil +} + +func (vs *fileStore) WithLock(fun func() error) (err error) { + if err = vs.Lock(); err != nil { + return err + } + + defer func() { + err = errors.Join(vs.Release(), err) + }() + + return fun() +} + +func (vs *fileStore) Get(key ...string) ([]byte, error) { + if vs.locked == nil { + return nil, errors.Join(ErrFaultyImplementation, fmt.Errorf("operations on the store must use locking")) + } + + if err := validateAllPathComponents(key...); err != nil { + return nil, err + } + + path := filepath.Join(append([]string{vs.dir}, key...)...) + + st, err := os.Stat(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, errors.Join(ErrNotFound, fmt.Errorf("%q does not exist", filepath.Join(key...))) + } + + return nil, errors.Join(ErrSystemFailure, err) + } + + if st.IsDir() { + return nil, errors.Join(ErrFaultyImplementation, fmt.Errorf("%q is a directory and cannot be read as a file", path)) + } + + content, err := os.ReadFile(filepath.Join(append([]string{vs.dir}, key...)...)) + if err != nil { + return nil, errors.Join(ErrSystemFailure, err) + } + + return content, nil +} + +func (vs *fileStore) Exists(key ...string) (bool, error) { + if err := validateAllPathComponents(key...); err != nil { + return false, err + } + + path := filepath.Join(append([]string{vs.dir}, key...)...) + + _, err := os.Stat(filepath.Join(path)) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return false, nil + } + + return false, errors.Join(ErrSystemFailure, err) + } + + return true, nil +} + +func (vs *fileStore) Set(data []byte, key ...string) error { + if vs.locked == nil { + return errors.Join(ErrFaultyImplementation, fmt.Errorf("operations on the store must use locking")) + } + + if err := validateAllPathComponents(key...); err != nil { + return err + } + + fileName := key[len(key)-1] + parent := vs.dir + + if len(key) > 1 { + parent = filepath.Join(append([]string{parent}, key[0:len(key)-1]...)...) + err := os.MkdirAll(parent, vs.dirPerm) + if err != nil { + return errors.Join(ErrSystemFailure, err) + } + } + + dest := filepath.Join(parent, fileName) + st, err := os.Stat(dest) + if err == nil { + if st.IsDir() { + return errors.Join(ErrFaultyImplementation, fmt.Errorf("%q is a directory and cannot be written to", dest)) + } + } + + return atomicWrite(parent, fileName, vs.filePerm, data) +} + +func (vs *fileStore) List(key ...string) ([]string, error) { + if vs.locked == nil { + return nil, errors.Join(ErrFaultyImplementation, fmt.Errorf("operations on the store must use locking")) + } + + // Unlike Get, Set and Delete, List can have zero length key + for _, k := range key { + if err := validatePathComponent(k); err != nil { + return nil, err + } + } + + path := filepath.Join(append([]string{vs.dir}, key...)...) + + st, err := os.Stat(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, errors.Join(ErrNotFound, err) + } + + return nil, errors.Join(ErrSystemFailure, err) + } + + if !st.IsDir() { + return nil, errors.Join(ErrFaultyImplementation, fmt.Errorf("%q is not a directory and cannot be enumerated", path)) + } + + dirEntries, err := os.ReadDir(path) + if err != nil { + return nil, errors.Join(ErrSystemFailure, err) + } + + entries := []string{} + for _, dirEntry := range dirEntries { + entries = append(entries, dirEntry.Name()) + } + + return entries, nil +} + +func (vs *fileStore) Delete(key ...string) error { + if vs.locked == nil { + return errors.Join(ErrFaultyImplementation, fmt.Errorf("operations on the store must use locking")) + } + + if err := validateAllPathComponents(key...); err != nil { + return err + } + + path := filepath.Join(append([]string{vs.dir}, key...)...) + + _, err := os.Stat(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return errors.Join(ErrNotFound, err) + } + + return errors.Join(ErrSystemFailure, err) + } + + if err = os.RemoveAll(path); err != nil { + return errors.Join(ErrSystemFailure, err) + } + + return nil +} + +func (vs *fileStore) Location(key ...string) (string, error) { + if err := validateAllPathComponents(key...); err != nil { + return "", err + } + + return filepath.Join(append([]string{vs.dir}, key...)...), nil +} + +func (vs *fileStore) GroupEnsure(key ...string) error { + if vs.locked == nil { + return errors.Join(ErrFaultyImplementation, fmt.Errorf("operations on the store must use locking")) + } + + if err := validateAllPathComponents(key...); err != nil { + return err + } + + path := filepath.Join(append([]string{vs.dir}, key...)...) + + if err := os.MkdirAll(path, vs.dirPerm); err != nil { + return errors.Join(ErrSystemFailure, err) + } + + return nil +} + +func (vs *fileStore) GroupSize(key ...string) (int64, error) { + if vs.locked == nil { + return 0, errors.Join(ErrFaultyImplementation, fmt.Errorf("operations on the store must use locking")) + } + + if err := validateAllPathComponents(key...); err != nil { + return 0, err + } + + path := filepath.Join(append([]string{vs.dir}, key...)...) + + st, err := os.Stat(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return 0, errors.Join(ErrNotFound, err) + } + + return 0, errors.Join(ErrSystemFailure, err) + } + + if !st.IsDir() { + return 0, errors.Join(ErrFaultyImplementation, fmt.Errorf("%q is not a directory", path)) + } + + var size int64 + var walkFn = func(_ string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + size += info.Size() + } + return err + } + + err = filepath.Walk(path, walkFn) + if err != nil { + return 0, err + } + + return size, nil +} + +// validatePathComponent will enforce os specific filename restrictions on a single path component +func validatePathComponent(pathComponent string) error { + // https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits + if len(pathComponent) > 255 { + return errors.Join(ErrInvalidArgument, errors.New("identifiers must be stricly shorter than 256 characters")) + } + + if strings.TrimSpace(pathComponent) == "" { + return errors.Join(ErrInvalidArgument, errors.New("identifier cannot be empty")) + } + + if err := validatePlatformSpecific(pathComponent); err != nil { + return errors.Join(ErrInvalidArgument, err) + } + + return nil +} + +// validateAllPathComponents will enforce validation for a slice of components +func validateAllPathComponents(pathComponent ...string) error { + if len(pathComponent) == 0 { + return errors.Join(ErrInvalidArgument, errors.New("you must specify an identifier")) + } + + for _, key := range pathComponent { + if err := validatePathComponent(key); err != nil { + return err + } + } + + return nil +} + +func atomicWrite(parent string, fileName string, perm os.FileMode, data []byte) error { + dest := filepath.Join(parent, fileName) + temp := filepath.Join(parent, ".temp."+fileName) + + err := os.WriteFile(temp, data, perm) + if err != nil { + return errors.Join(ErrSystemFailure, err) + } + + err = os.Rename(temp, dest) + if err != nil { + return errors.Join(ErrSystemFailure, err) + } + + return nil +} diff --git a/pkg/store/filestore_test.go b/pkg/store/filestore_test.go new file mode 100644 index 00000000000..2496b96cbb1 --- /dev/null +++ b/pkg/store/filestore_test.go @@ -0,0 +1,220 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package store + +import ( + "testing" + "time" + + "gotest.tools/v3/assert" +) + +func TestFileStoreBasics(t *testing.T) { + tempDir := t.TempDir() + + // Creation + tempStore, err := New(tempDir, 0, 0) + assert.NilError(t, err, "temporary store creation should succeed") + + // Lock acquisition + err = tempStore.Lock() + assert.NilError(t, err, "acquiring a lock should succeed") + err = tempStore.Release() + assert.NilError(t, err, "releasing a lock should succeed") + + // Non-existent keys + _ = tempStore.Lock() + defer tempStore.Release() + + _, err = tempStore.Get("nonexistent") + assert.ErrorIs(t, err, ErrNotFound, "getting a non existent key should ErrNotFound") + + err = tempStore.Delete("nonexistent") + assert.ErrorIs(t, err, ErrNotFound, "deleting a non existent key should ErrNotFound") + + _, err = tempStore.List("nonexistent") + assert.ErrorIs(t, err, ErrNotFound, "listing a non existent key should ErrNotFound") + + doesExist, err := tempStore.Exists("nonexistent") + assert.NilError(t, err, "exist should not error") + assert.Assert(t, !doesExist, "should not exist") + + // Listing empty store + result, err := tempStore.List() + assert.NilError(t, err, "listing store root should succeed") + assert.Assert(t, len(result) == 0, "list empty store return zero length slice") + + // Invalid keys + _, err = tempStore.Get("..") + assert.ErrorIs(t, err, ErrInvalidArgument, "unsupported characters or patterns should return ErrInvalidArgument") + + err = tempStore.Set([]byte("foo"), "..") + assert.ErrorIs(t, err, ErrInvalidArgument, "unsupported characters or patterns should return ErrInvalidArgument") + + err = tempStore.Delete("..") + assert.ErrorIs(t, err, ErrInvalidArgument, "unsupported characters or patterns should return ErrInvalidArgument") + + _, err = tempStore.List("..") + assert.ErrorIs(t, err, ErrInvalidArgument, "unsupported characters or patterns should return ErrInvalidArgument") + + // Writing, reading, listing, deleting + err = tempStore.Set([]byte("foo"), "something") + assert.NilError(t, err, "write should be successful") + + doesExist, err = tempStore.Exists("something") + assert.NilError(t, err, "exist should not error") + assert.Assert(t, doesExist, "should exist") + + data, err := tempStore.Get("something") + assert.NilError(t, err, "read should be successful") + assert.Assert(t, string(data) == "foo", "written data should be read back") + + result, err = tempStore.List() + assert.NilError(t, err, "listing store root should succeed") + assert.Assert(t, len(result) == 1, "list store with one element should return it") + + // Read from the list key obtained + data, err = tempStore.Get(result[0]) + assert.NilError(t, err, "read should be successful") + assert.Assert(t, string(data) == "foo", "written data should be read back") + + err = tempStore.Delete("something") + assert.NilError(t, err, "delete should be successful") + + doesExist, err = tempStore.Exists("something") + assert.NilError(t, err, "exist should not error") + assert.Assert(t, !doesExist, "should not exist") + + result, err = tempStore.List() + assert.NilError(t, err, "listing store root should succeed") + assert.Assert(t, len(result) == 0, "list store should return it empty slice") +} + +func TestFileStoreGroups(t *testing.T) { + tempDir := t.TempDir() + + // Creation + tempStore, err := New(tempDir, 0, 0) + assert.NilError(t, err, "temporary store creation should succeed") + + _ = tempStore.Lock() + defer tempStore.Release() + + err = tempStore.Set([]byte("foo"), "group", "subgroup", "actualkey") + assert.NilError(t, err, "write should be successful") + + doesExist, err := tempStore.Exists("group", "subgroup", "actualkey") + assert.NilError(t, err, "exist should not error") + assert.Assert(t, doesExist, "should exist") + + data, err := tempStore.Get("group", "subgroup", "actualkey") + assert.NilError(t, err, "read should be successful") + assert.Assert(t, string(data) == "foo", "written data should be read back") + + result, err := tempStore.List() + assert.NilError(t, err, "listing store root should succeed") + assert.Assert(t, len(result) == 1) + assert.Assert(t, result[0] == "group") + + result, err = tempStore.List("group") + assert.NilError(t, err, "listing store root should succeed") + assert.Assert(t, len(result) == 1) + assert.Assert(t, result[0] == "subgroup") + + result, err = tempStore.List("group", "subgroup") + assert.NilError(t, err, "listing store root should succeed") + assert.Assert(t, len(result) == 1) + assert.Assert(t, result[0] == "actualkey") + + err = tempStore.Delete("group", "subgroup", "actualkey") + assert.NilError(t, err, "delete should be successful") + + doesExist, err = tempStore.Exists("group", "subgroup", "actualkey") + assert.NilError(t, err, "exist should not error") + assert.Assert(t, !doesExist, "should not exist") + + doesExist, err = tempStore.Exists("group", "subgroup") + assert.NilError(t, err, "exist should not error") + assert.Assert(t, doesExist, "should exist") + + err = tempStore.Delete("group", "subgroup") + assert.NilError(t, err, "delete should be successful") + + doesExist, err = tempStore.Exists("group", "subgroup") + assert.NilError(t, err, "exist should not error") + assert.Assert(t, !doesExist, "should not exist") + +} + +func TestFileStoreConcurrent(t *testing.T) { + tempDir := t.TempDir() + + // Creation + tempStore, err := New(tempDir, 0, 0) + assert.NilError(t, err, "temporary store creation should succeed") + + go func() { + lErr := tempStore.WithLock(func() error { + err := tempStore.Set([]byte("routine 1"), "concurrentkey") + assert.NilError(t, err, "writing should not error") + time.Sleep(1 * time.Second) + result, err := tempStore.Get("concurrentkey") + assert.NilError(t, err, "reading should not error") + assert.Assert(t, string(result) == "routine 1") + return nil + }) + assert.NilError(t, lErr, "locking should not error") + }() + + go func() { + time.Sleep(500 * time.Millisecond) + lErr := tempStore.WithLock(func() error { + err := tempStore.Set([]byte("routine 2"), "concurrentkey") + assert.NilError(t, err, "writing should not error") + time.Sleep(1 * time.Second) + result, err := tempStore.Get("concurrentkey") + assert.NilError(t, err, "reading should not error") + assert.Assert(t, string(result) == "routine 2") + return nil + }) + assert.NilError(t, lErr, "locking should not error") + }() + + lErr := tempStore.WithLock(func() error { + err := tempStore.Set([]byte("main routine 1"), "concurrentkey") + assert.NilError(t, err, "writing should not error") + time.Sleep(1 * time.Second) + result, err := tempStore.Get("concurrentkey") + assert.NilError(t, err, "reading should not error") + assert.Assert(t, string(result) == "main routine 1") + return nil + }) + assert.NilError(t, lErr, "locking should not error") + + time.Sleep(750 * time.Millisecond) + + lErr = tempStore.WithLock(func() error { + err := tempStore.Set([]byte("main routine 2"), "concurrentkey") + assert.NilError(t, err, "writing should not error") + time.Sleep(1 * time.Second) + result, err := tempStore.Get("concurrentkey") + assert.NilError(t, err, "reading should not error") + assert.Assert(t, string(result) == "main routine 2") + return nil + }) + assert.NilError(t, lErr, "locking should not error") +} diff --git a/pkg/store/filestore_unix.go b/pkg/store/filestore_unix.go new file mode 100644 index 00000000000..b694b6fc744 --- /dev/null +++ b/pkg/store/filestore_unix.go @@ -0,0 +1,43 @@ +//go:build unix + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package store + +import ( + "fmt" + "regexp" +) + +// Note that Darwin has different restrictions - though, we do not support Darwin at this point... +// https://stackoverflow.com/questions/1976007/what-characters-are-forbidden-in-windows-and-linux-directory-names +var ( + disallowedKeywords = regexp.MustCompile(`^([.]|[.][.])$`) + reservedCharacters = regexp.MustCompile(`[\x{0}/]`) +) + +func validatePlatformSpecific(pathComponent string) error { + if reservedCharacters.MatchString(pathComponent) { + return fmt.Errorf("identifier %q cannot contain any of the following characters: %q", pathComponent, reservedCharacters) + } + + if disallowedKeywords.MatchString(pathComponent) { + return fmt.Errorf("identifier %q cannot be any of the reserved keywords: %q", pathComponent, disallowedKeywords) + } + + return nil +} diff --git a/pkg/store/filestore_windows.go b/pkg/store/filestore_windows.go new file mode 100644 index 00000000000..9fbfe3e575a --- /dev/null +++ b/pkg/store/filestore_windows.go @@ -0,0 +1,45 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package store + +import ( + "fmt" + "regexp" +) + +// See https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file +// https://stackoverflow.com/questions/1976007/what-characters-are-forbidden-in-windows-and-linux-directory-names +var ( + disallowedKeywords = regexp.MustCompile(`(?i)^(con|prn|nul|aux|com[1-9¹²³]|lpt[1-9¹²³])([.].*)?`) + reservedCharacters = regexp.MustCompile(`[\x{0}-\x{1f}<>:"/\\|?*]`) +) + +func validatePlatformSpecific(pathComponent string) error { + if reservedCharacters.MatchString(pathComponent) { + return fmt.Errorf("identifier %q cannot contain any of the following characters: %q", pathComponent, reservedCharacters) + } + + if disallowedKeywords.MatchString(pathComponent) { + return fmt.Errorf("identifier %q cannot be any of the reserved keywords: %q", pathComponent, disallowedKeywords) + } + + if pathComponent[len(pathComponent)-1:] == "." || pathComponent[len(pathComponent)-1:] == " " { + return fmt.Errorf("identifier %q cannot end with a space of dot", pathComponent) + } + + return nil +} diff --git a/pkg/store/store.go b/pkg/store/store.go new file mode 100644 index 00000000000..78b977dfaf9 --- /dev/null +++ b/pkg/store/store.go @@ -0,0 +1,98 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Package store provides a concurrency-safe lightweight storage solution with a simple interface. +// Embedders should call `Lock` and `defer Release` (or WithLock(func()error)) to wrap operations, +// or series of operations, to ensure secure use. +// Furthermore, a Store implementation must do atomic writes, providing guarantees that interrupted partial writes +// never get committed. +// The Store interface itself is meant to be generic, and alternative stores (memory based, or content-addressable) +// may be implemented that satisfies it. +// This package also provides the default, file based implementation that we are using. +package store + +import ( + "errors" + + "github.com/containerd/errdefs" +) + +var ( + // ErrInvalidArgument may be returned by Get, Set, List, or Delete by specific SafeStore implementations + // (eg: filesystem), when they want to impose implementation dependent restrictions on the identifiers + // (filesystems typically do). + ErrInvalidArgument = errdefs.ErrInvalidArgument + // ErrNotFound may be returned by Get or Delete when the requested key is not present in the store + ErrNotFound = errdefs.ErrNotFound + // ErrSystemFailure may be returned by implementations when an internal failure occurs. + // For example, for a filesystem implementation, failure to create a file will be wrapped by this error. + ErrSystemFailure = errors.New("system failure") + // ErrLockFailure may be returned by ReadLock, WriteLock, or Unlock, when the underlying locking mechanism fails. + // In the case of the filesystem implementation, inability to lock the directory will return it. + ErrLockFailure = errors.New("lock failure") + // ErrFaultyImplementation may be returned by Get or Set when the target key exists and is a dir, + // or by List when the target key is a file + // This is indicative the code using the store is not consistent with what it treats as group, and what it treats as key + // and is definitely a bug in that code + // Missing lock will also trigger this when detected. + ErrFaultyImplementation = errors.New("code needs to be fixed") +) + +// Store represents a store that is able to grant an exclusive lock (ensuring concurrency safety, +// both between go routines and across multiple binaries invocations), and is performing atomic operations. +// Note that Store allows manipulating nested objects: +// - Set([]byte("mykeyvalue"), "group", "subgroup", "my key1") +// - Set([]byte("mykeyvalue"), "group", "subgroup", "my key2") +// - Get("group", "subgroup", "my key1") +// - List("group", "subgroup") +// Note that both Delete and Exists can be applied indifferently to specific keys, or groups. +type Store interface { + Locker + Manager +} + +// Manager describes operations that can be performed on the store +type Manager interface { + // List will return a slice of all subgroups (eg: subdirectories), or keys (eg: files), under a specific group (eg: dir) + // Note that `key...` may be omitted, in which case, all objects' names at the root of the store are returned. + // Example, in the volumestore, List() will return all existing volumes names + List(key ...string) ([]string, error) + // Exists checks that a given key exists + // Example: Exists("meta.json") + Exists(key ...string) (bool, error) + // Get returns the content of a key + Get(key ...string) ([]byte, error) + // Set saves bytes to a key + Set(data []byte, key ...string) error + // Delete removes a key or a group + Delete(key ...string) error + // Location returns the absolute path to a certain resource + // Note that this technically "leaks" (filesystem) implementation details up. + // It is necessary though when we are going to pass these filepath to containerd for eg. + Location(key ...string) (string, error) + + // GroupSize will return the combined size of all objects stored under the group (eg: dir) + GroupSize(key ...string) (int64, error) + // GroupEnsure ensures that a given group (eg: directory) exists + GroupEnsure(key ...string) error +} + +// Locker describes a locking mechanism that can be used to encapsulate operations in a safe way +type Locker interface { + Lock() error + Release() error + WithLock(fun func() error) (err error) +} From c9756a942e3f51fd4e47112d44effe9578442868 Mon Sep 17 00:00:00 2001 From: apostasie Date: Tue, 27 Aug 2024 19:36:52 -0700 Subject: [PATCH 2/3] Remove mock for mountutil tests Signed-off-by: apostasie --- pkg/mountutil/mountutil_linux_test.go | 31 ---- pkg/mountutil/mountutil_test.go | 37 +++++ pkg/mountutil/mountutil_windows_test.go | 31 ---- pkg/mountutil/mountutilmock/os.mock.go | 56 -------- .../mountutilmock/volumestore.mock.go | 132 ------------------ 5 files changed, 37 insertions(+), 250 deletions(-) create mode 100644 pkg/mountutil/mountutil_test.go delete mode 100644 pkg/mountutil/mountutilmock/os.mock.go delete mode 100644 pkg/mountutil/mountutilmock/volumestore.mock.go diff --git a/pkg/mountutil/mountutil_linux_test.go b/pkg/mountutil/mountutil_linux_test.go index faa10511e71..80e21542cea 100644 --- a/pkg/mountutil/mountutil_linux_test.go +++ b/pkg/mountutil/mountutil_linux_test.go @@ -22,15 +22,11 @@ import ( "testing" "github.com/opencontainers/runtime-spec/specs-go" - "go.uber.org/mock/gomock" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" "github.com/containerd/containerd/v2/core/mount" "github.com/containerd/containerd/v2/pkg/oci" - - "github.com/containerd/nerdctl/v2/pkg/inspecttypes/native" - mocks "github.com/containerd/nerdctl/v2/pkg/mountutil/mountutilmock" ) // TestParseVolumeOptions tests volume options are parsed as expected. @@ -265,23 +261,6 @@ func TestProcessFlagV(t *testing.T) { }, } - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockVolumeStore := mocks.NewMockVolumeStore(ctrl) - mockVolumeStore. - EXPECT(). - Get(gomock.Any(), false). - Return(&native.Volume{Name: "test_volume", Mountpoint: "/test/volume", Size: 1024}, nil). - AnyTimes() - mockVolumeStore. - EXPECT(). - Create(gomock.Any(), nil). - Return(&native.Volume{Name: "test_volume", Mountpoint: "/test/volume"}, nil).AnyTimes() - - mockOs := mocks.NewMockOs(ctrl) - mockOs.EXPECT().Stat(gomock.Any()).Return(nil, nil).AnyTimes() - for _, tt := range tests { t.Run(tt.rawSpec, func(t *testing.T) { processedVolSpec, err := ProcessFlagV(tt.rawSpec, mockVolumeStore, false) @@ -347,16 +326,6 @@ func TestProcessFlagVAnonymousVolumes(t *testing.T) { }, } - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockVolumeStore := mocks.NewMockVolumeStore(ctrl) - mockVolumeStore. - EXPECT(). - Create(gomock.Any(), []string{}). - Return(&native.Volume{Name: "test_volume", Mountpoint: "/test/volume"}, nil). - AnyTimes() - for _, tt := range tests { t.Run(tt.rawSpec, func(t *testing.T) { processedVolSpec, err := ProcessFlagV(tt.rawSpec, mockVolumeStore, true) diff --git a/pkg/mountutil/mountutil_test.go b/pkg/mountutil/mountutil_test.go new file mode 100644 index 00000000000..619f4269c66 --- /dev/null +++ b/pkg/mountutil/mountutil_test.go @@ -0,0 +1,37 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package mountutil + +import ( + "runtime" + + "github.com/containerd/nerdctl/v2/pkg/inspecttypes/native" + "github.com/containerd/nerdctl/v2/pkg/mountutil/volumestore" +) + +type MockVolumeStore struct { + volumestore.VolumeStore +} + +func (mv *MockVolumeStore) CreateWithoutLock(name string, labels []string) (*native.Volume, error) { + if runtime.GOOS == "windows" { + return &native.Volume{Name: "test_volume", Mountpoint: "C:\\test\\directory"}, nil + } + return &native.Volume{Name: "test_volume", Mountpoint: "/test/volume"}, nil +} + +var mockVolumeStore = &MockVolumeStore{} diff --git a/pkg/mountutil/mountutil_windows_test.go b/pkg/mountutil/mountutil_windows_test.go index 8ee4725a2cb..661c45c0f5a 100644 --- a/pkg/mountutil/mountutil_windows_test.go +++ b/pkg/mountutil/mountutil_windows_test.go @@ -22,12 +22,8 @@ import ( "testing" "github.com/opencontainers/runtime-spec/specs-go" - "go.uber.org/mock/gomock" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" - - "github.com/containerd/nerdctl/v2/pkg/inspecttypes/native" - mocks "github.com/containerd/nerdctl/v2/pkg/mountutil/mountutilmock" ) func TestParseVolumeOptions(t *testing.T) { @@ -268,23 +264,6 @@ func TestProcessFlagV(t *testing.T) { }, } - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockVolumeStore := mocks.NewMockVolumeStore(ctrl) - mockVolumeStore. - EXPECT(). - Get(gomock.Any(), false). - Return(&native.Volume{Name: "test_volume", Mountpoint: "C:\\test\\directory", Size: 1024}, nil). - AnyTimes() - mockVolumeStore. - EXPECT(). - Create(gomock.Any(), nil). - Return(&native.Volume{Name: "test_volume", Mountpoint: "C:\\test\\directory"}, nil).AnyTimes() - - mockOs := mocks.NewMockOs(ctrl) - mockOs.EXPECT().Stat(gomock.Any()).Return(nil, nil).AnyTimes() - for _, tt := range tests { t.Run(tt.rawSpec, func(t *testing.T) { processedVolSpec, err := ProcessFlagV(tt.rawSpec, mockVolumeStore, true) @@ -342,16 +321,6 @@ func TestProcessFlagVAnonymousVolumes(t *testing.T) { }, } - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - mockVolumeStore := mocks.NewMockVolumeStore(ctrl) - mockVolumeStore. - EXPECT(). - Create(gomock.Any(), []string{}). - Return(&native.Volume{Name: "test_volume", Mountpoint: "C:\\test\\directory"}, nil). - AnyTimes() - for _, tt := range tests { t.Run(tt.rawSpec, func(t *testing.T) { processedVolSpec, err := ProcessFlagV(tt.rawSpec, mockVolumeStore, true) diff --git a/pkg/mountutil/mountutilmock/os.mock.go b/pkg/mountutil/mountutilmock/os.mock.go deleted file mode 100644 index 3cc1718e5bd..00000000000 --- a/pkg/mountutil/mountutilmock/os.mock.go +++ /dev/null @@ -1,56 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package mountutilmock - -import ( - "os" - "reflect" - - "go.uber.org/mock/gomock" -) - -type MockOs struct { - ctrl *gomock.Controller - recorder *MockOsMockRecorder -} - -type MockOsMockRecorder struct { - mock *MockOs -} - -func NewMockOs(ctrl *gomock.Controller) *MockOs { - mock := &MockOs{ctrl: ctrl} - mock.recorder = &MockOsMockRecorder{mock} - return mock -} - -func (m *MockOs) EXPECT() *MockOsMockRecorder { - return m.recorder -} - -func (m *MockOs) Stat(name string) (os.FileInfo, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Stat", name) - ret0, _ := ret[0].(os.FileInfo) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -func (m *MockOsMockRecorder) Stat(name any) *gomock.Call { - m.mock.ctrl.T.Helper() - return m.mock.ctrl.RecordCallWithMethodType(m.mock, "Stat", reflect.TypeOf((*MockOs)(nil).Stat), name) -} diff --git a/pkg/mountutil/mountutilmock/volumestore.mock.go b/pkg/mountutil/mountutilmock/volumestore.mock.go deleted file mode 100644 index f4a57f26d7b..00000000000 --- a/pkg/mountutil/mountutilmock/volumestore.mock.go +++ /dev/null @@ -1,132 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package mountutilmock - -import ( - "reflect" - - "go.uber.org/mock/gomock" - - "github.com/containerd/nerdctl/v2/pkg/inspecttypes/native" -) - -// MockVolumeStore is a mock of VolumeStore interface -type MockVolumeStore struct { - ctrl *gomock.Controller - recorder *MockVolumeStoreMockRecorder -} - -// MockVolumeStoreMockRecorder is the mock recorder for MockVolumeStore -type MockVolumeStoreMockRecorder struct { - mock *MockVolumeStore -} - -// NewMockVolumeStore creates a new mock instance -func NewMockVolumeStore(ctrl *gomock.Controller) *MockVolumeStore { - mock := &MockVolumeStore{ctrl: ctrl} - mock.recorder = &MockVolumeStoreMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use -func (m *MockVolumeStore) EXPECT() *MockVolumeStoreMockRecorder { - return m.recorder -} - -// Create mocks the Create method of VolumeStore -func (m *MockVolumeStore) Create(name string, labels []string) (*native.Volume, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Create", name, labels) - ret0, _ := ret[0].(*native.Volume) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// Create indicates an expected call of Create -func (m *MockVolumeStoreMockRecorder) Create(name any, labels any) *gomock.Call { - m.mock.ctrl.T.Helper() - return m.mock.ctrl.RecordCallWithMethodType(m.mock, "Create", reflect.TypeOf((*MockVolumeStore)(nil).Create), name, labels) -} - -// Get mocks the Get method of VolumeStore -func (m *MockVolumeStore) Get(name string, size bool) (*native.Volume, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Get", name, size) - ret0, _ := ret[0].(*native.Volume) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// Get indicates an expected call of Get -func (m *MockVolumeStoreMockRecorder) Get(name any, size any) *gomock.Call { - m.mock.ctrl.T.Helper() - return m.mock.ctrl.RecordCallWithMethodType(m.mock, "Get", reflect.TypeOf((*MockVolumeStore)(nil).Get), name, size) -} - -// List mocks the List method of VolumeStore -func (m *MockVolumeStore) List(size bool) (map[string]native.Volume, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "List", size) - ret0, _ := ret[0].(map[string]native.Volume) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// List indicates an expected call of List -func (m *MockVolumeStoreMockRecorder) List(size any) *gomock.Call { - m.mock.ctrl.T.Helper() - return m.mock.ctrl.RecordCallWithMethodType(m.mock, "List", reflect.TypeOf((*MockVolumeStore)(nil).List), size) -} - -// Remove mocks the Remove method of VolumeStore -func (m *MockVolumeStore) Remove(names []string) (removed []string, warns []error, err error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Remove", names) - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, []error{}, ret1 -} - -// Remove indicates an expected call of Remove -func (m *MockVolumeStoreMockRecorder) Remove(names any) *gomock.Call { - m.mock.ctrl.T.Helper() - return m.mock.ctrl.RecordCallWithMethodType(m.mock, "Remove", reflect.TypeOf((*MockVolumeStore)(nil).Remove), names) -} - -// Lock mocks the Lock method of VolumeStore -func (m *MockVolumeStore) Lock() error { - m.ctrl.T.Helper() - return nil -} - -// Lock indicates an expected call of Lock -func (m *MockVolumeStoreMockRecorder) Lock() *gomock.Call { - m.mock.ctrl.T.Helper() - return m.mock.ctrl.RecordCallWithMethodType(m.mock, "Lock", reflect.TypeOf((*MockVolumeStore)(nil).Lock)) -} - -// Unlock mocks the Unlock method of VolumeStore -func (m *MockVolumeStore) Unlock() error { - m.ctrl.T.Helper() - return nil -} - -// Unlock indicates an expected call of Unlock -func (m *MockVolumeStoreMockRecorder) Unlock() *gomock.Call { - m.mock.ctrl.T.Helper() - return m.mock.ctrl.RecordCallWithMethodType(m.mock, "Unlock", reflect.TypeOf((*MockVolumeStore)(nil).Unlock)) -} From bf89c084719237ea3f7aca1fa0d64bcb9f2f7df4 Mon Sep 17 00:00:00 2001 From: apostasie Date: Tue, 27 Aug 2024 19:39:36 -0700 Subject: [PATCH 3/3] Refactor filestores using store.Store Signed-off-by: apostasie --- cmd/nerdctl/namespace/namespace.go | 8 +- pkg/cmd/compose/compose.go | 11 +- pkg/cmd/container/create.go | 9 +- pkg/cmd/container/remove.go | 31 +- pkg/cmd/container/rename.go | 6 +- pkg/cmd/container/run_mount.go | 2 +- pkg/cmd/volume/prune.go | 72 ++- pkg/cmd/volume/rm.go | 39 +- pkg/composer/down.go | 1 + pkg/composer/up_volume.go | 6 +- .../container_network_manager.go | 36 +- .../container_network_manager_linux.go | 8 +- pkg/dnsutil/hostsstore/hostsstore.go | 344 +++++++++---- pkg/dnsutil/hostsstore/updater.go | 137 ------ pkg/dnsutil/hostsstore/updater_test.go | 3 +- pkg/identifiers/validate.go | 4 - pkg/identifiers/validate_other.go | 23 - pkg/identifiers/validate_windows.go | 41 -- pkg/inspecttypes/dockercompat/dockercompat.go | 7 +- pkg/mountutil/mountutil.go | 4 +- pkg/mountutil/volumestore/volumestore.go | 459 ++++++++++-------- pkg/namestore/namestore.go | 194 +++++--- pkg/ocihook/ocihook.go | 63 ++- pkg/ocihook/state/state.go | 135 +++--- 24 files changed, 876 insertions(+), 767 deletions(-) delete mode 100644 pkg/identifiers/validate_other.go delete mode 100644 pkg/identifiers/validate_windows.go diff --git a/cmd/nerdctl/namespace/namespace.go b/cmd/nerdctl/namespace/namespace.go index f83cc956862..6885619d341 100644 --- a/cmd/nerdctl/namespace/namespace.go +++ b/cmd/nerdctl/namespace/namespace.go @@ -18,7 +18,6 @@ package namespace import ( "fmt" - "os" "sort" "strings" "text/tabwriter" @@ -120,13 +119,10 @@ func namespaceLsAction(cmd *cobra.Command, args []string) error { if err != nil { log.L.Warn(err) } else { - volEnts, err := volStore.List(false) + numVolumes, err = volStore.Count() if err != nil { - if !os.IsNotExist(err) { - log.L.Warn(err) - } + log.L.Warn(err) } - numVolumes = len(volEnts) } labels, err := client.NamespaceService().Labels(ctx, ns) diff --git a/pkg/cmd/compose/compose.go b/pkg/cmd/compose/compose.go index 7afbd74e0b1..a37013e2aff 100644 --- a/pkg/cmd/compose/compose.go +++ b/pkg/cmd/compose/compose.go @@ -77,15 +77,8 @@ func New(client *containerd.Client, globalOptions types.GlobalCommandOptions, op if err != nil { return nil, err } - options.VolumeExists = func(volName string) (bool, error) { - _, volGetErr := volStore.Get(volName, false) - if volGetErr == nil { - return true, nil - } else if errors.Is(volGetErr, errdefs.ErrNotFound) { - return false, nil - } - return false, volGetErr - } + // FIXME: this is racy. See note in up_volume.go + options.VolumeExists = volStore.Exists options.ImageExists = func(ctx context.Context, rawRef string) (bool, error) { refNamed, err := referenceutil.ParseAny(rawRef) diff --git a/pkg/cmd/container/create.go b/pkg/cmd/container/create.go index ff7a3cd52a3..328fe7e6f6a 100644 --- a/pkg/cmd/container/create.go +++ b/pkg/cmd/container/create.go @@ -59,12 +59,14 @@ import ( "github.com/containerd/nerdctl/v2/pkg/platformutil" "github.com/containerd/nerdctl/v2/pkg/referenceutil" "github.com/containerd/nerdctl/v2/pkg/rootlessutil" + "github.com/containerd/nerdctl/v2/pkg/store" "github.com/containerd/nerdctl/v2/pkg/strutil" ) // Create will create a container. func Create(ctx context.Context, client *containerd.Client, args []string, netManager containerutil.NetworkOptionsManager, options types.ContainerCreateOptions) (containerd.Container, func(), error) { - // Acquire an exclusive lock on the volume store until we are done to avoid being raced by other volume operations + // Acquire an exclusive lock on the volume store until we are done to avoid being raced by any other + // volume operations (or any other operation involving volume manipulation) volStore, err := volume.Store(options.GOptions.Namespace, options.GOptions.DataRoot, options.GOptions.Address) if err != nil { return nil, nil, err @@ -73,7 +75,7 @@ func Create(ctx context.Context, client *containerd.Client, args []string, netMa if err != nil { return nil, nil, err } - defer volStore.Unlock() + defer volStore.Release() // simulate the behavior of double dash newArg := []string{} @@ -840,7 +842,8 @@ func generateGcFunc(ctx context.Context, container containerd.Container, ns, id, if containerNameStore, errE = namestore.New(dataStore, ns); errE != nil { log.G(ctx).WithError(errE).Warnf("failed to instantiate container name store during cleanup for container %q", id) } - if errE = containerNameStore.Release(name, id); errE != nil { + // Double-releasing may happen with containers started with --rm, so, ignore NotFound errors + if errE := containerNameStore.Release(name, id); errE != nil && !errors.Is(errE, store.ErrNotFound) { log.G(ctx).WithError(errE).Warnf("failed to release container name store for container %q (%s)", name, id) } } diff --git a/pkg/cmd/container/remove.go b/pkg/cmd/container/remove.go index 93bd16c0255..0445953be73 100644 --- a/pkg/cmd/container/remove.go +++ b/pkg/cmd/container/remove.go @@ -32,13 +32,14 @@ import ( "github.com/containerd/nerdctl/v2/pkg/api/types" "github.com/containerd/nerdctl/v2/pkg/clientutil" - "github.com/containerd/nerdctl/v2/pkg/cmd/volume" "github.com/containerd/nerdctl/v2/pkg/containerutil" "github.com/containerd/nerdctl/v2/pkg/dnsutil/hostsstore" "github.com/containerd/nerdctl/v2/pkg/idutil/containerwalker" "github.com/containerd/nerdctl/v2/pkg/ipcutil" "github.com/containerd/nerdctl/v2/pkg/labels" + "github.com/containerd/nerdctl/v2/pkg/mountutil/volumestore" "github.com/containerd/nerdctl/v2/pkg/namestore" + "github.com/containerd/nerdctl/v2/pkg/store" ) var _ error = ErrContainerStatus{} @@ -128,18 +129,10 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions return err } // Get volume store - volStore, err := volume.Store(globalOptions.Namespace, globalOptions.DataRoot, globalOptions.Address) + volStore, err := volumestore.New(dataStore, globalOptions.Namespace) if err != nil { return err } - // Note: technically, it is not strictly necessary to acquire an exclusive lock on the volume store here. - // Worst case scenario, we would fail removing anonymous volumes later on, which is a soft error, and which would - // only happen if we concurrently tried to remove the same container. - err = volStore.Lock() - if err != nil { - return err - } - defer volStore.Unlock() // Decode IPC ipc, err := ipcutil.DecodeIPCLabel(containerLabels[labels.IPC]) if err != nil { @@ -212,24 +205,34 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions // Enforce release name here in case the poststop hook name release fails - soft failure if name != "" { - if err = nameStore.Release(name, id); err != nil { + // Double-releasing may happen with containers started with --rm, so, ignore NotFound errors + if err := nameStore.Release(name, id); err != nil && !errors.Is(err, store.ErrNotFound) { log.G(ctx).WithError(err).Warnf("failed to release container name %s", name) } } - // De-allocate hosts file - soft failure - if err = hostsstore.DeallocHostsFile(dataStore, containerNamespace, id); err != nil { + hs, err := hostsstore.New(dataStore, containerNamespace) + if err != nil { + log.G(ctx).WithError(err).Warnf("failed to instantiate hostsstore for %q", containerNamespace) + } else if err = hs.DeallocHostsFile(id); err != nil { + // De-allocate hosts file - soft failure log.G(ctx).WithError(err).Warnf("failed to remove hosts file for container %q", id) } // Volume removal is not handled by the poststop hook lifecycle because it depends on removeAnonVolumes option + // Note that the anonymous volume list has been obtained earlier, without locking the volume store. + // Technically, a concurrent operation MAY have deleted these anonymous volumes already at this point, which + // would make this operation here "soft fail". + // This is not a problem per-se, though we will output a warning in that case. if anonVolumesJSON, ok := containerLabels[labels.AnonymousVolumes]; ok && removeAnonVolumes { var anonVolumes []string if err = json.Unmarshal([]byte(anonVolumesJSON), &anonVolumes); err != nil { log.G(ctx).WithError(err).Warnf("failed to unmarshall anonvolume information for container %q", id) } else { var errs []error - _, errs, err = volStore.Remove(anonVolumes) + _, errs, err = volStore.Remove(func() ([]string, []error, error) { + return anonVolumes, nil, nil + }) if err != nil || len(errs) > 0 { log.G(ctx).WithError(err).Warnf("failed to remove anonymous volumes %v", anonVolumes) } diff --git a/pkg/cmd/container/rename.go b/pkg/cmd/container/rename.go index 6c967243b79..ca9ba6828e8 100644 --- a/pkg/cmd/container/rename.go +++ b/pkg/cmd/container/rename.go @@ -44,7 +44,7 @@ func Rename(ctx context.Context, client *containerd.Client, containerID, newCont if err != nil { return err } - hostst, err := hostsstore.NewStore(dataStore) + hostst, err := hostsstore.New(dataStore, options.GOptions.Namespace) if err != nil { return err } @@ -84,7 +84,7 @@ func renameContainer(ctx context.Context, container containerd.Container, newNam labels.Name: name, } namst.Rename(newName, id, name) - hostst.Update(ns, id, name) + hostst.Update(id, name) container.SetLabels(ctx, lbls) } }() @@ -93,7 +93,7 @@ func renameContainer(ctx context.Context, container containerd.Container, newNam return err } if runtime.GOOS == "linux" { - if err = hostst.Update(ns, id, newName); err != nil { + if err = hostst.Update(id, newName); err != nil { log.G(ctx).WithError(err).Warn("failed to update host networking definitions " + "- if your container is using network 'none', this is expected - otherwise, please report this as a bug") } diff --git a/pkg/cmd/container/run_mount.go b/pkg/cmd/container/run_mount.go index b6d0c989bc9..eb9be3aefb2 100644 --- a/pkg/cmd/container/run_mount.go +++ b/pkg/cmd/container/run_mount.go @@ -258,7 +258,7 @@ func generateMountOpts(ctx context.Context, client *containerd.Client, ensuredIm log.G(ctx).Debugf("creating anonymous volume %q, for \"VOLUME %s\"", anonVolName, imgVolRaw) - anonVol, err := volStore.Create(anonVolName, []string{}) + anonVol, err := volStore.CreateWithoutLock(anonVolName, []string{}) if err != nil { return nil, nil, nil, err } diff --git a/pkg/cmd/volume/prune.go b/pkg/cmd/volume/prune.go index f159b99757a..71e6d20e779 100644 --- a/pkg/cmd/volume/prune.go +++ b/pkg/cmd/volume/prune.go @@ -19,10 +19,12 @@ package volume import ( "context" "fmt" + "strings" containerd "github.com/containerd/containerd/v2/client" "github.com/containerd/nerdctl/v2/pkg/api/types" + "github.com/containerd/nerdctl/v2/pkg/inspecttypes/native" "github.com/containerd/nerdctl/v2/pkg/labels" ) @@ -33,60 +35,50 @@ func Prune(ctx context.Context, client *containerd.Client, options types.VolumeP if err != nil { return err } - err = volStore.Lock() - if err != nil { - return err - } - defer volStore.Unlock() - // Get containers and see which volumes are used - containers, err := client.Containers(ctx) - if err != nil { - return err - } + var toRemove []string // nolint: prealloc - usedVolumesList, err := usedVolumes(ctx, containers) - if err != nil { - return err - } - var removeNames []string // nolint: prealloc - - // Get the list of known volumes from the store - volumes, err := volStore.List(false) - if err != nil { - return err - } + err = volStore.Prune(func(volumes []*native.Volume) ([]string, error) { + // Get containers and see which volumes are used + containers, err := client.Containers(ctx) + if err != nil { + return nil, err + } - // Iterate through the known volumes, making sure we do not remove in-use volumes - // but capture as well anon volumes (if --all was passed) - for _, volume := range volumes { - if _, ok := usedVolumesList[volume.Name]; ok { - continue + usedVolumesList, err := usedVolumes(ctx, containers) + if err != nil { + return nil, err } - if !options.All { - if volume.Labels == nil { + + for _, volume := range volumes { + if _, ok := usedVolumesList[volume.Name]; ok { continue } - val, ok := (*volume.Labels)[labels.AnonymousVolumes] - //skip the named volume and only remove the anonymous volume - if !ok || val != "" { - continue + if !options.All { + if volume.Labels == nil { + continue + } + val, ok := (*volume.Labels)[labels.AnonymousVolumes] + // skip the named volume and only remove the anonymous volume + if !ok || val != "" { + continue + } } + toRemove = append(toRemove, volume.Name) } - removeNames = append(removeNames, volume.Name) - } - // Remove the volumes from that list - removedNames, _, err := volStore.Remove(removeNames) + return toRemove, nil + }) + if err != nil { return err } - if len(removedNames) > 0 { + + if len(toRemove) > 0 { fmt.Fprintln(options.Stdout, "Deleted Volumes:") - for _, name := range removedNames { - fmt.Fprintln(options.Stdout, name) - } + fmt.Fprintln(options.Stdout, strings.Join(toRemove, "\n")) fmt.Fprintln(options.Stdout, "") } + return nil } diff --git a/pkg/cmd/volume/rm.go b/pkg/cmd/volume/rm.go index a7864a801a0..4b01564c009 100644 --- a/pkg/cmd/volume/rm.go +++ b/pkg/cmd/volume/rm.go @@ -33,45 +33,38 @@ import ( ) func Remove(ctx context.Context, client *containerd.Client, volumes []string, options types.VolumeRemoveOptions) error { - // Get the volume store and lock it until we are done. - // This will prevent racing new containers from being created or removed until we are done with the cleanup of volumes volStore, err := Store(options.GOptions.Namespace, options.GOptions.DataRoot, options.GOptions.Address) if err != nil { return err } - err = volStore.Lock() - if err != nil { - return err - } - defer volStore.Unlock() - // Get containers and see which volumes are used containers, err := client.Containers(ctx) if err != nil { return err } - usedVolumesList, err := usedVolumes(ctx, containers) - if err != nil { - return err - } - - volumeNames := []string{} - cannotRemove := []error{} + // Note: to avoid racy behavior, this is called by volStore.Remove *inside a lock* + removableVolumes := func() (volumeNames []string, cannotRemove []error, err error) { + usedVolumesList, err := usedVolumes(ctx, containers) + if err != nil { + return nil, nil, err + } - for _, name := range volumes { - if _, ok := usedVolumesList[name]; ok { - cannotRemove = append(cannotRemove, fmt.Errorf("volume %q is in use (%w)", name, errdefs.ErrFailedPrecondition)) - continue + for _, name := range volumes { + if _, ok := usedVolumesList[name]; ok { + cannotRemove = append(cannotRemove, fmt.Errorf("volume %q is in use (%w)", name, errdefs.ErrFailedPrecondition)) + continue + } + volumeNames = append(volumeNames, name) } - volumeNames = append(volumeNames, name) + + return volumeNames, cannotRemove, nil } - // if err is set, this is a hard filesystem error - removedNames, warns, err := volStore.Remove(volumeNames) + + removedNames, cannotRemove, err := volStore.Remove(removableVolumes) if err != nil { return err } - cannotRemove = append(cannotRemove, warns...) // Otherwise, output on stdout whatever was successful for _, name := range removedNames { fmt.Fprintln(options.Stdout, name) diff --git a/pkg/composer/down.go b/pkg/composer/down.go index 4ebd0d1d09b..d02cf613082 100644 --- a/pkg/composer/down.go +++ b/pkg/composer/down.go @@ -128,6 +128,7 @@ func (c *Composer) downVolume(ctx context.Context, shortName string) error { } // shortName is like "db_data", fullName is like "compose-wordpress_db_data" fullName := vol.Name + // FIXME: this is racy. See note in up_volume.go volExists, err := c.VolumeExists(fullName) if err != nil { return err diff --git a/pkg/composer/up_volume.go b/pkg/composer/up_volume.go index 929d96f40cb..0ab0db1fd7a 100644 --- a/pkg/composer/up_volume.go +++ b/pkg/composer/up_volume.go @@ -42,9 +42,9 @@ func (c *Composer) upVolume(ctx context.Context, shortName string) error { // shortName is like "db_data", fullName is like "compose-wordpress_db_data" fullName := vol.Name - // FIXME: this is racy. By the time we get below to creating the volume, there is no guarantee that things are still fine - // Furthermore, volStore.Get no longer errors if the volume already exists (docker behavior), so, the purpose of this - // call needs to be assessed (it might still error if the name is malformed, or if there is a filesystem error) + // FIXME: this is racy. By the time we get below to creating the volume, there is no guarantee that things are still fine. + // Furthermore, by the time we are done creating all the volumes, they may very well have been destroyed. + // This cannot be fixed without getting rid of the whole "shell-out" approach entirely. volExists, err := c.VolumeExists(fullName) if err != nil { return err diff --git a/pkg/containerutil/container_network_manager.go b/pkg/containerutil/container_network_manager.go index fa303ad3e9c..72b6dbb4ad0 100644 --- a/pkg/containerutil/container_network_manager.go +++ b/pkg/containerutil/container_network_manager.go @@ -230,10 +230,18 @@ func (m *containerNetworkManager) getContainerNetworkFilePaths(containerID strin if err != nil { return "", "", "", err } + hostsStore, err := hostsstore.New(dataStore, m.globalOptions.Namespace) + if err != nil { + return "", "", "", err + } hostnamePath := filepath.Join(conStateDir, "hostname") resolvConfPath := filepath.Join(conStateDir, "resolv.conf") - etcHostsPath := hostsstore.HostsPath(dataStore, m.globalOptions.Namespace, containerID) + + etcHostsPath, err := hostsStore.HostsPath(containerID) + if err != nil { + return "", "", "", err + } return hostnamePath, resolvConfPath, etcHostsPath, nil } @@ -380,7 +388,7 @@ func (m *hostNetworkManager) SetupNetworking(ctx context.Context, containerID st } // Get the hostsStore - hs, err := hostsstore.NewStore(dataStore) + hs, err := hostsstore.New(dataStore, container.Labels[labels.Namespace]) if err != nil { return err } @@ -401,7 +409,6 @@ func (m *hostNetworkManager) SetupNetworking(ctx context.Context, containerID st // Prep the meta hsMeta := hostsstore.Meta{ - Namespace: container.Labels[labels.Namespace], ID: container.ID, Hostname: container.Labels[labels.Hostname], ExtraHosts: hosts, @@ -425,20 +432,20 @@ func (m *hostNetworkManager) CleanupNetworking(ctx context.Context, container co return err } - // Get the hostsStore - hs, err := hostsstore.NewStore(dataStore) + // Get labels + lbls, err := container.Labels(ctx) if err != nil { return err } - // Get labels - lbls, err := container.Labels(ctx) + // Get the hostsStore + hs, err := hostsstore.New(dataStore, lbls[labels.Namespace]) if err != nil { return err } // Release - if err = hs.Release(lbls[labels.Namespace], container.ID()); err != nil { + if err = hs.Release(container.ID()); err != nil { return err } @@ -503,11 +510,20 @@ func (m *hostNetworkManager) ContainerNetworkingOpts(_ context.Context, containe resolvConfPath := filepath.Join(stateDir, "resolv.conf") copyFileContent("/etc/resolv.conf", resolvConfPath) - etcHostsPath, err := hostsstore.AllocHostsFile(dataStore, m.globalOptions.Namespace, containerID) + hs, err := hostsstore.New(dataStore, m.globalOptions.Namespace) + if err != nil { + return nil, nil, err + } + + content, err := os.ReadFile("/etc/hosts") + if err != nil { + return nil, nil, err + } + + etcHostsPath, err := hs.AllocHostsFile(containerID, content) if err != nil { return nil, nil, err } - copyFileContent("/etc/hosts", etcHostsPath) specs := []oci.SpecOpts{ oci.WithHostNamespace(specs.NetworkNamespace), diff --git a/pkg/containerutil/container_network_manager_linux.go b/pkg/containerutil/container_network_manager_linux.go index 11004f81c85..0afe793781e 100644 --- a/pkg/containerutil/container_network_manager_linux.go +++ b/pkg/containerutil/container_network_manager_linux.go @@ -99,10 +99,16 @@ func (m *cniNetworkManager) ContainerNetworkingOpts(_ context.Context, container } // the content of /etc/hosts is created in OCI Hook - etcHostsPath, err := hostsstore.AllocHostsFile(dataStore, m.globalOptions.Namespace, containerID) + hs, err := hostsstore.New(dataStore, m.globalOptions.Namespace) if err != nil { return nil, nil, err } + + etcHostsPath, err := hs.AllocHostsFile(containerID, []byte("")) + if err != nil { + return nil, nil, err + } + opts = append(opts, withCustomResolvConf(resolvConfPath), withCustomHosts(etcHostsPath)) if m.netOpts.UTSNamespace != UtsNamespaceHost { diff --git a/pkg/dnsutil/hostsstore/hostsstore.go b/pkg/dnsutil/hostsstore/hostsstore.go index 63ce4e9beec..7e32e561468 100644 --- a/pkg/dnsutil/hostsstore/hostsstore.go +++ b/pkg/dnsutil/hostsstore/hostsstore.go @@ -14,89 +14,69 @@ limitations under the License. */ -// Package hostsstore provides the interface for /var/lib/nerdctl//etchosts . -// Prioritize simplicity over scalability. +// Package hostsstore provides the interface for /var/lib/nerdctl//etchosts +// Prioritizes simplicity over scalability. +// All methods perform atomic writes and are safe to use concurrently. +// Note that locking is done per namespace. +// hostsstore is currently by container rename, remove, network managers, and ocihooks +// Finally, NOTE: +// Since we will write to the hosts file after it is mounted in the container, we cannot use our atomic write method +// as the inode would change on rename. +// Henceforth, hosts file mutation uses filesystem methods instead, making it the one exception that has to bypass +// the Store implementation. package hostsstore import ( + "bytes" "encoding/json" "errors" + "fmt" "os" "path/filepath" + "strings" types100 "github.com/containernetworking/cni/pkg/types/100" "github.com/containerd/errdefs" + "github.com/containerd/log" - "github.com/containerd/nerdctl/v2/pkg/lockutil" + "github.com/containerd/nerdctl/v2/pkg/store" ) const ( // hostsDirBasename is the base name of /var/lib/nerdctl//etchosts hostsDirBasename = "etchosts" - // metaJSON is stored as /var/lib/nerdctl//etchosts///meta.json + // metaJSON is stored as hostsDirBasename///meta.json metaJSON = "meta.json" + // hostsFile is stored as hostsDirBasename///hosts + hostsFile = "hosts" ) -// HostsPath returns "/var/lib/nerdctl//etchosts///hosts" -func HostsPath(dataStore, ns, id string) string { - if dataStore == "" || ns == "" || id == "" { - panic(errdefs.ErrInvalidArgument) - } - return filepath.Join(dataStore, hostsDirBasename, ns, id, "hosts") -} +// ErrHostsStore will wrap all errors here +var ErrHostsStore = errors.New("hosts-store error") -// ensureFile ensures a file with permission 0644. -// The file is initialized with no content. -// The dir (if not exists) is created with permission 0700. -func ensureFile(path string) error { - if path == "" { - return errdefs.ErrInvalidArgument - } - dir := filepath.Dir(path) - if err := os.MkdirAll(dir, 0700); err != nil { - return err - } - return os.WriteFile(path, []byte{}, 0644) -} +func New(dataStore string, namespace string) (retStore Store, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrHostsStore, err) + } + }() -// AllocHostsFile is used for creating mount-bindable /etc/hosts file. -// The file is initialized with no content. -func AllocHostsFile(dataStore, ns, id string) (string, error) { - lockDir := filepath.Join(dataStore, hostsDirBasename) - if err := os.MkdirAll(lockDir, 0700); err != nil { - return "", err + if dataStore == "" || namespace == "" { + return nil, store.ErrInvalidArgument } - path := HostsPath(dataStore, ns, id) - fn := func() error { - return ensureFile(path) - } - err := lockutil.WithDirLock(lockDir, fn) - return path, err -} -func DeallocHostsFile(dataStore, ns, id string) error { - lockDir := filepath.Join(dataStore, hostsDirBasename) - if err := os.MkdirAll(lockDir, 0700); err != nil { - return err + st, err := store.New(filepath.Join(dataStore, hostsDirBasename, namespace), 0, 0o644) + if err != nil { + return nil, err } - dirToBeRemoved := filepath.Dir(HostsPath(dataStore, ns, id)) - fn := func() error { - return os.RemoveAll(dirToBeRemoved) - } - return lockutil.WithDirLock(lockDir, fn) -} -func NewStore(dataStore string) (Store, error) { - store := &store{ - dataStore: dataStore, - hostsDirectory: filepath.Join(dataStore, hostsDirBasename), - } - return store, os.MkdirAll(store.hostsDirectory, 0700) + return &hostsStore{ + safeStore: st, + }, nil } type Meta struct { - Namespace string ID string Networks map[string]*types100.Result Hostname string @@ -106,76 +86,254 @@ type Meta struct { type Store interface { Acquire(Meta) error - Release(ns, id string) error - Update(ns, id, newName string) error + Release(id string) error + Update(id, newName string) error + HostsPath(id string) (location string, err error) + DeallocHostsFile(id string) (err error) + AllocHostsFile(id string, content []byte) (location string, err error) } -type store struct { - // dataStore is /var/lib/nerdctl/ - dataStore string - // hostsDirectory is /var/lib/nerdctl//etchosts - hostsDirectory string +type hostsStore struct { + safeStore store.Store } -func (x *store) Acquire(meta Meta) error { - fn := func() error { - hostsPath := HostsPath(x.dataStore, meta.Namespace, meta.ID) - if err := ensureFile(hostsPath); err != nil { +func (x *hostsStore) Acquire(meta Meta) (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrHostsStore, err) + } + }() + + return x.safeStore.WithLock(func() error { + var loc string + loc, err = x.safeStore.Location(meta.ID, hostsFile) + if err != nil { return err } - metaB, err := json.Marshal(meta) + + if err = os.WriteFile(loc, []byte{}, 0o644); err != nil { + return errors.Join(store.ErrSystemFailure, err) + } + + var content []byte + content, err = json.Marshal(meta) if err != nil { return err } - metaPath := filepath.Join(x.hostsDirectory, meta.Namespace, meta.ID, metaJSON) - if err := os.WriteFile(metaPath, metaB, 0644); err != nil { + + if err = x.safeStore.Set(content, meta.ID, metaJSON); err != nil { return err } - return newUpdater(meta.ID, x.hostsDirectory).update() - } - return lockutil.WithDirLock(x.hostsDirectory, fn) + + return x.updateAllHosts() + }) } // Release is triggered by Poststop hooks. // It is called after the containerd task is deleted but before the delete operation returns. -func (x *store) Release(ns, id string) error { - fn := func() error { - metaPath := filepath.Join(x.hostsDirectory, ns, id, metaJSON) - if _, err := os.Stat(metaPath); errors.Is(err, os.ErrNotExist) { - return nil - } - // We remove "meta.json" but we still retain the "hosts" file - // because it is needed for restarting. The "hosts" is removed on - // `nerdctl rm`. - // https://github.com/rootless-containers/rootlesskit/issues/220#issuecomment-783224610 - if err := os.RemoveAll(metaPath); err != nil { +func (x *hostsStore) Release(id string) (err error) { + // We remove "meta.json" but we still retain the "hosts" file + // because it is needed for restarting. The "hosts" is removed on + // `nerdctl rm`. + // https://github.com/rootless-containers/rootlesskit/issues/220#issuecomment-783224610 + defer func() { + if err != nil { + err = errors.Join(ErrHostsStore, err) + } + }() + + return x.safeStore.WithLock(func() error { + if err = x.safeStore.Delete(id, metaJSON); err != nil { + return err + } + + return x.updateAllHosts() + }) +} + +// AllocHostsFile is used for creating mount-bindable /etc/hosts file. +func (x *hostsStore) AllocHostsFile(id string, content []byte) (location string, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrHostsStore, err) + } + }() + + err = x.safeStore.WithLock(func() error { + err = x.safeStore.GroupEnsure(id) + if err != nil { return err } - return newUpdater(id, x.hostsDirectory).update() + + var loc string + loc, err = x.safeStore.Location(id, hostsFile) + if err != nil { + return err + } + + err = os.WriteFile(loc, content, 0o644) + if err != nil { + err = errors.Join(store.ErrSystemFailure, err) + } + + return err + }) + if err != nil { + return "", err } - return lockutil.WithDirLock(x.hostsDirectory, fn) + + return x.safeStore.Location(id, hostsFile) } -func (x *store) Update(ns, id, newName string) error { - fn := func() error { - metaPath := filepath.Join(x.hostsDirectory, ns, id, metaJSON) - metaB, err := os.ReadFile(metaPath) +func (x *hostsStore) DeallocHostsFile(id string) (err error) { + defer func() { if err != nil { + err = errors.Join(ErrHostsStore, err) + } + }() + + return x.safeStore.WithLock(func() error { return x.safeStore.Delete(id, hostsFile) }) +} + +func (x *hostsStore) HostsPath(id string) (location string, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrHostsStore, err) + } + }() + + return x.safeStore.Location(id, hostsFile) +} + +func (x *hostsStore) Update(id, newName string) (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrHostsStore, err) + } + }() + + return x.safeStore.WithLock(func() error { + var content []byte + if content, err = x.safeStore.Get(id, metaJSON); err != nil { return err } + meta := &Meta{} - if err := json.Unmarshal(metaB, meta); err != nil { + if err = json.Unmarshal(content, meta); err != nil { return err } + meta.Name = newName - metaB, err = json.Marshal(meta) + content, err = json.Marshal(meta) if err != nil { return err } - if err := os.WriteFile(metaPath, metaB, 0644); err != nil { + + if err = x.safeStore.Set(content, id, metaJSON); err != nil { + return err + } + + return x.updateAllHosts() + }) +} + +func (x *hostsStore) updateAllHosts() (err error) { + entries, err := x.safeStore.List() + if err != nil { + return err + } + + metasByEntry := map[string]*Meta{} + metasByIP := map[string]*Meta{} + networkNameByIP := map[string]string{} + + // Phase 1: read all meta files + for _, entry := range entries { + var content []byte + content, err = x.safeStore.Get(entry, metaJSON) + if err != nil { + log.L.WithError(err).Debugf("unable to read %q", entry) + continue + } + meta := &Meta{} + if err = json.Unmarshal(content, meta); err != nil { + log.L.WithError(err).Warnf("unable to unmarshell %q", entry) + continue + } + metasByEntry[entry] = meta + + for netName, cniRes := range meta.Networks { + for _, ipCfg := range cniRes.IPs { + if ip := ipCfg.Address.IP; ip != nil { + if ip.IsLoopback() || ip.IsUnspecified() { + continue + } + ipStr := ip.String() + metasByIP[ipStr] = meta + networkNameByIP[ipStr] = netName + } + } + } + } + + // Phase 2: write hosts files + for _, entry := range entries { + myMeta, ok := metasByEntry[entry] + if !ok { + log.L.WithError(errdefs.ErrNotFound).Debugf("hostsstore metadata %q not found in %q?", metaJSON, entry) + continue + } + + myNetworks := make(map[string]struct{}) + for nwName := range myMeta.Networks { + myNetworks[nwName] = struct{}{} + } + + var content []byte + content, err = x.safeStore.Get(entry, hostsFile) + if err != nil { + log.L.WithError(err).Errorf("unable to retrieve the hosts file for %q", entry) + continue + } + + // parse the hosts file, keep the original host record + // retain custom /etc/hosts entries outside region + var buf bytes.Buffer + if content != nil { + if err = parseHostsButSkipMarkedRegion(&buf, bytes.NewReader(content)); err != nil { + log.L.WithError(err).Errorf("failed to read hosts file for %q", entry) + continue + } + } + + buf.WriteString(fmt.Sprintf("# %s\n", MarkerBegin)) + buf.WriteString("127.0.0.1 localhost localhost.localdomain\n") + buf.WriteString("::1 localhost localhost.localdomain\n") + + // keep extra hosts first + for host, ip := range myMeta.ExtraHosts { + buf.WriteString(fmt.Sprintf("%-15s %s\n", ip, host)) + } + + for ip, netName := range networkNameByIP { + meta := metasByIP[ip] + if line := createLine(netName, meta, myNetworks); len(line) != 0 { + buf.WriteString(fmt.Sprintf("%-15s %s\n", ip, strings.Join(line, " "))) + } + } + + buf.WriteString(fmt.Sprintf("# %s\n", MarkerEnd)) + + var loc string + loc, err = x.safeStore.Location(entry, hostsFile) + if err != nil { return err } - return newUpdater(meta.ID, x.hostsDirectory).update() + + err = os.WriteFile(loc, buf.Bytes(), 0o644) + if err != nil { + log.L.WithError(err).Errorf("failed to write hosts file for %q", entry) + } } - return lockutil.WithDirLock(x.hostsDirectory, fn) + return nil } diff --git a/pkg/dnsutil/hostsstore/updater.go b/pkg/dnsutil/hostsstore/updater.go index 2088c6cbd3c..2b77bb8301d 100644 --- a/pkg/dnsutil/hostsstore/updater.go +++ b/pkg/dnsutil/hostsstore/updater.go @@ -17,146 +17,9 @@ package hostsstore import ( - "bytes" - "encoding/json" - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/containerd/errdefs" - "github.com/containerd/log" - "github.com/containerd/nerdctl/v2/pkg/netutil" ) -// newUpdater creates an updater for hostsD (/var/lib/nerdctl//etchosts) -func newUpdater(id, hostsD string) *updater { - u := &updater{ - id: id, - hostsD: hostsD, - metaByIPStr: make(map[string]*Meta), - nwNameByIPStr: make(map[string]string), - metaByDir: make(map[string]*Meta), - } - return u -} - -// updater is the struct for updater.update() -type updater struct { - id string - hostsD string // "/var/lib/nerdctl//etchosts" - metaByIPStr map[string]*Meta // key: IP string - nwNameByIPStr map[string]string // key: IP string, value: key of Meta.Networks - metaByDir map[string]*Meta // key: "/var/lib/nerdctl//etchosts//" -} - -// update updates the hostsD tree. -// Must be called with a locker for the hostsD directory. -func (u *updater) update() error { - // phase1: read meta.json - if err := u.phase1(); err != nil { - return err - } - // phase2: write hosts - return u.phase2() -} - -// phase1: read meta.json -func (u *updater) phase1() error { - readMetaWF := func(path string, _ os.FileInfo, walkErr error) error { - if walkErr != nil { - return walkErr - } - if filepath.Base(path) != metaJSON { - return nil - } - metaB, err := os.ReadFile(path) - if err != nil { - return err - } - var meta Meta - if err := json.Unmarshal(metaB, &meta); err != nil { - return err - } - u.metaByDir[filepath.Dir(path)] = &meta - for nwName, cniRes := range meta.Networks { - for _, ipCfg := range cniRes.IPs { - if ip := ipCfg.Address.IP; ip != nil { - if ip.IsLoopback() || ip.IsUnspecified() { - continue - } - ipStr := ip.String() - u.metaByIPStr[ipStr] = &meta - u.nwNameByIPStr[ipStr] = nwName - } - } - } - return nil - } - return filepath.Walk(u.hostsD, readMetaWF) -} - -// phase2: write hosts -func (u *updater) phase2() error { - writeHostsWF := func(path string, _ os.FileInfo, walkErr error) error { - if walkErr != nil { - return walkErr - } - if filepath.Base(path) != "hosts" { - return nil - } - dir := filepath.Dir(path) - myMeta, ok := u.metaByDir[dir] - if !ok { - log.L.WithError(errdefs.ErrNotFound).Debugf("hostsstore metadata %q not found in %q?", metaJSON, dir) - return nil - } - myNetworks := make(map[string]struct{}) - for nwName := range myMeta.Networks { - myNetworks[nwName] = struct{}{} - } - - // parse the hosts file, keep the original host record - // retain custom /etc/hosts entries outside region - r, err := os.Open(path) - if err != nil { - return err - } - defer r.Close() - var buf bytes.Buffer - if r != nil { - if err := parseHostsButSkipMarkedRegion(&buf, r); err != nil { - log.L.WithError(err).Warn("failed to read hosts file") - } - } - - buf.WriteString(fmt.Sprintf("# %s\n", MarkerBegin)) - buf.WriteString("127.0.0.1 localhost localhost.localdomain\n") - buf.WriteString("::1 localhost localhost.localdomain\n") - - // keep extra hosts first - for host, ip := range myMeta.ExtraHosts { - buf.WriteString(fmt.Sprintf("%-15s %s\n", ip, host)) - } - - for ip, nwName := range u.nwNameByIPStr { - meta := u.metaByIPStr[ip] - if line := createLine(nwName, meta, myNetworks); len(line) != 0 { - buf.WriteString(fmt.Sprintf("%-15s %s\n", ip, strings.Join(line, " "))) - } - } - - buf.WriteString(fmt.Sprintf("# %s\n", MarkerEnd)) - err = os.WriteFile(path, buf.Bytes(), 0644) - if err != nil { - return err - } - return nil - } - return filepath.Walk(u.hostsD, writeHostsWF) -} - // createLine returns a line string slice. // line is like "foo foo.nw0 bar bar.nw0\n" // for `nerdctl --name=foo --hostname=bar --network=n0`. diff --git a/pkg/dnsutil/hostsstore/updater_test.go b/pkg/dnsutil/hostsstore/updater_test.go index c55f4ae3cd6..03ffefae12b 100644 --- a/pkg/dnsutil/hostsstore/updater_test.go +++ b/pkg/dnsutil/hostsstore/updater_test.go @@ -74,8 +74,7 @@ func TestCreateLine(t *testing.T) { } for _, tc := range testCases { thatMeta := &Meta{ - Namespace: "default", - ID: "984d63ce45ae", + ID: "984d63ce45ae", Networks: map[string]*types100.Result{ tc.thatNetwork: { Interfaces: []*types100.Interface{ diff --git a/pkg/identifiers/validate.go b/pkg/identifiers/validate.go index bb6e62c5aaa..7f5ff3522d3 100644 --- a/pkg/identifiers/validate.go +++ b/pkg/identifiers/validate.go @@ -42,9 +42,5 @@ func ValidateDockerCompat(s string) error { return fmt.Errorf("identifier %q must match pattern %q: %w", s, AllowedIdentfierChars, errdefs.ErrInvalidArgument) } - if err := validatePlatformSpecific(s); err != nil { - return err - } - return nil } diff --git a/pkg/identifiers/validate_other.go b/pkg/identifiers/validate_other.go deleted file mode 100644 index 642c6ac3ae6..00000000000 --- a/pkg/identifiers/validate_other.go +++ /dev/null @@ -1,23 +0,0 @@ -//go:build !windows - -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package identifiers - -func validatePlatformSpecific(identifier string) error { - return nil -} diff --git a/pkg/identifiers/validate_windows.go b/pkg/identifiers/validate_windows.go deleted file mode 100644 index 64141ed3529..00000000000 --- a/pkg/identifiers/validate_windows.go +++ /dev/null @@ -1,41 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package identifiers - -import ( - "fmt" - "regexp" - - "github.com/containerd/errdefs" -) - -// See https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file - -// Note that ¹²³ is technically not required here since they are out of the regexp for regular validation -var windowsDisallowed = regexp.MustCompile(`^(con|prn|nul|aux|com[1-9¹²³]|lpt[1-9¹²³])([.].*)?`) - -func validatePlatformSpecific(identifier string) error { - if windowsDisallowed.MatchString(identifier) { - return fmt.Errorf("identifier %q must not match reserved windows pattern %q: %w", identifier, windowsDisallowed, errdefs.ErrInvalidArgument) - } - - if identifier[len(identifier)-1:] == "." { - return fmt.Errorf("windows does not allow identifiers ending with a dot or space (%q)", identifier) - } - - return nil -} diff --git a/pkg/inspecttypes/dockercompat/dockercompat.go b/pkg/inspecttypes/dockercompat/dockercompat.go index f1704f1781a..aceec3bca3d 100644 --- a/pkg/inspecttypes/dockercompat/dockercompat.go +++ b/pkg/inspecttypes/dockercompat/dockercompat.go @@ -292,8 +292,11 @@ func ContainerFromNative(n *native.Container) (*Container, error) { cs.Pid = n.Process.Pid cs.ExitCode = int(n.Process.Status.ExitStatus) if containerAnnotations[labels.StateDir] != "" { - lf := state.NewLifecycleState(containerAnnotations[labels.StateDir]) - if err := lf.WithLock(lf.Load); err == nil && !time.Time.IsZero(lf.StartedAt) { + if lf, err := state.New(containerAnnotations[labels.StateDir]); err != nil { + log.L.WithError(err).Errorf("failed retrieving state") + } else if err = lf.Load(); err != nil { + log.L.WithError(err).Errorf("failed retrieving StartedAt from state") + } else if !time.Time.IsZero(lf.StartedAt) { cs.StartedAt = lf.StartedAt.UTC().Format(time.RFC3339Nano) } } diff --git a/pkg/mountutil/mountutil.go b/pkg/mountutil/mountutil.go index b2a7b90dd30..d55a2cb6646 100644 --- a/pkg/mountutil/mountutil.go +++ b/pkg/mountutil/mountutil.go @@ -189,7 +189,7 @@ func handleAnonymousVolumes(s string, volStore volumestore.VolumeStore) (volumeS res.AnonymousVolume = idgen.GenerateID() log.L.Debugf("creating anonymous volume %q, for %q", res.AnonymousVolume, s) - anonVol, err := volStore.Create(res.AnonymousVolume, []string{}) + anonVol, err := volStore.CreateWithoutLock(res.AnonymousVolume, []string{}) if err != nil { return res, fmt.Errorf("failed to create an anonymous volume %q: %w", res.AnonymousVolume, err) } @@ -204,7 +204,7 @@ func handleNamedVolumes(source string, volStore volumestore.VolumeStore) (volume res.Name = source // Create returns an existing volume or creates a new one if necessary. - vol, err := volStore.Create(res.Name, nil) + vol, err := volStore.CreateWithoutLock(res.Name, nil) if err != nil { return res, fmt.Errorf("failed to get volume %q: %w", res.Name, err) } diff --git a/pkg/mountutil/volumestore/volumestore.go b/pkg/mountutil/volumestore/volumestore.go index 20628db56a5..47c1b2bd73e 100644 --- a/pkg/mountutil/volumestore/volumestore.go +++ b/pkg/mountutil/volumestore/volumestore.go @@ -14,297 +14,374 @@ limitations under the License. */ +// Package volumestore allows manipulating containers' volumes. +// All methods are safe to use concurrently (and perform atomic writes), except CreateWithoutLock, which is specifically +// meant to be used multiple times, inside a Lock-ed section. package volumestore import ( "encoding/json" "errors" "fmt" - "os" "path/filepath" - "github.com/containerd/errdefs" "github.com/containerd/log" "github.com/containerd/nerdctl/v2/pkg/identifiers" "github.com/containerd/nerdctl/v2/pkg/inspecttypes/native" - "github.com/containerd/nerdctl/v2/pkg/lockutil" + "github.com/containerd/nerdctl/v2/pkg/store" "github.com/containerd/nerdctl/v2/pkg/strutil" ) const ( + volumeDirBasename = "volumes" dataDirName = "_data" volumeJSONFileName = "volume.json" ) -// VolumeStore allows manipulating containers' volumes -// Every method is protected by a file lock, and is safe to use concurrently. -// If you need to use multiple methods successively (for example: List, then Remove), you should instead optin -// for an explicit durable lock, by first calling `Lock` then `defer Unlock`. -// This is also true (and important to do) for any operation that is going to inspect containers before going for -// creation or removal of volumes. +// ErrNameStore will wrap all errors here +var ErrVolumeStore = errors.New("volume-store error") + type VolumeStore interface { - Create(name string, labels []string) (*native.Volume, error) + // Exists checks if a given volume exists + Exists(name string) (bool, error) + // Get returns an existing volume Get(name string, size bool) (*native.Volume, error) + // Create will either return an existing volume, or create a new one + // NOTE that different labels will NOT create a new volume if there is one by that name already, + // but instead return the existing one with the (possibly different) labels + Create(name string, labels []string) (vol *native.Volume, err error) + // List returns all existing volumes. + // Note that list is expensive as it reads all volumes individual info List(size bool) (map[string]native.Volume, error) - Remove(names []string) (removed []string, warns []error, err error) + // Remove one of more volumes + Remove(generator func() ([]string, []error, error)) (removed []string, warns []error, err error) + // Prune will call a filtering function expected to return the volumes name to delete + Prune(filter func(volumes []*native.Volume) ([]string, error)) (err error) + // Count returns the number of volumes + Count() (count int, err error) + + // Lock: see store implementation Lock() error - Unlock() error + // CreateWithoutLock will create a volume (or return an existing one). + // This method does NOT lock (unlike Create). + // It is meant to be used between `Lock` and `Release`, and is specifically useful when multiple different volume + // creation will have to happen in different method calls (eg: container create). + CreateWithoutLock(name string, labels []string) (*native.Volume, error) + // Release: see store implementation + Release() error } // New returns a VolumeStore -func New(dataStore, ns string) (VolumeStore, error) { - if dataStore == "" || ns == "" { - return nil, errdefs.ErrInvalidArgument +func New(dataStore, namespace string) (volStore VolumeStore, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } + }() + + if dataStore == "" || namespace == "" { + return nil, store.ErrInvalidArgument } - volStoreDir := filepath.Join(dataStore, "volumes", ns) - if err := os.MkdirAll(volStoreDir, 0700); err != nil { + st, err := store.New(filepath.Join(dataStore, volumeDirBasename, namespace), 0, 0o644) + if err != nil { return nil, err } - vs := &volumeStore{ - dir: volStoreDir, - } - return vs, nil + + return &volumeStore{ + Locker: st, + manager: st, + }, nil } type volumeStore struct { - dir string - locked *os.File + // Expose the lock primitives directly to satisfy interface for Lock and Release + store.Locker + + manager store.Manager } -// Lock should be called when you need an exclusive lock on the volume store for an extended period of time -// spanning multiple atomic method calls. -// Be sure to defer Unlock to release it. -func (vs *volumeStore) Lock() error { - if vs.locked != nil { - return fmt.Errorf("cannot lock already locked volume store %q", vs.dir) - } +// Exists checks if a volume exists in the store +func (vs *volumeStore) Exists(name string) (doesExist bool, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } + }() - dirFile, err := lockutil.Lock(vs.dir) - if err != nil { - return err + if err = identifiers.ValidateDockerCompat(name); err != nil { + return false, err } - vs.locked = dirFile - return nil + // No need for a lock here, the operation is atomic + return vs.manager.Exists(name) } -// Unlock should be called once done (see Lock) to release the persistent lock on the store -func (vs *volumeStore) Unlock() error { - if vs.locked == nil { - return fmt.Errorf("cannot unlock already unlocked volume store %q", vs.dir) +// Get retrieves a native volume from the store, optionally with its size +func (vs *volumeStore) Get(name string, size bool) (vol *native.Volume, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } + }() + + if err = identifiers.ValidateDockerCompat(name); err != nil { + return nil, err } + // If we require the size, this is no longer atomic, so, we need to lock + err = vs.WithLock(func() error { + vol, err = vs.rawGet(name, size) + return err + }) + + return vol, err +} + +// CreateWithoutLock will create a new volume, or return an existing one if there is one already by that name +// It does NOT lock for you - unlike all the other methods - though it *will* error if you do not lock. +// This is on purpose as volume creation in most cases are done during container creation, +// and implies an extended period of time for locking. +// To use: +// volStore.Lock() +// defer volStore.Release() +// volStore.CreateWithoutLock(...) +func (vs *volumeStore) CreateWithoutLock(name string, labels []string) (vol *native.Volume, err error) { defer func() { - vs.locked = nil + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } }() - if err := lockutil.Unlock(vs.locked); err != nil { - return fmt.Errorf("failed to unlock volume store %q: %w", vs.dir, err) + if err = identifiers.ValidateDockerCompat(name); err != nil { + return nil, err } - return nil + + return vs.rawCreate(name, labels) } -// Create will create a new volume, or return an existing one if there is one already by that name -// Besides a possible locking error, it might return ErrInvalidArgument, hard filesystem errors, json errors -func (vs *volumeStore) Create(name string, labels []string) (*native.Volume, error) { - if err := identifiers.ValidateDockerCompat(name); err != nil { - return nil, fmt.Errorf("invalid volume name: %w", err) +func (vs *volumeStore) Create(name string, labels []string) (vol *native.Volume, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } + }() + + if err = identifiers.ValidateDockerCompat(name); err != nil { + return nil, err } - volPath := filepath.Join(vs.dir, name) - volDataPath := filepath.Join(volPath, dataDirName) - volFilePath := filepath.Join(volPath, volumeJSONFileName) + err = vs.Locker.WithLock(func() error { + vol, err = vs.rawCreate(name, labels) + return err + }) - vol := &native.Volume{} + return vol, err +} - fn := func() error { - // Failures that are not os.ErrExist must exit here - if err := os.Mkdir(volPath, 0700); err != nil && !errors.Is(err, os.ErrExist) { +func (vs *volumeStore) Count() (count int, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } + }() + + err = vs.Locker.WithLock(func() error { + names, err := vs.manager.List() + if err != nil { return err } - if err := os.Mkdir(volDataPath, 0755); err != nil && !errors.Is(err, os.ErrExist) { + + count = len(names) + return nil + }) + + return count, err +} + +func (vs *volumeStore) List(size bool) (res map[string]native.Volume, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } + }() + + res = make(map[string]native.Volume) + + err = vs.Locker.WithLock(func() error { + names, err := vs.manager.List() + if err != nil { return err } - volOpts := struct { - Labels map[string]string `json:"labels"` - }{} + for _, name := range names { + vol, err := vs.rawGet(name, size) + if err != nil { + log.L.WithError(err).Errorf("something is wrong with %q", name) + continue + } + res[name] = *vol + } + + return nil + }) + + return res, err +} - if len(labels) > 0 { - volOpts.Labels = strutil.ConvertKVStringsToMap(labels) +// Remove will remove one or more containers +func (vs *volumeStore) Remove(generator func() ([]string, []error, error)) (removed []string, warns []error, err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) } + }() - // Failure here must exit, no need to clean-up - labelsJSON, err := json.MarshalIndent(volOpts, "", " ") + err = vs.Locker.WithLock(func() error { + var names []string + names, warns, err = generator() if err != nil { return err } - // If it does not exist - if _, err = os.Stat(volFilePath); err != nil { - // Any other stat error than "not exists", hard exit - if !errors.Is(err, os.ErrNotExist) { - return err + for _, name := range names { + // Invalid name, soft error + if err = identifiers.ValidateDockerCompat(name); err != nil { + // TODO: we are clearly mixing presentation concerns here + // This should be handled by the cli, not here + warns = append(warns, err) + continue } - // Error was does not exist, so, write it - if err = os.WriteFile(volFilePath, labelsJSON, 0644); err != nil { + + // Erroring on Exists is a hard error + // !doesExist is a soft error + // Inability to delete is a hard error + if doesExist, err := vs.manager.Exists(name); err != nil { + return err + } else if !doesExist { + // TODO: see above + warns = append(warns, fmt.Errorf("volume %q: %w", name, store.ErrNotFound)) + continue + } else if err = vs.manager.Delete(name); err != nil { return err } - } else { - log.L.Warnf("volume %q already exists and will be returned as-is", name) - } - // At this point, we either have a volume, or created a new one successfully - vol.Name = name - vol.Mountpoint = volDataPath + // Otherwise, add it the list of successfully removed + removed = append(removed, name) + } return nil - } - - var err error - if vs.locked == nil { - err = lockutil.WithDirLock(vs.dir, fn) - } else { - err = fn() - } - if err != nil { - return nil, err - } + }) - return vol, nil + return removed, warns, err } -// Get retrieves a native volume from the store -// Besides a possible locking error, it might return ErrInvalidArgument, ErrNotFound, or a filesystem error -func (vs *volumeStore) Get(name string, size bool) (*native.Volume, error) { - if err := identifiers.ValidateDockerCompat(name); err != nil { - return nil, fmt.Errorf("invalid volume name: %w", err) - } - volPath := filepath.Join(vs.dir, name) - volDataPath := filepath.Join(volPath, dataDirName) - volFilePath := filepath.Join(volPath, volumeJSONFileName) +func (vs *volumeStore) Prune(filter func(vol []*native.Volume) ([]string, error)) (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrVolumeStore, err) + } + }() - vol := &native.Volume{} + return vs.Locker.WithLock(func() error { + names, err := vs.manager.List() + if err != nil { + return err + } - fn := func() error { - if _, err := os.Stat(volDataPath); err != nil { - if os.IsNotExist(err) { - return fmt.Errorf("%q does not exist in the volume store: %w", name, errdefs.ErrNotFound) + res := []*native.Volume{} + for _, name := range names { + vol, err := vs.rawGet(name, false) + if err != nil { + log.L.WithError(err).Errorf("something is wrong with %q", name) + continue } - return fmt.Errorf("filesystem error reading %q from the volume store: %w", name, err) + res = append(res, vol) } - volumeDataBytes, err := os.ReadFile(volFilePath) + toDelete, err := filter(res) if err != nil { - if os.IsNotExist(err) { - return fmt.Errorf("%q labels file does not exist in the volume store: %w", name, errdefs.ErrNotFound) - } - return fmt.Errorf("filesystem error reading %q from the volume store: %w", name, err) + return err } - vol.Name = name - vol.Mountpoint = volDataPath - vol.Labels = labels(volumeDataBytes) - - if size { - vol.Size, err = volumeSize(vol) + for _, name := range toDelete { + err = vs.manager.Delete(name) if err != nil { - return fmt.Errorf("failed reading volume size for %q from the volume store: %w", name, err) + return err } } + return nil + }) +} + +func (vs *volumeStore) rawGet(name string, size bool) (vol *native.Volume, err error) { + content, err := vs.manager.Get(name, volumeJSONFileName) + if err != nil { + return nil, err } - var err error - if vs.locked == nil { - err = lockutil.WithDirLock(vs.dir, fn) - } else { - err = fn() + vol = &native.Volume{ + Name: name, + Labels: labels(content), } + + vol.Mountpoint, err = vs.manager.Location(name, dataDirName) if err != nil { return nil, err } + if size { + vol.Size, err = vs.manager.GroupSize(name, dataDirName) + if err != nil { + return nil, errors.Join(fmt.Errorf("failed reading volume size for %q", name), err) + } + } + return vol, nil } -// List retrieves all known volumes from the store. -// Besides a possible locking error, it might return ErrNotFound (indicative that the store is in a broken state), or a filesystem error -func (vs *volumeStore) List(size bool) (map[string]native.Volume, error) { - res := map[string]native.Volume{} +func (vs *volumeStore) rawCreate(name string, labels []string) (vol *native.Volume, err error) { + volOpts := struct { + Labels map[string]string `json:"labels"` + }{} - fn := func() error { - dirEntries, err := os.ReadDir(vs.dir) - if err != nil { - return fmt.Errorf("filesystem error while trying to list volumes from the volume store: %w", err) - } + if len(labels) > 0 { + volOpts.Labels = strutil.ConvertKVStringsToMap(labels) + } - for _, dirEntry := range dirEntries { - name := dirEntry.Name() - vol, err := vs.Get(name, size) - if err != nil { - return err - } - res[name] = *vol - } - return nil + // Failure here must exit, no need to clean-up + labelsJSON, err := json.MarshalIndent(volOpts, "", " ") + if err != nil { + return nil, err } - var err error - // Since we are calling Get, we need to acquire a global lock - if vs.locked == nil { - err = vs.Lock() - if err != nil { + if doesExist, err := vs.manager.Exists(name, volumeJSONFileName); err != nil { + return nil, err + } else if !doesExist { + if err = vs.manager.Set(labelsJSON, name, volumeJSONFileName); err != nil { return nil, err } - defer vs.Unlock() + } else { + log.L.Warnf("volume %q already exists and will be returned as-is", name) + // FIXME: we do not check if the existing volume has the same labels as requested - should we? } - err = fn() - if err != nil { - return nil, err + + // At this point, we either have an existing volume, or created a new one successfully + vol = &native.Volume{ + Name: name, } - return res, nil -} -// Remove will remove one or more containers -// Besides a possible locking error, it might return hard filesystem errors -// Any other failure (ErrInvalidArgument, ErrNotFound) is a soft error that will be added the `warns` -func (vs *volumeStore) Remove(names []string) (removed []string, warns []error, err error) { - fn := func() error { - for _, name := range names { - // Invalid name, soft error - if err := identifiers.ValidateDockerCompat(name); err != nil { - warns = append(warns, fmt.Errorf("invalid volume name: %w", err)) - continue - } - dir := filepath.Join(vs.dir, name) - // Does not exist, soft error - if _, err := os.Stat(dir); err != nil { - if os.IsNotExist(err) { - warns = append(warns, fmt.Errorf("no such volume: %s (%w)", name, errdefs.ErrNotFound)) - continue - } - return fmt.Errorf("filesystem error while trying to remove volumes from the volume store: %w", err) - } - // Hard filesystem error, hard error, and stop here - if err := os.RemoveAll(dir); err != nil { - return fmt.Errorf("filesystem error while trying to remove volumes from the volume store: %w", err) - } - // Otherwise, add it the list of successfully removed - removed = append(removed, name) - } - return nil + if err = vs.manager.GroupEnsure(name, dataDirName); err != nil { + return nil, err } - if vs.locked == nil { - err = lockutil.WithDirLock(vs.dir, fn) - } else { - err = fn() + if vol.Mountpoint, err = vs.manager.Location(name, dataDirName); err != nil { + return nil, err } - return removed, warns, err + return vol, nil } // Private helpers @@ -318,21 +395,3 @@ func labels(b []byte) *map[string]string { } return vo.Labels } - -func volumeSize(volume *native.Volume) (int64, error) { - var size int64 - var walkFn = func(_ string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() { - size += info.Size() - } - return err - } - var err = filepath.Walk(volume.Mountpoint, walkFn) - if err != nil { - return 0, err - } - return size, nil -} diff --git a/pkg/namestore/namestore.go b/pkg/namestore/namestore.go index a6b57968c82..6ded12d6c95 100644 --- a/pkg/namestore/namestore.go +++ b/pkg/namestore/namestore.go @@ -14,119 +14,167 @@ limitations under the License. */ +// Package namestore provides a simple store for containers to exclusively acquire and release names. +// All methods are safe to use concurrently. +// Note that locking of the store is done at the namespace level. +// The namestore is currently used by container create, remove, rename, and as part of the ocihook events cycle. package namestore import ( + "errors" "fmt" - "os" "path/filepath" - "strings" "github.com/containerd/log" "github.com/containerd/nerdctl/v2/pkg/identifiers" - "github.com/containerd/nerdctl/v2/pkg/lockutil" + "github.com/containerd/nerdctl/v2/pkg/store" ) -func New(dataStore, ns string) (NameStore, error) { - dir := filepath.Join(dataStore, "names", ns) - if err := os.MkdirAll(dir, 0700); err != nil { - return nil, err +// ErrNameStore will wrap all errors here +var ErrNameStore = errors.New("name-store error") + +// New will return a NameStore for a given namespace. +func New(stateDir, namespace string) (NameStore, error) { + if namespace == "" { + return nil, errors.Join(ErrNameStore, store.ErrInvalidArgument) } - store := &nameStore{ - dir: dir, + + st, err := store.New(filepath.Join(stateDir, namespace), 0, 0) + if err != nil { + return nil, errors.Join(ErrNameStore, err) } - return store, nil + + return &nameStore{ + safeStore: st, + }, nil } +// NameStore allows acquiring, releasing and renaming. +// "names" must abide by identifiers.ValidateDockerCompat +// A container cannot release or rename a name it does not own. +// A container cannot acquire a name that is already owned by another container. +// Re-acquiring a name does not error and is a no-op. +// Double releasing a name will error. +// Note that technically a given container may acquire multiple different names, although this is not +// something we do in the codebase. type NameStore interface { + // Acquire exclusively grants `name` to container with `id`. Acquire(name, id string) error + // Acquire allows the container owning a specific name to release it Release(name, id string) error + // Rename allows the container owning a specific name to change it to newName (if available) Rename(oldName, id, newName string) error } type nameStore struct { - dir string + safeStore store.Store } -func (x *nameStore) Acquire(name, id string) error { - if err := identifiers.ValidateDockerCompat(name); err != nil { - return fmt.Errorf("invalid name: %w", err) - } - if strings.TrimSpace(id) != id { - return fmt.Errorf("untrimmed ID %q", id) +func (x *nameStore) Acquire(name, id string) (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrNameStore, err) + } + }() + + if err = identifiers.ValidateDockerCompat(name); err != nil { + return err } - fn := func() error { - fileName := filepath.Join(x.dir, name) - if b, err := os.ReadFile(fileName); err == nil { - if strings.TrimSpace(string(b)) == "" { - // currently acquired for an empty id - this obviously should never happen - // this is recoverable, and we are not hard erroring, but still indicative that something was wrong - // https://github.com/containerd/nerdctl/issues/3351 - log.L.Errorf("current name %q is reserved for a an empty id - please report this is as a bug", name) - } else if string(b) != id { - // if acquired by a different container, we error out here - return fmt.Errorf("name %q is already used by ID %q", name, string(b)) + + return x.safeStore.WithLock(func() error { + var previousID []byte + previousID, err = x.safeStore.Get(name) + if err != nil { + if !errors.Is(err, store.ErrNotFound) { + return err } - // Otherwise, this is just re-acquiring after a restart - // For example, if containerd was bounced, previously running containers that would get restarted will go - // again through onCreateRuntime (unlike in a "normal" stop/start flow). - // As such, we are allowing reacquiring by the same id - // See: https://github.com/containerd/nerdctl/issues/3354 + } else if string(previousID) == "" { + // This has happened in the past, probably following some other error condition of OS restart + // We do warn about it, but do not hard-error and let the new container acquire the name + log.L.Warnf("name %q was locked by an empty id - this is abnormal and should be reported", name) + } else if string(previousID) != id { + // If the name is already used by another container, that is a hard error + return fmt.Errorf("name %q is already used by ID %q", name, previousID) } - return os.WriteFile(fileName, []byte(id), 0600) - } - return lockutil.WithDirLock(x.dir, fn) + + // If the id was the same, we are "re-acquiring". + // Maybe containerd was bounced, so previously running containers that would get restarted will go again through + // onCreateRuntime (unlike in a "normal" stop/start flow), without ever had gone through onPostStop. + // As such, reacquiring by the same id is not a bug... + // See: https://github.com/containerd/nerdctl/issues/3354 + return x.safeStore.Set([]byte(id), name) + }) } -func (x *nameStore) Release(name, id string) error { - if name == "" { - return nil - } - if err := identifiers.ValidateDockerCompat(name); err != nil { - return fmt.Errorf("invalid name: %w", err) - } - if strings.TrimSpace(id) != id { - return fmt.Errorf("untrimmed ID %q", id) +func (x *nameStore) Release(name, id string) (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrNameStore, err) + } + }() + + if err = identifiers.ValidateDockerCompat(name); err != nil { + return err } - fn := func() error { - fileName := filepath.Join(x.dir, name) - b, err := os.ReadFile(fileName) + + return x.safeStore.WithLock(func() error { + var content []byte + content, err = x.safeStore.Get(name) if err != nil { - if os.IsNotExist(err) { - err = nil - } return err } - if s := strings.TrimSpace(string(b)); s != id { - return fmt.Errorf("name %q is used by ID %q, not by %q", name, s, id) + + if string(content) != id { + // Never seen this, but technically possible if downstream code is messed-up + return fmt.Errorf("cannot release name %q (used by ID %q, not by %q)", name, content, id) } - return os.RemoveAll(fileName) - } - return lockutil.WithDirLock(x.dir, fn) + + return x.safeStore.Delete(name) + }) } -func (x *nameStore) Rename(oldName, id, newName string) error { - if oldName == "" || newName == "" { - return nil - } - if err := identifiers.ValidateDockerCompat(newName); err != nil { - return fmt.Errorf("invalid name %q: %w", newName, err) +func (x *nameStore) Rename(oldName, id, newName string) (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrNameStore, err) + } + }() + + if err = identifiers.ValidateDockerCompat(newName); err != nil { + return err } - fn := func() error { - oldFileName := filepath.Join(x.dir, oldName) - b, err := os.ReadFile(oldFileName) + + return x.safeStore.WithLock(func() error { + var doesExist bool + var content []byte + doesExist, err = x.safeStore.Exists(newName) if err != nil { return err } - if s := strings.TrimSpace(string(b)); s != id { - return fmt.Errorf("name %q is used by ID %q, not by %q", oldName, s, id) + + if doesExist { + content, err = x.safeStore.Get(newName) + if err != nil { + return err + } + return fmt.Errorf("name %q is already used by ID %q", newName, string(content)) } - newFileName := filepath.Join(x.dir, newName) - if b, err := os.ReadFile(newFileName); err == nil { - return fmt.Errorf("name %q is already used by ID %q", newName, string(b)) + + content, err = x.safeStore.Get(oldName) + if err != nil { + return err } - return os.Rename(oldFileName, newFileName) - } - return lockutil.WithDirLock(x.dir, fn) + + if string(content) != id { + return fmt.Errorf("name %q is used by ID %q, not by %q", oldName, content, id) + } + + err = x.safeStore.Set(content, newName) + if err != nil { + return err + } + + return x.safeStore.Delete(oldName) + }) } diff --git a/pkg/ocihook/ocihook.go b/pkg/ocihook/ocihook.go index 922e6ff8c96..e67ec17c677 100644 --- a/pkg/ocihook/ocihook.go +++ b/pkg/ocihook/ocihook.go @@ -44,6 +44,7 @@ import ( "github.com/containerd/nerdctl/v2/pkg/netutil/nettype" "github.com/containerd/nerdctl/v2/pkg/ocihook/state" "github.com/containerd/nerdctl/v2/pkg/rootlessutil" + "github.com/containerd/nerdctl/v2/pkg/store" ) const ( @@ -408,7 +409,7 @@ func applyNetworkSettings(opts *handlerOpts) error { return err } ctx := context.Background() - hs, err := hostsstore.NewStore(opts.dataStore) + hs, err := hostsstore.New(opts.dataStore, opts.state.Annotations[labels.Namespace]) if err != nil { return err } @@ -436,7 +437,6 @@ func applyNetworkSettings(opts *handlerOpts) error { gocni.WithArgs("NERDCTL_CNI_DHCP_HOSTNAME", opts.state.Annotations[labels.Hostname]), ) hsMeta := hostsstore.Meta{ - Namespace: opts.state.Annotations[labels.Namespace], ID: opts.state.ID, Networks: make(map[string]*types100.Result, len(opts.cniNames)), Hostname: opts.state.Annotations[labels.Hostname], @@ -510,30 +510,46 @@ func onCreateRuntime(opts *handlerOpts) error { netError = applyNetworkSettings(opts) } - lf := state.NewLifecycleState(opts.state.Annotations[labels.StateDir]) + // Set StartedAt and CreateError + lf, err := state.New(opts.state.Annotations[labels.StateDir]) + if err != nil { + return err + } - return errors.Join(netError, lf.WithLock(func() error { - // Errors are voluntarily ignored here, as they should not be fatal. - // The lifecycle struct is also already warning about the issue. - _ = lf.Load() + err = lf.Transform(func(lf *state.Store) error { lf.StartedAt = time.Now() lf.CreateError = netError != nil - return lf.Save() - })) + return nil + }) + if err != nil { + return err + } + + return netError } func onPostStop(opts *handlerOpts) error { - // See https://github.com/containerd/nerdctl/issues/3357 - // Check if we actually errored during runtimeCreate - // If that is the case, CreateError is set, and we are in postStop while the container will NOT be deleted (see ticket). - // In that case, do NOT treat this as a deletion, as the container is still there. - // Reset CreateError, and return. - lf := state.NewLifecycleState(opts.state.Annotations[labels.StateDir]) - if lf.WithLock(lf.Load) == nil { - if lf.CreateError { - lf.CreateError = false - return lf.WithLock(lf.Save) - } + lf, err := state.New(opts.state.Annotations[labels.StateDir]) + if err != nil { + return err + } + + var shouldExit bool + err = lf.Transform(func(lf *state.Store) error { + // See https://github.com/containerd/nerdctl/issues/3357 + // Check if we actually errored during runtimeCreate + // If that is the case, CreateError is set, and we are in postStop while the container will NOT be deleted (see ticket). + // Thus, do NOT treat this as a deletion, as the container is still there. + // Reset CreateError, and return. + shouldExit = lf.CreateError + lf.CreateError = false + return nil + }) + if err != nil { + return err + } + if shouldExit { + return nil } ctx := context.Background() @@ -586,11 +602,11 @@ func onPostStop(opts *handlerOpts) error { log.L.WithError(err).Errorf("failed to call cni.Remove") return err } - hs, err := hostsstore.NewStore(opts.dataStore) + hs, err := hostsstore.New(opts.dataStore, ns) if err != nil { return err } - if err := hs.Release(ns, opts.state.ID); err != nil { + if err := hs.Release(opts.state.ID); err != nil { return err } } @@ -599,7 +615,8 @@ func onPostStop(opts *handlerOpts) error { return err } name := opts.state.Annotations[labels.Name] - if err := namst.Release(name, opts.state.ID); err != nil { + // Double-releasing may happen with containers started with --rm, so, ignore NotFound errors + if err := namst.Release(name, opts.state.ID); err != nil && !errors.Is(err, store.ErrNotFound) { return fmt.Errorf("failed to release container name %s: %w", name, err) } return nil diff --git a/pkg/ocihook/state/state.go b/pkg/ocihook/state/state.go index 312bf973556..52253d390fe 100644 --- a/pkg/ocihook/state/state.go +++ b/pkg/ocihook/state/state.go @@ -14,87 +14,114 @@ limitations under the License. */ +// Package state provides a store to retrieve and save container lifecycle related information +// This is typically used by oci-hooks for information that cannot be retrieved / updated otherwise +// Specifically, the state carries container start time, and transient information about possible failures during +// hook events processing. +// All store methods are safe to use concurrently and only write atomically. +// Since the state is transient and carrying solely informative data, errors returned from here could be treated as +// soft-failures. +// Note that locking is done at the container state directory level. +// state is currently used by ocihooks and for read by dockercompat (to display started-at time) package state import ( "encoding/json" "errors" - "fmt" - "os" - "path/filepath" "time" - "github.com/containerd/log" - - "github.com/containerd/nerdctl/v2/pkg/lockutil" + "github.com/containerd/nerdctl/v2/pkg/store" ) -// This is meant to store stateful informations about containers that we receive from ocihooks -// We are storing them inside the container statedir -// Note that you MUST use WithLock to perform any operation (like Read or Write). -// Typically: -// lf.WithLock(func ()error { -// lf.Load() -// // Modify something on the object -// lf.StartedAt = ... -// lf.Save() -// }) - -const ( - lifecycleFile = "lifecycle.json" -) +// lifecycleFile is the name of file carrying the container information, relative to stateDir +const lifecycleFile = "lifecycle.json" -func NewLifecycleState(stateDir string) *LifecycleState { - return &LifecycleState{ - stateDir: stateDir, +// ErrLifecycleStore will wrap all errors here +var ErrLifecycleStore = errors.New("lifecycle-store error") + +// New will return a lifecycle struct for the container which stateDir is passed as argument +func New(stateDir string) (*Store, error) { + st, err := store.New(stateDir, 0, 0) + if err != nil { + return nil, errors.Join(ErrLifecycleStore, err) } + + return &Store{ + safeStore: st, + }, nil } -type LifecycleState struct { - stateDir string +// Store exposes methods to retrieve and transform state information about containers. +type Store struct { + safeStore store.Store + + // StartedAt reflects the time at which we received the oci-hook onCreateRuntime event StartedAt time.Time `json:"started_at"` CreateError bool `json:"create_error"` } -func (lf *LifecycleState) WithLock(fun func() error) error { - err := lockutil.WithDirLock(lf.stateDir, fun) - if err != nil { - return fmt.Errorf("failed to lock state dir: %w", err) - } +// Load will populate the struct with existing in-store lifecycle information +func (lf *Store) Load() (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrLifecycleStore, err) + } + }() - return nil + return lf.safeStore.WithLock(lf.rawLoad) } -func (lf *LifecycleState) Load() error { - data, err := os.ReadFile(filepath.Join(lf.stateDir, lifecycleFile)) - if err != nil { - if !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("unable to read lifecycle file: %w", err) +// Transform should be used to perform random mutations +func (lf *Store) Transform(fun func(lf *Store) error) (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrLifecycleStore, err) } - } else { - err = json.Unmarshal(data, lf) + }() + + return lf.safeStore.WithLock(func() error { + err = lf.rawLoad() + if err != nil { + return err + } + err = fun(lf) if err != nil { - // Logging an error, as Load errors are generally ignored downstream - log.L.Error("unable to unmarshall lifecycle data") - return fmt.Errorf("unable to unmarshall lifecycle data: %w", err) + return err } + return lf.rawSave() + }) +} + +// Delete will destroy the lifecycle data +func (lf *Store) Delete() (err error) { + defer func() { + if err != nil { + err = errors.Join(ErrLifecycleStore, err) + } + }() + + return lf.safeStore.WithLock(lf.rawDelete) +} + +func (lf *Store) rawLoad() (err error) { + data, err := lf.safeStore.Get(lifecycleFile) + if err == nil { + err = json.Unmarshal(data, lf) + } else if errors.Is(err, store.ErrNotFound) { + err = nil } - return nil + + return err } -func (lf *LifecycleState) Save() error { - // Write atomically (write, then move) to avoid incomplete writes from happening +func (lf *Store) rawSave() (err error) { data, err := json.Marshal(lf) if err != nil { - return fmt.Errorf("unable to marshall lifecycle data: %w", err) - } - err = os.WriteFile(filepath.Join(lf.stateDir, "."+lifecycleFile), data, 0600) - if err != nil { - return fmt.Errorf("unable to write lifecycle file: %w", err) + return err } - err = os.Rename(filepath.Join(lf.stateDir, "."+lifecycleFile), filepath.Join(lf.stateDir, lifecycleFile)) - if err != nil { - return fmt.Errorf("unable to write lifecycle file: %w", err) - } - return nil + return lf.safeStore.Set(data, lifecycleFile) +} + +func (lf *Store) rawDelete() (err error) { + return lf.safeStore.Delete(lifecycleFile) }