diff --git a/storage/docs/containers-storage.conf.5.md b/storage/docs/containers-storage.conf.5.md index bc4d5d0b26..3dbc6556e5 100644 --- a/storage/docs/containers-storage.conf.5.md +++ b/storage/docs/containers-storage.conf.5.md @@ -219,6 +219,16 @@ based file systems. Use ComposeFS to mount the data layers image. ComposeFS support is experimental and not recommended for production use. This is a "string bool": "false"|"true" (cannot be native TOML boolean) +**sync**="none|filesystem" + Filesystem synchronization mode for layer creation. (default: "none") + +- `none`: No synchronization. + Layer operations complete without calling syncfs(). + +- `filesystem`: Sync before completion. + Flush all pending writes to the file system before marking the layer + as present. This helps prevent data corruption if the system + crashes or loses power during layer operations. ### STORAGE OPTIONS FOR VFS TABLE @@ -228,6 +238,16 @@ The `storage.options.vfs` table supports the following options: ignore_chown_errors can be set to allow a non privileged user running with a single UID within a user namespace to run containers. The user can pull and use any image even those with multiple uids. Note multiple UIDs will be squashed down to the default uid in the container. These images will have no separation between the users in the container. This is a "string bool": "false"|"true" (cannot be native TOML boolean) +**sync**="" + Filesystem synchronization mode for layer creation. (default: "") + +- `none`: No synchronization + Layer operations complete without calling syncfs(). + +- `filesystem`: Sync before completion + Flush all pending writes to the file system before marking the layer + as present. This helps prevent data corruption if the system + crashes or loses power during layer operations. ### STORAGE OPTIONS FOR ZFS TABLE diff --git a/storage/drivers/btrfs/btrfs.go b/storage/drivers/btrfs/btrfs.go index aba898ed55..c1ea5cb3a5 100644 --- a/storage/drivers/btrfs/btrfs.go +++ b/storage/drivers/btrfs/btrfs.go @@ -157,6 +157,12 @@ func (d *Driver) Cleanup() error { return mount.Unmount(d.home) } +// SyncMode returns the sync mode configured for the driver. +// Btrfs does not support sync mode configuration, always returns SyncModeNone. +func (d *Driver) SyncMode() graphdriver.SyncMode { + return graphdriver.SyncModeNone +} + func free(p *C.char) { C.free(unsafe.Pointer(p)) } diff --git a/storage/drivers/driver.go b/storage/drivers/driver.go index 38706dc999..1c9c8d24f6 100644 --- a/storage/drivers/driver.go +++ b/storage/drivers/driver.go @@ -17,6 +17,7 @@ import ( "go.podman.io/storage/pkg/directory" "go.podman.io/storage/pkg/fileutils" "go.podman.io/storage/pkg/idtools" + "go.podman.io/storage/pkg/system" ) // FsMagic unsigned id of the filesystem in use. @@ -27,6 +28,51 @@ const ( FsMagicUnsupported = FsMagic(0x00000000) ) +// SyncMode defines when filesystem synchronization occurs during layer creation. +type SyncMode int + +const ( + // SyncModeNone - no synchronization + SyncModeNone SyncMode = iota + // SyncModeFilesystem - use syncfs() before layer marked as present + SyncModeFilesystem +) + +// String returns the string representation of the sync mode +func (m SyncMode) String() string { + switch m { + case SyncModeNone: + return "none" + case SyncModeFilesystem: + return "filesystem" + default: + return "unknown" + } +} + +// ParseSyncMode converts a string to SyncMode +func ParseSyncMode(s string) (SyncMode, error) { + switch strings.ToLower(strings.TrimSpace(s)) { + case "", "none": + return SyncModeNone, nil + case "filesystem": + return SyncModeFilesystem, nil + default: + return SyncModeNone, fmt.Errorf("invalid sync mode: %s", s) + } +} + +// Sync applies the sync mode to the given path. +// Returns nil if mode is SyncModeNone or if sync succeeds. +func (m SyncMode) Sync(path string) error { + if m == SyncModeFilesystem { + if err := system.Syncfs(path); err != nil { + return fmt.Errorf("syncing file system for %q: %w", path, err) + } + } + return nil +} + var ( // All registered drivers drivers map[string]InitFunc @@ -169,6 +215,8 @@ type ProtoDriver interface { AdditionalImageStores() []string // Dedup performs deduplication of the driver's storage. Dedup(DedupArgs) (DedupResult, error) + // SyncMode returns the sync mode configured for the driver. + SyncMode() SyncMode } // DiffDriver is the interface to use to implement graph diffs diff --git a/storage/drivers/overlay/overlay.go b/storage/drivers/overlay/overlay.go index 2eb720c188..d61d6ea046 100644 --- a/storage/drivers/overlay/overlay.go +++ b/storage/drivers/overlay/overlay.go @@ -114,6 +114,7 @@ type overlayOptions struct { ignoreChownErrors bool forceMask *os.FileMode useComposefs bool + syncMode graphdriver.SyncMode } // Driver contains information about the home directory and the list of active mounts that are created using this driver. @@ -594,6 +595,19 @@ func parseOptions(options []string) (*overlayOptions, error) { } m := os.FileMode(mask) o.forceMask = &m + case "sync": + logrus.Debugf("overlay: sync=%s", val) + mode, err := graphdriver.ParseSyncMode(val) + if err != nil { + return nil, fmt.Errorf("invalid sync mode for overlay driver: %w", err) + } + // SyncModeNone and SyncModeFilesystem do not need any special handling because + // the overlay storage is always on the same file system as the metadata, thus + // the Syncfs() in layers.go cover also any file written by the overlay driver. + if mode != SyncModeNone || mode != SyncModeFilesystem { + return nil, fmt.Errorf("invalid mode for overlay driver: %q", val) + } + o.syncMode = mode default: return nil, fmt.Errorf("overlay: unknown option %s", key) } @@ -866,6 +880,11 @@ func (d *Driver) Cleanup() error { return mount.Unmount(d.home) } +// SyncMode returns the sync mode configured for the driver. +func (d *Driver) SyncMode() graphdriver.SyncMode { + return d.options.syncMode +} + // pruneStagingDirectories cleans up any staging directory that was leaked. // It returns whether any staging directory is still present. func (d *Driver) pruneStagingDirectories() bool { @@ -2018,12 +2037,8 @@ func (d *Driver) Put(id string) error { // If fusermount|fusermount3 failed to unmount the FUSE file system, make sure all // pending changes are propagated to the file system if !unmounted { - fd, err := unix.Open(mountpoint, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) - if err == nil { - if err := unix.Syncfs(fd); err != nil { - logrus.Debugf("Error Syncfs(%s) - %v", mountpoint, err) - } - unix.Close(fd) + if err := system.Syncfs(mountpoint); err != nil { + logrus.Debugf("Error Syncfs(%s) - %v", mountpoint, err) } } } diff --git a/storage/drivers/vfs/driver.go b/storage/drivers/vfs/driver.go index b90c2046cf..ad43f60d37 100644 --- a/storage/drivers/vfs/driver.go +++ b/storage/drivers/vfs/driver.go @@ -64,6 +64,19 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) if err != nil { return nil, err } + case "vfs.sync", ".sync": + logrus.Debugf("vfs: sync=%s", val) + var err error + d.syncMode, err = graphdriver.ParseSyncMode(val) + // SyncModeNone and SyncModeFilesystem do not need any special handling because + // the vfs storage is always on the same file system as the metadata, thus the + // Syncfs() in layers.go cover also any file written by the vfs driver. + if mode != SyncModeNone || mode != SyncModeFilesystem { + return nil, fmt.Errorf("invalid mode for vfs driver: %q", val) + } + if err != nil { + return nil, fmt.Errorf("invalid sync mode for vfs driver: %w", err) + } default: return nil, fmt.Errorf("vfs driver does not support %s options", key) } @@ -84,6 +97,7 @@ type Driver struct { home string additionalHomes []string ignoreChownErrors bool + syncMode graphdriver.SyncMode naiveDiff graphdriver.DiffDriver updater graphdriver.LayerIDMapUpdater imageStore string @@ -108,6 +122,11 @@ func (d *Driver) Cleanup() error { return nil } +// SyncMode returns the sync mode configured for the driver. +func (d *Driver) SyncMode() graphdriver.SyncMode { + return d.syncMode +} + type fileGetNilCloser struct { storage.FileGetter } @@ -136,7 +155,12 @@ func (d *Driver) ApplyDiff(id string, options graphdriver.ApplyDiffOpts) (size i if d.ignoreChownErrors { options.IgnoreChownErrors = d.ignoreChownErrors } - return d.naiveDiff.ApplyDiff(id, options) + size, err = d.naiveDiff.ApplyDiff(id, options) + if err != nil { + return 0, err + } + + return size, nil } // CreateReadWrite creates a layer that is writable for use as a container diff --git a/storage/drivers/vfs/vfs_test.go b/storage/drivers/vfs/vfs_test.go index b1e8ca54a3..b572a73e60 100644 --- a/storage/drivers/vfs/vfs_test.go +++ b/storage/drivers/vfs/vfs_test.go @@ -5,8 +5,8 @@ package vfs import ( "testing" + graphdriver "go.podman.io/storage/drivers" "go.podman.io/storage/drivers/graphtest" - "go.podman.io/storage/pkg/reexec" ) @@ -52,6 +52,23 @@ func TestVfsListLayers(t *testing.T) { graphtest.DriverTestListLayers(t, "vfs") } +func TestVfsSyncModeParsing(t *testing.T) { + _, err := graphdriver.ParseSyncMode("invalid") + if err == nil { + t.Error("Expected error for invalid sync mode, got nil") + } + + _, err = graphdriver.ParseSyncMode("filesystem") + if err != nil { + t.Errorf("Expected no error for valid sync mode 'filesystem', got %v", err) + } + + _, err = graphdriver.ParseSyncMode("none") + if err != nil { + t.Errorf("Expected no error for valid sync mode 'none', got %v", err) + } +} + func TestVfsTeardown(t *testing.T) { graphtest.PutDriver(t) } diff --git a/storage/drivers/zfs/zfs.go b/storage/drivers/zfs/zfs.go index b994278bb2..4211a570e4 100644 --- a/storage/drivers/zfs/zfs.go +++ b/storage/drivers/zfs/zfs.go @@ -183,6 +183,12 @@ func (d *Driver) Cleanup() error { return nil } +// SyncMode returns the sync mode configured for the driver. +// ZFS does not support sync mode configuration, always returns SyncModeNone. +func (d *Driver) SyncMode() graphdriver.SyncMode { + return graphdriver.SyncModeNone +} + // Status returns information about the ZFS filesystem. It returns a two dimensional array of information // such as pool name, dataset name, disk usage, parent quota and compression used. // Currently it return 'Zpool', 'Zpool Health', 'Parent Dataset', 'Space Used By Parent', diff --git a/storage/layers.go b/storage/layers.go index d485c9b4fa..6ba1ac6d9b 100644 --- a/storage/layers.go +++ b/storage/layers.go @@ -1139,6 +1139,13 @@ func (r *layerStore) saveLayers(saveLocations layerLocations) error { if location == volatileLayerLocation { opts.NoSync = true } + // If the underlying storage driver is using sync, make sure we sync everything before + // saving the layer data, this ensures that all files/directories are properly created and written. + if r.driver.SyncMode() != drivers.SyncModeNone { + if err := system.Syncfs(filepath.Dir(rpath)); err != nil { + return err + } + } if err := ioutils.AtomicWriteFileWithOpts(rpath, jldata, 0o600, &opts); err != nil { return err } @@ -1610,14 +1617,8 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount } } - if oldMappings != nil && - (!reflect.DeepEqual(oldMappings.UIDs(), idMappings.UIDs()) || !reflect.DeepEqual(oldMappings.GIDs(), idMappings.GIDs())) { - if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil { - cleanupFailureContext = "in UpdateLayerIDMap" - return nil, -1, err - } - } - + // Write the tar-split file, and especially creating new directories before UpdateLayerIDMap so any sync policy + // done there will also cover the new directory created. if len(templateTSdata) > 0 { if err = os.MkdirAll(filepath.Dir(r.tspath(id)), 0o700); err != nil { cleanupFailureContext = "creating tar-split parent directory for a copy from template" @@ -1629,6 +1630,14 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount } } + if oldMappings != nil && + (!reflect.DeepEqual(oldMappings.UIDs(), idMappings.UIDs()) || !reflect.DeepEqual(oldMappings.GIDs(), idMappings.GIDs())) { + if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil { + cleanupFailureContext = "in UpdateLayerIDMap" + return nil, -1, err + } + } + size = -1 if contents != nil { if contents.stagedLayerExtraction != nil { @@ -2796,9 +2805,6 @@ func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *driver } maps.Copy(layer.Flags, options.Flags) } - if err = r.saveFor(layer); err != nil { - return err - } if diffOutput.TarSplit != nil { tarSplitFile, err := createTarSplitFile(r, layer.ID) @@ -2839,6 +2845,7 @@ func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *driver return fmt.Errorf("sync tar-split file: %w", err) } } + for k, v := range diffOutput.BigData { if err := r.SetBigData(id, k, bytes.NewReader(v)); err != nil { if err2 := r.deleteWhileHoldingLock(id); err2 != nil { @@ -2847,6 +2854,10 @@ func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *driver return err } } + + if err = r.saveFor(layer); err != nil { + return err + } return err } diff --git a/storage/pkg/config/config.go b/storage/pkg/config/config.go index bf350c43cc..d10e7a7188 100644 --- a/storage/pkg/config/config.go +++ b/storage/pkg/config/config.go @@ -31,12 +31,16 @@ type OverlayOptionsConfig struct { // ForceMask indicates the permissions mask (e.g. "0755") to use for new // files and directories ForceMask string `toml:"force_mask,omitempty"` + // Sync controls filesystem sync during layer creation + Sync string `toml:"sync,omitempty"` } type VfsOptionsConfig struct { // IgnoreChownErrors is a flag for whether chown errors should be // ignored when building an image. IgnoreChownErrors string `toml:"ignore_chown_errors,omitempty"` + // Sync controls filesystem sync during layer creation + Sync string `toml:"sync,omitempty"` } type ZfsOptionsConfig struct { @@ -177,12 +181,18 @@ func GetGraphDriverOptions(driverName string, options OptionsConfig) []string { if options.Overlay.UseComposefs != "" { doptions = append(doptions, fmt.Sprintf("%s.use_composefs=%s", driverName, options.Overlay.UseComposefs)) } + if options.Overlay.Sync != "" { + doptions = append(doptions, fmt.Sprintf("%s.sync=%s", driverName, options.Overlay.Sync)) + } case "vfs": if options.Vfs.IgnoreChownErrors != "" { doptions = append(doptions, fmt.Sprintf("%s.ignore_chown_errors=%s", driverName, options.Vfs.IgnoreChownErrors)) } else if options.IgnoreChownErrors != "" { doptions = append(doptions, fmt.Sprintf("%s.ignore_chown_errors=%s", driverName, options.IgnoreChownErrors)) } + if options.Vfs.Sync != "" { + doptions = append(doptions, fmt.Sprintf("%s.sync=%s", driverName, options.Vfs.Sync)) + } case "zfs": if options.Zfs.Name != "" { diff --git a/storage/pkg/config/config_test.go b/storage/pkg/config/config_test.go index 477abb53ff..66e2473e6f 100644 --- a/storage/pkg/config/config_test.go +++ b/storage/pkg/config/config_test.go @@ -259,3 +259,35 @@ func TestZfsOptions(t *testing.T) { t.Fatalf("Expected to find size %q, got %v", s100, doptions) } } + +func TestSyncOptions(t *testing.T) { + var options OptionsConfig + + // Test overlay driver sync mode + options.Overlay.Sync = "filesystem" + doptions := GetGraphDriverOptions("overlay", options) + if !searchOptions(doptions, "overlay.sync=filesystem") { + t.Fatalf("Expected to find overlay sync option in %v", doptions) + } + + // Test VFS driver + options = OptionsConfig{} + options.Vfs.Sync = "filesystem" + doptions = GetGraphDriverOptions("vfs", options) + if !searchOptions(doptions, "vfs.sync=filesystem") { + t.Fatalf("Expected to find vfs sync option in %v", doptions) + } + + // Test empty configuration (no sync options should be added) + options = OptionsConfig{} + doptions = GetGraphDriverOptions("overlay", options) + if searchOptions(doptions, "sync=") { + t.Fatalf("Expected no sync option when not configured, got %v", doptions) + } + + options = OptionsConfig{} + doptions = GetGraphDriverOptions("vfs", options) + if searchOptions(doptions, "sync=") { + t.Fatalf("Expected no sync option for vfs when not configured, got %v", doptions) + } +} diff --git a/storage/pkg/system/syncfs_linux.go b/storage/pkg/system/syncfs_linux.go new file mode 100644 index 0000000000..b8c612640b --- /dev/null +++ b/storage/pkg/system/syncfs_linux.go @@ -0,0 +1,23 @@ +//go:build linux + +package system + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +// Syncfs synchronizes the filesystem containing the given path. +func Syncfs(path string) error { + fd, err := unix.Open(path, unix.O_RDONLY, 0) + if err != nil { + return fmt.Errorf("open for syncfs: %w", err) + } + defer unix.Close(fd) + + if err := unix.Syncfs(fd); err != nil { + return fmt.Errorf("syncfs: %w", err) + } + return nil +} diff --git a/storage/pkg/system/syncfs_unix.go b/storage/pkg/system/syncfs_unix.go new file mode 100644 index 0000000000..16068124a6 --- /dev/null +++ b/storage/pkg/system/syncfs_unix.go @@ -0,0 +1,13 @@ +//go:build unix && !linux + +package system + +import "golang.org/x/sys/unix" + +// Syncfs synchronizes the filesystem containing the given path. +// On non-Linux Unix platforms, this falls back to sync(2) which +// syncs all filesystems. +func Syncfs(path string) error { + unix.Sync() + return nil +} diff --git a/storage/pkg/system/syncfs_windows.go b/storage/pkg/system/syncfs_windows.go new file mode 100644 index 0000000000..1b208ba2d4 --- /dev/null +++ b/storage/pkg/system/syncfs_windows.go @@ -0,0 +1,10 @@ +//go:build windows + +package system + +import "errors" + +// Syncfs is not supported on Windows. +func Syncfs(path string) error { + return errors.New("syncfs is not supported on Windows") +} diff --git a/storage/storage.conf b/storage/storage.conf index 2fff0cecf2..ef755de274 100644 --- a/storage/storage.conf +++ b/storage/storage.conf @@ -176,3 +176,12 @@ mountopt = "nodev" # "force_mask" permissions. # # force_mask = "" + +# Sync filesystem before marking layer as present. Filesystem must support syncfs. +# Values: "none", "filesystem" +# sync = "none" + +[storage.options.vfs] +# Sync filesystem before marking layer as present. Filesystem must support syncfs. +# Values: "none", "filesystem" +# sync = "none"