From b513929a7e357d1d1d245123f61cc66778d75251 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Wed, 10 Dec 2025 15:57:13 -0500 Subject: [PATCH 1/2] Restore runc kill all behavior for init processes PR #2538 removed the `runc kill --all` flag when signaling containers. However, when cleaning up after the init process exits, the `--all` flag is still needed to remove any potentially orphaned processes when using runc before v1.2. See: https://github.com/opencontainers/runc/commit/f8ad20f500bf75edd86041657ee762bce116f8f5#diff-ade6035c3e554d7627cdc368b27f475fc0dad83e02382a1dea9cae9b75871087 Additionally, switch to using error strings directly from runc code in `internal\guest\runtime\runc\utils.go`: they have been available since runc v1.1.0-rc.1. See: https://github.com/opencontainers/runc/pull/3033 Also, add logic to match on container not/still running error strings and return them for `Kill`, since returning `ERROR_VMCOMPUTE_SYSTEM_ALREADY_STOPPED` (`0xc0370110`) when killing a stopped container is expected behavior and handled appropriately in `"cmd/containerd-shim-runhcs-v1".(*hcsExec).Kill()`. 
Signed-off-by: Hamza El-Saawy --- cmd/containerd-shim-runhcs-v1/task_hcs.go | 2 +- internal/bridgeutils/gcserr/errors.go | 3 ++- internal/guest/runtime/hcsv2/container.go | 5 +++- internal/guest/runtime/runc/container.go | 30 +++++++++++++++++++++-- internal/guest/runtime/runc/process.go | 4 +-- internal/guest/runtime/runc/utils.go | 18 +++++++------- 6 files changed, 45 insertions(+), 17 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index ae4b391cd6..41e3c2e743 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -446,7 +446,7 @@ func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all if signal == 0x9 && eid == "" && ht.host != nil { // If this is a SIGKILL against the init process we start a background // timer and wait on either the timer expiring or the process exiting - // cleanly. If the timer exires first we forcibly close the UVM as we + // cleanly. If the timer expires first we forcibly close the UVM as we // assume the guest is misbehaving for some reason. go func() { t := time.NewTimer(30 * time.Second) diff --git a/internal/bridgeutils/gcserr/errors.go b/internal/bridgeutils/gcserr/errors.go index 2acbffb754..e282c62a9b 100644 --- a/internal/bridgeutils/gcserr/errors.go +++ b/internal/bridgeutils/gcserr/errors.go @@ -10,6 +10,7 @@ import ( // Hresult is a type corresponding to the HRESULT error type used on Windows. type Hresult int32 +// ! Must match error values in internal\hcs\errors.go // from // - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/705fb797-2175-4a90-b5a3-3918024b10b8 // - https://docs.microsoft.com/en-us/virtualization/api/hcs/reference/hcshresult @@ -51,7 +52,7 @@ const ( // // The virtual machine or container with the specified identifier is not // running. 
- HrVmcomputeSystemAlreadyStopped = Hresult(-2143878896) // 0x80370110 + HrVmcomputeSystemAlreadyStopped = Hresult(-1070137072) // 0xC0370110 ) // TODO: update implementation to use go1.13 style errors with `errors.As` and co. diff --git a/internal/guest/runtime/hcsv2/container.go b/internal/guest/runtime/hcsv2/container.go index 62f8ca3e43..bb9c3af5ea 100644 --- a/internal/guest/runtime/hcsv2/container.go +++ b/internal/guest/runtime/hcsv2/container.go @@ -214,7 +214,10 @@ func (c *Container) GetAllProcessPids(ctx context.Context) ([]int, error) { // Kill sends 'signal' to the container process. func (c *Container) Kill(ctx context.Context, signal syscall.Signal) error { - log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Kill") + log.G(ctx).WithFields(logrus.Fields{ + logfields.ContainerID: c.id, + "signal": signal.String(), + }).Info("opengcs::Container::Kill") err := c.container.Kill(signal) if err != nil { return err diff --git a/internal/guest/runtime/runc/container.go b/internal/guest/runtime/runc/container.go index 00418f99ee..405f284e72 100644 --- a/internal/guest/runtime/runc/container.go +++ b/internal/guest/runtime/runc/container.go @@ -76,14 +76,40 @@ func (c *container) ExecProcess(process *oci.Process, stdioSet *stdio.Connection // Kill sends the specified signal to the container's init process. func (c *container) Kill(signal syscall.Signal) error { - logrus.WithField(logfields.ContainerID, c.id).Debug("runc::container::Kill") + logrus.WithFields(logrus.Fields{ + logfields.ContainerID: c.id, + "signal": signal.String(), + }).Debug("runc::container::Kill") + return c.kill(signal, false) +} + +// killAll terminates all processes started in the container. +// +// Note: [runc deprecated] the `kill --all` flag starting in v1.2, but, prior to that, it was required +// to kill all processes within the container after the init exits. 
+// Until we can guarantee that the runc version is greater than 1.1 and runc explicitly removes the option, +// keep using it here. +// This mirrors how upstream containerd's runc handles [init exit] via [kill all]. +// +// [runc deprecated]: https://github.com/opencontainers/runc/pull/3825 +// [init exit]: https://github.com/containerd/containerd/blob/48baa31a0ad1ca1121ddaf968d3b8aa68c40bf84/cmd/containerd-shim-runc-v2/task/service.go#L725 +// [kill all]: https://github.com/containerd/containerd/blob/48baa31a0ad1ca1121ddaf968d3b8aa68c40bf84/cmd/containerd-shim-runc-v2/process/init.go#L375 +func (c *container) killAll() error { + logrus.WithField(logfields.ContainerID, c.id).Debug("runc::container::killAll") + return c.kill(syscall.SIGKILL, true) +} + +func (c *container) kill(signal syscall.Signal, all bool) error { args := []string{"kill"} + if all { + args = append(args, "--all") + } args = append(args, c.id, strconv.Itoa(int(signal))) cmd := runcCommand(args...) out, err := cmd.CombinedOutput() if err != nil { runcErr := parseRuncError(string(out)) - return errors.Wrapf(runcErr, "unknown runc error after kill %v: %s", err, string(out)) + return errors.Wrapf(runcErr, "runc kill failed with %v: %s", err, string(out)) } return nil } diff --git a/internal/guest/runtime/runc/process.go b/internal/guest/runtime/runc/process.go index 1190e4472f..3db4ffadc5 100644 --- a/internal/guest/runtime/runc/process.go +++ b/internal/guest/runtime/runc/process.go @@ -4,8 +4,6 @@ package runc import ( - "syscall" - "github.com/Microsoft/hcsshim/internal/guest/runtime" "github.com/Microsoft/hcsshim/internal/guest/stdio" "github.com/Microsoft/hcsshim/internal/logfields" @@ -65,7 +63,7 @@ func (p *process) Wait() (int, error) { // If the init process of a pid namespace terminates, the kernel // terminates all other processes in the namespace with SIGKILL. We // simulate the same behavior. 
- if err := p.c.Kill(syscall.SIGKILL); err != nil { + if err := p.c.killAll(); err != nil { l.WithError(err).Error("failed to terminate container after process wait") } } diff --git a/internal/guest/runtime/runc/utils.go b/internal/guest/runtime/runc/utils.go index 38535ccbab..1112422f17 100644 --- a/internal/guest/runtime/runc/utils.go +++ b/internal/guest/runtime/runc/utils.go @@ -12,6 +12,7 @@ import ( "strings" "syscall" + "github.com/opencontainers/runc/libcontainer" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -124,20 +125,19 @@ func (l *standardLogEntry) asError() (err error) { } func parseRuncError(s string) (err error) { - // TODO (helsaawy): match with errors from - // https://github.com/opencontainers/runc/blob/master/libcontainer/error.go if strings.HasPrefix(s, "container") && strings.HasSuffix(s, "does not exist") { - // currently: "container does not exist" + // match "container %q does not exist" and [libcontainer.ErrNotExist] err = runtime.ErrContainerDoesNotExist - } else if strings.Contains(s, "container with id exists") || - strings.Contains(s, "container with given ID already exists") { + } else if strings.Contains(s, "container with id exists") || strings.Contains(s, libcontainer.ErrExist.Error()) { err = runtime.ErrContainerAlreadyExists - } else if strings.Contains(s, "invalid id format") || - strings.Contains(s, "invalid container ID format") { + } else if strings.Contains(s, "invalid id format") || strings.Contains(s, libcontainer.ErrInvalidID.Error()) { err = runtime.ErrInvalidContainerID - } else if strings.Contains(s, "container") && - strings.Contains(s, "that is not stopped") { + } else if strings.Contains(s, "container") && strings.Contains(s, "that is not stopped") { err = runtime.ErrContainerNotStopped + } else if strings.Contains(s, libcontainer.ErrRunning.Error()) { + err = runtime.ErrContainerStillRunning + } else if strings.Contains(s, libcontainer.ErrNotRunning.Error()) { + err = runtime.ErrContainerNotRunning } 
else { err = errors.New(s) } From fc06d3a288568f0fdb2c8bef2f4c73788f128061 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Wed, 10 Dec 2025 16:10:18 -0500 Subject: [PATCH 2/2] go mod tidy and vendor; fix lint issues Fix lint errors (introduced by go1.24): ``` printf: non-constant format string in call to github.com/pkg/errors.Wrapf (govet) ``` Signed-off-by: Hamza El-Saawy --- go.mod | 15 +- go.sum | 34 +- internal/guest/runtime/runc/utils.go | 2 +- internal/tools/uvmboot/main.go | 2 +- test/go.mod | 10 +- test/go.sum | 33 +- vendor/cyphar.com/go-pathrs/.golangci.yml | 43 + vendor/cyphar.com/go-pathrs/COPYING | 373 +++ vendor/cyphar.com/go-pathrs/doc.go | 14 + vendor/cyphar.com/go-pathrs/handle_linux.go | 114 + .../go-pathrs/internal/fdutils/fd_linux.go | 75 + .../internal/libpathrs/error_unix.go | 40 + .../internal/libpathrs/libpathrs_linux.go | 337 +++ .../go-pathrs/procfs/procfs_linux.go | 246 ++ vendor/cyphar.com/go-pathrs/root_linux.go | 367 +++ vendor/cyphar.com/go-pathrs/utils_linux.go | 56 + .../checkpoint-restore/go-criu/v6/.gitignore | 13 + .../go-criu/v6/.golangci.yml | 10 + .../checkpoint-restore/go-criu/v6/LICENSE | 201 ++ .../checkpoint-restore/go-criu/v6/Makefile | 41 + .../checkpoint-restore/go-criu/v6/README.md | 105 + .../checkpoint-restore/go-criu/v6/features.go | 45 + .../checkpoint-restore/go-criu/v6/main.go | 264 ++ .../checkpoint-restore/go-criu/v6/notify.go | 62 + .../go-criu/v6/rpc/rpc.pb.go | 2327 +++++++++++++++++ .../go-criu/v6/rpc/rpc.proto | 248 ++ .../containerd/console/console_other.go | 4 +- .../containerd/console/console_unix.go | 9 + .../containerd/console/tc_darwin.go | 5 +- .../containerd/console/tc_freebsd_cgo.go | 5 +- .../containerd/console/tc_freebsd_nocgo.go | 5 +- .../github.com/containerd/console/tc_linux.go | 5 +- .../containerd/console/tc_netbsd.go | 5 +- .../containerd/console/tc_openbsd_cgo.go | 6 +- .../containerd/console/tc_openbsd_nocgo.go | 6 +- .../github.com/containerd/console/tc_zos.go | 5 +- 
.../cyphar/filepath-securejoin/.golangci.yml | 60 + .../cyphar/filepath-securejoin/CHANGELOG.md | 461 ++++ .../cyphar/filepath-securejoin/COPYING.md | 447 ++++ .../cyphar/filepath-securejoin/LICENSE.BSD | 28 + .../filepath-securejoin/LICENSE.MPL-2.0 | 373 +++ .../cyphar/filepath-securejoin/README.md | 184 ++ .../cyphar/filepath-securejoin/VERSION | 1 + .../cyphar/filepath-securejoin/codecov.yml | 29 + .../cyphar/filepath-securejoin/doc.go | 47 + .../internal/consts/consts.go | 15 + .../cyphar/filepath-securejoin/join.go | 169 ++ .../filepath-securejoin/pathrs-lite/README.md | 35 + .../filepath-securejoin/pathrs-lite/doc.go | 16 + .../pathrs-lite/internal/assert/assert.go | 30 + .../pathrs-lite/internal/errors_linux.go | 41 + .../pathrs-lite/internal/fd/at_linux.go | 148 ++ .../pathrs-lite/internal/fd/fd.go | 55 + .../pathrs-lite/internal/fd/fd_linux.go | 78 + .../pathrs-lite/internal/fd/mount_linux.go | 54 + .../pathrs-lite/internal/fd/openat2_linux.go | 62 + .../pathrs-lite/internal/gocompat/README.md | 10 + .../pathrs-lite/internal/gocompat/doc.go | 13 + .../gocompat/gocompat_errors_go120.go | 19 + .../gocompat/gocompat_errors_unsupported.go | 40 + .../gocompat/gocompat_generics_go121.go | 53 + .../gocompat/gocompat_generics_unsupported.go | 187 ++ .../pathrs-lite/internal/gopathrs/doc.go | 16 + .../internal/gopathrs/lookup_linux.go | 399 +++ .../internal/gopathrs/mkdir_linux.go | 212 ++ .../internal/gopathrs/open_linux.go | 26 + .../internal/gopathrs/openat2_linux.go | 101 + .../internal/kernelversion/kernel_linux.go | 123 + .../pathrs-lite/internal/linux/doc.go | 12 + .../pathrs-lite/internal/linux/mount_linux.go | 47 + .../internal/linux/openat2_linux.go | 31 + .../internal/procfs/procfs_linux.go | 544 ++++ .../internal/procfs/procfs_lookup_linux.go | 222 ++ .../filepath-securejoin/pathrs-lite/mkdir.go | 55 + .../pathrs-lite/mkdir_libpathrs.go | 52 + .../pathrs-lite/mkdir_purego.go | 42 + .../filepath-securejoin/pathrs-lite/open.go | 45 + 
.../pathrs-lite/open_libpathrs.go | 57 + .../pathrs-lite/open_purego.go | 42 + .../pathrs-lite/procfs/procfs_libpathrs.go | 161 ++ .../pathrs-lite/procfs/procfs_purego.go | 157 ++ .../cyphar/filepath-securejoin/vfs.go | 37 + .../moby/sys/capability/CHANGELOG.md | 124 + vendor/github.com/moby/sys/capability/LICENSE | 25 + .../github.com/moby/sys/capability/README.md | 13 + .../moby/sys/capability/capability.go | 176 ++ .../moby/sys/capability/capability_linux.go | 591 +++++ .../moby/sys/capability/capability_noop.go | 46 + vendor/github.com/moby/sys/capability/enum.go | 330 +++ .../moby/sys/capability/enum_gen.go | 137 + .../moby/sys/capability/syscall_linux.go | 161 ++ .../github.com/mrunalp/fileutils/.gitignore | 1 + vendor/github.com/mrunalp/fileutils/LICENSE | 191 ++ .../github.com/mrunalp/fileutils/MAINTAINERS | 1 + vendor/github.com/mrunalp/fileutils/README.md | 5 + .../github.com/mrunalp/fileutils/fileutils.go | 171 ++ .../github.com/mrunalp/fileutils/idtools.go | 57 + .../cgroups/.golangci-extra.yml | 21 + .../opencontainers/cgroups/.golangci.yml | 31 + .../opencontainers/cgroups/CODEOWNERS | 1 + .../opencontainers/cgroups/CONTRIBUTING.md | 150 ++ .../opencontainers/cgroups/GOVERNANCE.md | 63 + .../opencontainers/cgroups/MAINTAINERS | 8 + .../cgroups/MAINTAINERS_GUIDE.md | 92 + .../opencontainers/cgroups/README.md | 11 + .../opencontainers/cgroups/RELEASES.md | 51 + .../opencontainers/cgroups/cgroups.go | 78 + .../cgroups/config_blkio_device.go | 66 + .../cgroups/config_hugepages.go | 9 + .../cgroups/config_ifprio_map.go | 14 + .../opencontainers/cgroups/config_linux.go | 169 ++ .../opencontainers/cgroups/config_rdma.go | 9 + .../cgroups/config_unsupported.go | 8 + .../github.com/opencontainers/cgroups/file.go | 216 ++ .../opencontainers/cgroups/fs/blkio.go | 310 +++ .../opencontainers/cgroups/fs/cpu.go | 181 ++ .../opencontainers/cgroups/fs/cpuacct.go | 158 ++ .../opencontainers/cgroups/fs/cpuset.go | 276 ++ .../opencontainers/cgroups/fs/devices.go | 38 + 
.../opencontainers/cgroups/fs/error.go | 15 + .../opencontainers/cgroups/fs/freezer.go | 157 ++ .../opencontainers/cgroups/fs/fs.go | 265 ++ .../opencontainers/cgroups/fs/hugetlb.go | 83 + .../opencontainers/cgroups/fs/memory.go | 356 +++ .../opencontainers/cgroups/fs/name.go | 30 + .../opencontainers/cgroups/fs/net_cls.go | 31 + .../opencontainers/cgroups/fs/net_prio.go | 29 + .../opencontainers/cgroups/fs/paths.go | 169 ++ .../opencontainers/cgroups/fs/perf_event.go | 23 + .../opencontainers/cgroups/fs/pids.go | 61 + .../opencontainers/cgroups/fs/rdma.go | 24 + .../opencontainers/cgroups/fs2/cpu.go | 123 + .../opencontainers/cgroups/fs2/cpuset.go | 27 + .../opencontainers/cgroups/fs2/create.go | 151 ++ .../opencontainers/cgroups/fs2/defaultpath.go | 80 + .../opencontainers/cgroups/fs2/freezer.go | 140 + .../opencontainers/cgroups/fs2/fs2.go | 316 +++ .../opencontainers/cgroups/fs2/hugetlb.go | 70 + .../opencontainers/cgroups/fs2/io.go | 192 ++ .../opencontainers/cgroups/fs2/memory.go | 238 ++ .../opencontainers/cgroups/fs2/misc.go | 52 + .../opencontainers/cgroups/fs2/pids.go | 71 + .../opencontainers/cgroups/fs2/psi.go | 89 + .../opencontainers/cgroups/fscommon/rdma.go | 120 + .../opencontainers/cgroups/fscommon/utils.go | 144 + .../opencontainers/cgroups/getallpids.go | 27 + .../cgroups/internal/path/path.go | 52 + .../opencontainers/cgroups/manager/new.go | 77 + .../opencontainers/cgroups/stats.go | 209 ++ .../opencontainers/cgroups/systemd/common.go | 366 +++ .../opencontainers/cgroups/systemd/cpuset.go | 60 + .../opencontainers/cgroups/systemd/dbus.go | 102 + .../opencontainers/cgroups/systemd/devices.go | 74 + .../opencontainers/cgroups/systemd/user.go | 92 + .../opencontainers/cgroups/systemd/v1.go | 415 +++ .../opencontainers/cgroups/systemd/v2.go | 518 ++++ .../opencontainers/cgroups/utils.go | 483 ++++ .../opencontainers/cgroups/v1_utils.go | 276 ++ .../opencontainers/runc/internal/linux/doc.go | 3 + .../runc/internal/linux/linux.go | 44 + 
.../runc/internal/pathrs/doc.go | 23 + .../internal/pathrs/mkdirall_pathrslite.go | 99 + .../runc/internal/pathrs/path.go | 34 + .../runc/internal/pathrs/procfs_pathrslite.go | 108 + .../runc/internal/pathrs/retry.go | 66 + .../runc/internal/pathrs/root_pathrslite.go | 72 + .../opencontainers/runc/internal/sys/doc.go | 5 + .../runc/internal/sys/opath_linux.go | 53 + .../runc/internal/sys/sysctl_linux.go | 54 + .../runc/internal/sys/verify_inode_unix.go | 30 + .../runc/libcontainer/README.md | 261 ++ .../opencontainers/runc/libcontainer/SPEC.md | 465 ++++ .../runc/libcontainer/apparmor/apparmor.go | 16 + .../libcontainer/apparmor/apparmor_linux.go | 70 + .../apparmor/apparmor_unsupported.go | 14 + .../libcontainer/capabilities/capabilities.go | 158 ++ .../capabilities/capabilities_unsupported.go | 3 + .../libcontainer/configs/cgroup_deprecated.go | 29 + .../runc/libcontainer/configs/config.go | 619 +++++ .../runc/libcontainer/configs/config_linux.go | 97 + .../libcontainer/configs/configs_fuzzer.go | 9 + .../runc/libcontainer/configs/intelrdt.go | 16 + .../runc/libcontainer/configs/mount.go | 7 + .../runc/libcontainer/configs/mount_linux.go | 66 + .../libcontainer/configs/mount_unsupported.go | 9 + .../runc/libcontainer/configs/namespaces.go | 5 + .../libcontainer/configs/namespaces_linux.go | 133 + .../configs/namespaces_syscall.go | 45 + .../configs/namespaces_syscall_unsupported.go | 13 + .../configs/namespaces_unsupported.go | 7 + .../runc/libcontainer/configs/network.go | 75 + .../libcontainer/configs/validate/rootless.go | 87 + .../configs/validate/validator.go | 418 +++ .../runc/libcontainer/console_linux.go | 164 ++ .../runc/libcontainer/container.go | 59 + .../runc/libcontainer/container_linux.go | 1199 +++++++++ .../runc/libcontainer/criu_disabled_linux.go | 15 + .../runc/libcontainer/criu_linux.go | 1202 +++++++++ .../runc/libcontainer/criu_opts_linux.go | 39 + .../opencontainers/runc/libcontainer/env.go | 100 + 
.../opencontainers/runc/libcontainer/error.go | 14 + .../exeseal/cloned_binary_linux.go | 263 ++ .../libcontainer/exeseal/overlayfs_linux.go | 122 + .../runc/libcontainer/factory_linux.go | 219 ++ .../runc/libcontainer/init_linux.go | 724 +++++ .../runc/libcontainer/intelrdt/cmt.go | 23 + .../runc/libcontainer/intelrdt/intelrdt.go | 681 +++++ .../runc/libcontainer/intelrdt/mbm.go | 31 + .../runc/libcontainer/intelrdt/monitoring.go | 83 + .../runc/libcontainer/intelrdt/stats.go | 57 + .../internal/userns/userns_maps_linux.c | 81 + .../internal/userns/userns_maps_linux.go | 186 ++ .../internal/userns/usernsfd_linux.go | 156 ++ .../runc/libcontainer/keys/keyctl.go | 45 + .../runc/libcontainer/logs/logs.go | 56 + .../runc/libcontainer/message_linux.go | 97 + .../runc/libcontainer/mount_linux.go | 339 +++ .../runc/libcontainer/network_linux.go | 100 + .../runc/libcontainer/notify_linux.go | 84 + .../runc/libcontainer/notify_v2_linux.go | 85 + .../runc/libcontainer/process.go | 169 ++ .../runc/libcontainer/process_linux.go | 1008 +++++++ .../runc/libcontainer/restored_process.go | 128 + .../runc/libcontainer/rootfs_linux.go | 1483 +++++++++++ .../runc/libcontainer/seccomp/config.go | 150 ++ .../seccomp/patchbpf/enosys_linux.go | 735 ++++++ .../seccomp/patchbpf/enosys_unsupported.go | 3 + .../libcontainer/seccomp/seccomp_linux.go | 350 +++ .../seccomp/seccomp_unsupported.go | 33 + .../runc/libcontainer/setns_init_linux.go | 158 ++ .../runc/libcontainer/standard_init_linux.go | 298 +++ .../runc/libcontainer/state_linux.go | 244 ++ .../runc/libcontainer/stats_linux.go | 13 + .../opencontainers/runc/libcontainer/sync.go | 203 ++ .../runc/libcontainer/sync_unix.go | 95 + .../runc/libcontainer/system/linux.go | 191 ++ .../runc/libcontainer/system/proc.go | 137 + .../runc/libcontainer/system/rlimit_linux.go | 15 + .../runc/libcontainer/utils/cmsg.go | 135 + .../runc/libcontainer/utils/utils.go | 115 + .../runc/libcontainer/utils/utils_unix.go | 277 ++ 
.../opencontainers/runc/types/events.go | 165 ++ .../github.com/opencontainers/selinux/LICENSE | 201 ++ .../opencontainers/selinux/go-selinux/doc.go | 13 + .../selinux/go-selinux/label/label.go | 48 + .../selinux/go-selinux/label/label_linux.go | 136 + .../selinux/go-selinux/label/label_stub.go | 44 + .../selinux/go-selinux/selinux.go | 322 +++ .../selinux/go-selinux/selinux_linux.go | 1401 ++++++++++ .../selinux/go-selinux/selinux_stub.go | 155 ++ .../selinux/go-selinux/xattrs_linux.go | 71 + .../selinux/pkg/pwalkdir/README.md | 56 + .../selinux/pkg/pwalkdir/pwalkdir.go | 123 + .../seccomp/libseccomp-golang/.gitignore | 4 + .../seccomp/libseccomp-golang/.golangci.yml | 4 + .../seccomp/libseccomp-golang/CHANGELOG | 42 + .../seccomp/libseccomp-golang/CONTRIBUTING.md | 120 + .../seccomp/libseccomp-golang/LICENSE | 22 + .../seccomp/libseccomp-golang/Makefile | 31 + .../seccomp/libseccomp-golang/README.md | 59 + .../seccomp/libseccomp-golang/SECURITY.md | 48 + .../seccomp/libseccomp-golang/seccomp.go | 1188 +++++++++ .../libseccomp-golang/seccomp_internal.go | 884 +++++++ .../golang.org/x/sys/unix/affinity_linux.go | 9 +- vendor/golang.org/x/sys/unix/fdset.go | 4 +- vendor/golang.org/x/sys/unix/ifreq_linux.go | 4 +- vendor/golang.org/x/sys/unix/mkall.sh | 1 + vendor/golang.org/x/sys/unix/mkerrors.sh | 5 +- vendor/golang.org/x/sys/unix/syscall_linux.go | 10 +- .../golang.org/x/sys/unix/syscall_netbsd.go | 17 + .../golang.org/x/sys/unix/syscall_solaris.go | 2 +- vendor/golang.org/x/sys/unix/zerrors_linux.go | 361 +++ .../x/sys/unix/zerrors_linux_386.go | 2 + .../x/sys/unix/zerrors_linux_amd64.go | 2 + .../x/sys/unix/zerrors_linux_arm.go | 2 + .../x/sys/unix/zerrors_linux_arm64.go | 2 + .../x/sys/unix/zerrors_linux_loong64.go | 2 + .../x/sys/unix/zerrors_linux_mips.go | 2 + .../x/sys/unix/zerrors_linux_mips64.go | 2 + .../x/sys/unix/zerrors_linux_mips64le.go | 2 + .../x/sys/unix/zerrors_linux_mipsle.go | 2 + .../x/sys/unix/zerrors_linux_ppc.go | 2 + 
.../x/sys/unix/zerrors_linux_ppc64.go | 2 + .../x/sys/unix/zerrors_linux_ppc64le.go | 2 + .../x/sys/unix/zerrors_linux_riscv64.go | 2 + .../x/sys/unix/zerrors_linux_s390x.go | 2 + .../x/sys/unix/zerrors_linux_sparc64.go | 2 + .../golang.org/x/sys/unix/zsyscall_linux.go | 10 + .../x/sys/unix/zsyscall_solaris_amd64.go | 8 +- vendor/golang.org/x/sys/unix/ztypes_linux.go | 72 + .../x/sys/unix/ztypes_netbsd_arm.go | 2 +- .../sys/windows/registry/zsyscall_windows.go | 16 +- .../x/sys/windows/syscall_windows.go | 17 + .../golang.org/x/sys/windows/types_windows.go | 98 + .../x/sys/windows/zsyscall_windows.go | 1021 ++++---- vendor/modules.txt | 71 +- 296 files changed, 41675 insertions(+), 561 deletions(-) create mode 100644 vendor/cyphar.com/go-pathrs/.golangci.yml create mode 100644 vendor/cyphar.com/go-pathrs/COPYING create mode 100644 vendor/cyphar.com/go-pathrs/doc.go create mode 100644 vendor/cyphar.com/go-pathrs/handle_linux.go create mode 100644 vendor/cyphar.com/go-pathrs/internal/fdutils/fd_linux.go create mode 100644 vendor/cyphar.com/go-pathrs/internal/libpathrs/error_unix.go create mode 100644 vendor/cyphar.com/go-pathrs/internal/libpathrs/libpathrs_linux.go create mode 100644 vendor/cyphar.com/go-pathrs/procfs/procfs_linux.go create mode 100644 vendor/cyphar.com/go-pathrs/root_linux.go create mode 100644 vendor/cyphar.com/go-pathrs/utils_linux.go create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/.gitignore create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/.golangci.yml create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/LICENSE create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/Makefile create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/README.md create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/features.go create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/main.go create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/notify.go create 
mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.pb.go create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.proto create mode 100644 vendor/github.com/cyphar/filepath-securejoin/.golangci.yml create mode 100644 vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md create mode 100644 vendor/github.com/cyphar/filepath-securejoin/COPYING.md create mode 100644 vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD create mode 100644 vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 create mode 100644 vendor/github.com/cyphar/filepath-securejoin/README.md create mode 100644 vendor/github.com/cyphar/filepath-securejoin/VERSION create mode 100644 vendor/github.com/cyphar/filepath-securejoin/codecov.yml create mode 100644 vendor/github.com/cyphar/filepath-securejoin/doc.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/join.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md create mode 100644 
vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/doc.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/lookup_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/mkdir_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/open_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/openat2_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_libpathrs.go create mode 100644 
vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_purego.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_libpathrs.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_purego.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_libpathrs.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_purego.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/vfs.go create mode 100644 vendor/github.com/moby/sys/capability/CHANGELOG.md create mode 100644 vendor/github.com/moby/sys/capability/LICENSE create mode 100644 vendor/github.com/moby/sys/capability/README.md create mode 100644 vendor/github.com/moby/sys/capability/capability.go create mode 100644 vendor/github.com/moby/sys/capability/capability_linux.go create mode 100644 vendor/github.com/moby/sys/capability/capability_noop.go create mode 100644 vendor/github.com/moby/sys/capability/enum.go create mode 100644 vendor/github.com/moby/sys/capability/enum_gen.go create mode 100644 vendor/github.com/moby/sys/capability/syscall_linux.go create mode 100644 vendor/github.com/mrunalp/fileutils/.gitignore create mode 100644 vendor/github.com/mrunalp/fileutils/LICENSE create mode 100644 vendor/github.com/mrunalp/fileutils/MAINTAINERS create mode 100644 vendor/github.com/mrunalp/fileutils/README.md create mode 100644 vendor/github.com/mrunalp/fileutils/fileutils.go create mode 100644 vendor/github.com/mrunalp/fileutils/idtools.go create mode 100644 vendor/github.com/opencontainers/cgroups/.golangci-extra.yml create mode 100644 vendor/github.com/opencontainers/cgroups/.golangci.yml create mode 100644 vendor/github.com/opencontainers/cgroups/CODEOWNERS create mode 100644 vendor/github.com/opencontainers/cgroups/CONTRIBUTING.md create mode 100644 
vendor/github.com/opencontainers/cgroups/GOVERNANCE.md create mode 100644 vendor/github.com/opencontainers/cgroups/MAINTAINERS create mode 100644 vendor/github.com/opencontainers/cgroups/MAINTAINERS_GUIDE.md create mode 100644 vendor/github.com/opencontainers/cgroups/README.md create mode 100644 vendor/github.com/opencontainers/cgroups/RELEASES.md create mode 100644 vendor/github.com/opencontainers/cgroups/cgroups.go create mode 100644 vendor/github.com/opencontainers/cgroups/config_blkio_device.go create mode 100644 vendor/github.com/opencontainers/cgroups/config_hugepages.go create mode 100644 vendor/github.com/opencontainers/cgroups/config_ifprio_map.go create mode 100644 vendor/github.com/opencontainers/cgroups/config_linux.go create mode 100644 vendor/github.com/opencontainers/cgroups/config_rdma.go create mode 100644 vendor/github.com/opencontainers/cgroups/config_unsupported.go create mode 100644 vendor/github.com/opencontainers/cgroups/file.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/blkio.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/cpu.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/cpuacct.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/cpuset.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/devices.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/error.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/freezer.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/fs.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/hugetlb.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/memory.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/name.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/net_cls.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/net_prio.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/paths.go create mode 
100644 vendor/github.com/opencontainers/cgroups/fs/perf_event.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/pids.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs/rdma.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/cpu.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/cpuset.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/create.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/defaultpath.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/freezer.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/fs2.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/hugetlb.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/io.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/memory.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/misc.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/pids.go create mode 100644 vendor/github.com/opencontainers/cgroups/fs2/psi.go create mode 100644 vendor/github.com/opencontainers/cgroups/fscommon/rdma.go create mode 100644 vendor/github.com/opencontainers/cgroups/fscommon/utils.go create mode 100644 vendor/github.com/opencontainers/cgroups/getallpids.go create mode 100644 vendor/github.com/opencontainers/cgroups/internal/path/path.go create mode 100644 vendor/github.com/opencontainers/cgroups/manager/new.go create mode 100644 vendor/github.com/opencontainers/cgroups/stats.go create mode 100644 vendor/github.com/opencontainers/cgroups/systemd/common.go create mode 100644 vendor/github.com/opencontainers/cgroups/systemd/cpuset.go create mode 100644 vendor/github.com/opencontainers/cgroups/systemd/dbus.go create mode 100644 vendor/github.com/opencontainers/cgroups/systemd/devices.go create mode 100644 vendor/github.com/opencontainers/cgroups/systemd/user.go create mode 100644 
vendor/github.com/opencontainers/cgroups/systemd/v1.go create mode 100644 vendor/github.com/opencontainers/cgroups/systemd/v2.go create mode 100644 vendor/github.com/opencontainers/cgroups/utils.go create mode 100644 vendor/github.com/opencontainers/cgroups/v1_utils.go create mode 100644 vendor/github.com/opencontainers/runc/internal/linux/doc.go create mode 100644 vendor/github.com/opencontainers/runc/internal/linux/linux.go create mode 100644 vendor/github.com/opencontainers/runc/internal/pathrs/doc.go create mode 100644 vendor/github.com/opencontainers/runc/internal/pathrs/mkdirall_pathrslite.go create mode 100644 vendor/github.com/opencontainers/runc/internal/pathrs/path.go create mode 100644 vendor/github.com/opencontainers/runc/internal/pathrs/procfs_pathrslite.go create mode 100644 vendor/github.com/opencontainers/runc/internal/pathrs/retry.go create mode 100644 vendor/github.com/opencontainers/runc/internal/pathrs/root_pathrslite.go create mode 100644 vendor/github.com/opencontainers/runc/internal/sys/doc.go create mode 100644 vendor/github.com/opencontainers/runc/internal/sys/opath_linux.go create mode 100644 vendor/github.com/opencontainers/runc/internal/sys/sysctl_linux.go create mode 100644 vendor/github.com/opencontainers/runc/internal/sys/verify_inode_unix.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/README.md create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/SPEC.md create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go create mode 100644 
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_deprecated.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/config.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/network.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/console_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/container.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/container_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/criu_disabled_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/criu_linux.go create mode 100644 
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/env.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/error.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/exeseal/cloned_binary_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/exeseal/overlayfs_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/init_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.c create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/internal/userns/usernsfd_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/message_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/network_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/notify_v2_linux.go create mode 100644 
vendor/github.com/opencontainers/runc/libcontainer/process.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/process_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/restored_process.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/state_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/sync.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/sync_unix.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/system/linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/system/proc.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go create mode 100644 vendor/github.com/opencontainers/runc/types/events.go create mode 100644 vendor/github.com/opencontainers/selinux/LICENSE 
create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/doc.go create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/label/label.go create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/selinux.go create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go create mode 100644 vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go create mode 100644 vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md create mode 100644 vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go create mode 100644 vendor/github.com/seccomp/libseccomp-golang/.gitignore create mode 100644 vendor/github.com/seccomp/libseccomp-golang/.golangci.yml create mode 100644 vendor/github.com/seccomp/libseccomp-golang/CHANGELOG create mode 100644 vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md create mode 100644 vendor/github.com/seccomp/libseccomp-golang/LICENSE create mode 100644 vendor/github.com/seccomp/libseccomp-golang/Makefile create mode 100644 vendor/github.com/seccomp/libseccomp-golang/README.md create mode 100644 vendor/github.com/seccomp/libseccomp-golang/SECURITY.md create mode 100644 vendor/github.com/seccomp/libseccomp-golang/seccomp.go create mode 100644 vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go diff --git a/go.mod b/go.mod index c0207708c3..7a560b6540 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/Microsoft/hcsshim -go 1.23.0 +go 1.24.0 require ( github.com/Microsoft/cosesign1go v1.4.0 @@ -9,7 +9,7 @@ require ( github.com/blang/semver/v4 v4.0.0 github.com/cenkalti/backoff/v4 v4.3.0 github.com/containerd/cgroups/v3 v3.0.5 - github.com/containerd/console 
v1.0.4 + github.com/containerd/console v1.0.5 github.com/containerd/containerd/api v1.9.0 github.com/containerd/containerd/v2 v2.1.2 github.com/containerd/errdefs v1.0.0 @@ -27,7 +27,7 @@ require ( github.com/moby/sys/user v0.4.0 github.com/open-policy-agent/opa v0.70.0 github.com/opencontainers/cgroups v0.0.4 - github.com/opencontainers/runc v1.3.0 + github.com/opencontainers/runc v1.3.3 github.com/opencontainers/runtime-spec v1.2.1 github.com/pelletier/go-toml v1.9.5 github.com/pkg/errors v0.9.1 @@ -41,18 +41,20 @@ require ( go.uber.org/mock v0.6.0 golang.org/x/net v0.43.0 golang.org/x/sync v0.16.0 - golang.org/x/sys v0.35.0 + golang.org/x/sys v0.39.0 google.golang.org/grpc v1.75.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.5.1 google.golang.org/protobuf v1.36.7 ) require ( + cyphar.com/go-pathrs v0.2.1 // indirect github.com/OneOfOne/xxhash v1.2.8 // indirect github.com/agnivade/levenshtein v1.2.0 // indirect github.com/akavel/rsrc v0.10.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/checkpoint-restore/go-criu/v6 v6.3.0 // indirect github.com/containerd/continuity v0.4.5 // indirect github.com/containerd/fifo v1.1.0 // indirect github.com/containerd/log v0.1.0 // indirect @@ -60,6 +62,7 @@ require ( github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect + github.com/cyphar/filepath-securejoin v0.6.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/docker/cli v24.0.0+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect @@ -88,17 +91,21 @@ require ( github.com/mdlayher/socket v0.5.1 // indirect github.com/mdlayher/vsock v1.2.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/moby/sys/capability v0.4.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect 
github.com/moby/sys/userns v0.1.0 // indirect + github.com/mrunalp/fileutils v0.5.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect + github.com/opencontainers/selinux v1.13.0 // indirect github.com/prometheus/client_golang v1.22.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/seccomp/libseccomp-golang v0.10.0 // indirect github.com/tchap/go-patricia/v2 v2.3.2 // indirect github.com/vbatts/tar-split v0.11.5 // indirect github.com/veraison/go-cose v1.1.0 // indirect diff --git a/go.sum b/go.sum index 53ceeffb17..f25a3f2f8d 100644 --- a/go.sum +++ b/go.sum @@ -335,6 +335,8 @@ cloud.google.com/go/vpcaccess v1.8.6/go.mod h1:61yymNplV1hAbo8+kBOFO7Vs+4ZHYI244 cloud.google.com/go/webrisk v1.11.1/go.mod h1:+9SaepGg2lcp1p0pXuHyz3R2Yi2fHKKb4c1Q9y0qbtA= cloud.google.com/go/websecurityscanner v1.7.6/go.mod h1:ucaaTO5JESFn5f2pjdX01wGbQ8D6h79KHrmO2uGZeiY= cloud.google.com/go/workflows v1.14.2/go.mod h1:5nqKjMD+MsJs41sJhdVrETgvD5cOK3hUcAs8ygqYvXQ= +cyphar.com/go-pathrs v0.2.1 h1:9nx1vOgwVvX1mNBWDu93+vaceedpbsDqo+XuBGL40b8= +cyphar.com/go-pathrs v0.2.1/go.mod h1:y8f1EMG7r+hCuFf/rXsKqMJrJAUoADZGNh5/vZPKcGc= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc= @@ -408,6 +410,8 @@ github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL github.com/cespare/xxhash/v2 v2.2.0/go.mod 
h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/checkpoint-restore/go-criu/v6 v6.3.0 h1:mIdrSO2cPNWQY1truPg6uHLXyKHk3Z5Odx4wjKOASzA= +github.com/checkpoint-restore/go-criu/v6 v6.3.0/go.mod h1:rrRTN/uSwY2X+BPRl/gkulo9gsKOSAeVp9/K2tv7xZI= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/logex v1.2.0/go.mod h1:9+9sk7u7pGNWYMkh0hdiL++6OeibzJccyQU4p4MedaY= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= @@ -441,8 +445,8 @@ github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3/go.mod h1:W+zGtBO5Y1Ig github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/containerd/cgroups/v3 v3.0.5 h1:44na7Ud+VwyE7LIoJ8JTNQOa549a8543BmzaJHo6Bzo= github.com/containerd/cgroups/v3 v3.0.5/go.mod h1:SA5DLYnXO8pTGYiAHXz94qvLQTKfVM5GEVisn4jpins= -github.com/containerd/console v1.0.4 h1:F2g4+oChYvBTsASRTz8NP6iIAi97J3TtSAsLbIFn4ro= -github.com/containerd/console v1.0.4/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= +github.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc= +github.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= github.com/containerd/containerd/api v1.9.0 h1:HZ/licowTRazus+wt9fM6r/9BQO7S0vD5lMcWspGIg0= github.com/containerd/containerd/api v1.9.0/go.mod h1:GhghKFmTR3hNtyznBoQ0EMWr9ju5AqHjcZPsSpTKutI= github.com/containerd/containerd/v2 v2.1.2 h1:4ZQxB+FVYmwXZgpBcKfar6ieppm3KC5C6FRKvtJ6DRU= @@ -473,9 +477,12 @@ github.com/containerd/typeurl/v2 v2.2.3 h1:yNA/94zxWdvYACdYO8zofhrTVuQY73fFU1y++ github.com/containerd/typeurl/v2 v2.2.3/go.mod h1:95ljDnPfD3bAbDJRugOiShd/DlAAsxGtUBhJxIn7SCk= github.com/coreos/go-systemd/v22 v22.5.0 
h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cyphar/filepath-securejoin v0.6.0 h1:BtGB77njd6SVO6VztOHfPxKitJvd/VPT+OFBFMOi1Is= +github.com/cyphar/filepath-securejoin v0.6.0/go.mod h1:A8hd4EnAeyujCJRrICiOWqjS1AX0a9kM5XL+NwKoYSc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -760,6 +767,7 @@ github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47 github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/josephspurrier/goversioninfo v1.5.0 h1:9TJtORoyf4YMoWSOo/cXFN9A/lB3PniJ91OxIH6e7Zg= github.com/josephspurrier/goversioninfo v1.5.0/go.mod h1:6MoTvFZ6GKJkzcdLnU5T/RGYUbHQbKpYeNP0AgQLd2o= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= @@ -829,6 +837,8 @@ github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8Ie github.com/mitchellh/go-homedir v1.1.0 
h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/moby/sys/capability v0.4.0 h1:4D4mI6KlNtWMCM1Z/K0i7RV1FkX+DBDHKVJpCndZoHk= +github.com/moby/sys/capability v0.4.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= @@ -838,6 +848,8 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= +github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/open-policy-agent/opa v0.70.0 h1:B3cqCN2iQAyKxK6+GI+N40uqkin+wzIrM7YA60t9x1U= @@ -848,10 +860,12 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= github.com/opencontainers/image-spec v1.1.1/go.mod 
h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= -github.com/opencontainers/runc v1.3.0 h1:cvP7xbEvD0QQAs0nZKLzkVog2OPZhI/V2w3WmTmUSXI= -github.com/opencontainers/runc v1.3.0/go.mod h1:9wbWt42gV+KRxKRVVugNP6D5+PQciRbenB4fLVsqGPs= +github.com/opencontainers/runc v1.3.3 h1:qlmBbbhu+yY0QM7jqfuat7M1H3/iXjju3VkP9lkFQr4= +github.com/opencontainers/runc v1.3.3/go.mod h1:D7rL72gfWxVs9cJ2/AayxB0Hlvn9g0gaF1R7uunumSI= github.com/opencontainers/runtime-spec v1.2.1 h1:S4k4ryNgEpxW1dzyqffOmhI1BHYcjzU8lpJfSlR0xww= github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/selinux v1.13.0 h1:Zza88GWezyT7RLql12URvoxsbLfjFx988+LGaWfbL84= +github.com/opencontainers/selinux v1.13.0/go.mod h1:XxWTed+A/s5NNq4GmYScVy+9jzXhGBVEOAyucdRUY8s= github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= @@ -897,6 +911,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= +github.com/seccomp/libseccomp-golang v0.10.0 h1:aA4bp+/Zzi0BnWZ2F1wgNBs5gTpm+na2rWM6M9YjLpY= +github.com/seccomp/libseccomp-golang v0.10.0/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= @@ -904,6 
+920,8 @@ github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY52 github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/afero v1.9.2/go.mod h1:iUV7ddyEEZPO5gA3zD4fJt6iStLlL+Lg4m2cihcDf8Y= github.com/spf13/afero v1.10.0/go.mod h1:UBogFpq8E9Hx+xc5CNTTEpTnuHVmXDwZcZcE1eb/UhQ= +github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -922,8 +940,9 @@ github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/substrait-io/substrait-go v0.4.2/go.mod h1:qhpnLmrcvAnlZsUyPXZRqldiHapPTXC3t7xFgDi3aQg= github.com/tchap/go-patricia/v2 v2.3.2 h1:xTHFutuitO2zqKAQ5rCROYgUb7Or/+IC3fts9/Yc7nM= github.com/tchap/go-patricia/v2 v2.3.2/go.mod h1:VZRHKAb53DLaG+nA9EaYYiaEx6YztwDlLElMsnSHD4k= @@ -1395,6 +1414,7 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1468,8 +1488,8 @@ golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/telemetry v0.0.0-20240521205824-bda55230c457/go.mod h1:pRgIJT+bRLFKnoM1ldnzKoxTIn14Yxz928LQRYYgIN0= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= diff --git a/internal/guest/runtime/runc/utils.go b/internal/guest/runtime/runc/utils.go index 1112422f17..12fc07ee54 100644 --- a/internal/guest/runtime/runc/utils.go +++ b/internal/guest/runtime/runc/utils.go @@ -119,7 +119,7 @@ type standardLogEntry struct { func (l *standardLogEntry) asError() (err error) { err = parseRuncError(l.Message) if l.Err != nil { - err = 
errors.Wrapf(err, l.Err.Error()) + err = errors.Wrap(err, l.Err.Error()) } return } diff --git a/internal/tools/uvmboot/main.go b/internal/tools/uvmboot/main.go index de2c1f7dbe..300c968e30 100644 --- a/internal/tools/uvmboot/main.go +++ b/internal/tools/uvmboot/main.go @@ -97,7 +97,7 @@ func main() { app.Before = func(c *cli.Context) error { if !winapi.IsElevated() { - return fmt.Errorf(c.App.Name + " must be run in an elevated context") + return fmt.Errorf("%s must be run in an elevated context", c.App.Name) } lvl := logrus.WarnLevel diff --git a/test/go.mod b/test/go.mod index 38314887a3..b08e13e2e9 100644 --- a/test/go.mod +++ b/test/go.mod @@ -24,13 +24,14 @@ require ( github.com/urfave/cli/v2 v2.27.6 go.opencensus.io v0.24.0 golang.org/x/sync v0.18.0 - golang.org/x/sys v0.38.0 + golang.org/x/sys v0.39.0 google.golang.org/grpc v1.75.0 google.golang.org/protobuf v1.36.7 k8s.io/cri-api v0.32.3 ) require ( + cyphar.com/go-pathrs v0.2.1 // indirect github.com/Microsoft/cosesign1go v1.4.0 // indirect github.com/Microsoft/didx509go v0.0.3 // indirect github.com/OneOfOne/xxhash v1.2.8 // indirect @@ -39,6 +40,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/checkpoint-restore/go-criu/v6 v6.3.0 // indirect github.com/containerd/console v1.0.5 // indirect github.com/containerd/continuity v0.4.5 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect @@ -49,6 +51,7 @@ require ( github.com/containerd/typeurl/v2 v2.2.3 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect + github.com/cyphar/filepath-securejoin v0.6.0 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/distribution/reference v0.6.0 // indirect github.com/docker/cli v24.0.0+incompatible // indirect @@ -79,16 +82,18 @@ require ( github.com/mattn/go-shellwords v1.0.12 // indirect 
github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/moby/locker v1.0.1 // indirect + github.com/moby/sys/capability v0.4.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/moby/sys/sequential v0.6.0 // indirect github.com/moby/sys/signal v0.7.1 // indirect github.com/moby/sys/user v0.4.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect + github.com/mrunalp/fileutils v0.5.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/open-policy-agent/opa v0.70.0 // indirect github.com/opencontainers/cgroups v0.0.4 // indirect github.com/opencontainers/runc v1.3.3 // indirect - github.com/opencontainers/selinux v1.12.0 // indirect + github.com/opencontainers/selinux v1.13.0 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/prometheus/client_golang v1.22.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect @@ -96,6 +101,7 @@ require ( github.com/prometheus/procfs v0.15.1 // indirect github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/seccomp/libseccomp-golang v0.10.0 // indirect github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect github.com/tchap/go-patricia/v2 v2.3.2 // indirect github.com/vbatts/tar-split v0.11.5 // indirect diff --git a/test/go.sum b/test/go.sum index e89d69cc44..3d1deea7ee 100644 --- a/test/go.sum +++ b/test/go.sum @@ -1,4 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cyphar.com/go-pathrs v0.2.1 h1:9nx1vOgwVvX1mNBWDu93+vaceedpbsDqo+XuBGL40b8= +cyphar.com/go-pathrs v0.2.1/go.mod h1:y8f1EMG7r+hCuFf/rXsKqMJrJAUoADZGNh5/vZPKcGc= github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= 
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -29,6 +31,8 @@ github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/checkpoint-restore/go-criu/v6 v6.3.0 h1:mIdrSO2cPNWQY1truPg6uHLXyKHk3Z5Odx4wjKOASzA= +github.com/checkpoint-restore/go-criu/v6 v6.3.0/go.mod h1:rrRTN/uSwY2X+BPRl/gkulo9gsKOSAeVp9/K2tv7xZI= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/containerd/cgroups/v3 v3.0.5 h1:44na7Ud+VwyE7LIoJ8JTNQOa549a8543BmzaJHo6Bzo= @@ -63,8 +67,11 @@ github.com/containerd/typeurl/v2 v2.2.3 h1:yNA/94zxWdvYACdYO8zofhrTVuQY73fFU1y++ github.com/containerd/typeurl/v2 v2.2.3/go.mod h1:95ljDnPfD3bAbDJRugOiShd/DlAAsxGtUBhJxIn7SCk= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cyphar/filepath-securejoin v0.6.0 h1:BtGB77njd6SVO6VztOHfPxKitJvd/VPT+OFBFMOi1Is= +github.com/cyphar/filepath-securejoin v0.6.0/go.mod h1:A8hd4EnAeyujCJRrICiOWqjS1AX0a9kM5XL+NwKoYSc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -136,6 +143,7 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= @@ -148,6 +156,7 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -165,6 +174,7 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= +github.com/inconshreveable/mousetrap v1.0.0/go.mod 
h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/josephspurrier/goversioninfo v1.5.0 h1:9TJtORoyf4YMoWSOo/cXFN9A/lB3PniJ91OxIH6e7Zg= github.com/josephspurrier/goversioninfo v1.5.0/go.mod h1:6MoTvFZ6GKJkzcdLnU5T/RGYUbHQbKpYeNP0AgQLd2o= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -199,6 +209,8 @@ github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs= github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= +github.com/moby/sys/capability v0.4.0 h1:4D4mI6KlNtWMCM1Z/K0i7RV1FkX+DBDHKVJpCndZoHk= +github.com/moby/sys/capability v0.4.0/go.mod h1:4g9IK291rVkms3LKCDOoYlnV8xKwoDTpIrNEE35Wq0I= github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg= github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4= github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= @@ -210,6 +222,8 @@ github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85 github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= +github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= +github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/open-policy-agent/opa v0.70.0 
h1:B3cqCN2iQAyKxK6+GI+N40uqkin+wzIrM7YA60t9x1U= @@ -228,8 +242,8 @@ github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/ github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0= github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI= github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= -github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8= -github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U= +github.com/opencontainers/selinux v1.13.0 h1:Zza88GWezyT7RLql12URvoxsbLfjFx988+LGaWfbL84= +github.com/opencontainers/selinux v1.13.0/go.mod h1:XxWTed+A/s5NNq4GmYScVy+9jzXhGBVEOAyucdRUY8s= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -252,9 +266,13 @@ github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/seccomp/libseccomp-golang v0.10.0 h1:aA4bp+/Zzi0BnWZ2F1wgNBs5gTpm+na2rWM6M9YjLpY= +github.com/seccomp/libseccomp-golang v0.10.0/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 
+github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -268,8 +286,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tchap/go-patricia/v2 v2.3.2 h1:xTHFutuitO2zqKAQ5rCROYgUb7Or/+IC3fts9/Yc7nM= @@ -376,6 +394,7 @@ golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -387,8 +406,8 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= -golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -446,6 +465,8 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= 
google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/vendor/cyphar.com/go-pathrs/.golangci.yml b/vendor/cyphar.com/go-pathrs/.golangci.yml new file mode 100644 index 0000000000..2778a3268e --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/.golangci.yml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: MPL-2.0 +# +# libpathrs: safe path resolution on Linux +# Copyright (C) 2019-2025 Aleksa Sarai +# Copyright (C) 2019-2025 SUSE LLC +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +version: "2" +linters: + enable: + - bidichk + - cyclop + - errname + - errorlint + - exhaustive + - goconst + - godot + - gomoddirectives + - gosec + - mirror + - misspell + - mnd + - nilerr + - nilnil + - perfsprint + - prealloc + - reassign + - revive + - unconvert + - unparam + - usestdlibvars + - wastedassign +formatters: + enable: + - gofumpt + - goimports + settings: + goimports: + local-prefixes: + - cyphar.com/go-pathrs diff --git a/vendor/cyphar.com/go-pathrs/COPYING b/vendor/cyphar.com/go-pathrs/COPYING new file mode 100644 index 0000000000..d0a1fa1482 --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/COPYING @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. 
"Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. 
"Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/vendor/cyphar.com/go-pathrs/doc.go b/vendor/cyphar.com/go-pathrs/doc.go new file mode 100644 index 0000000000..a7ee4bc487 --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/doc.go @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +// Package pathrs provides bindings for libpathrs, a library for safe path +// resolution on Linux. +package pathrs diff --git a/vendor/cyphar.com/go-pathrs/handle_linux.go b/vendor/cyphar.com/go-pathrs/handle_linux.go new file mode 100644 index 0000000000..3221ef6738 --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/handle_linux.go @@ -0,0 +1,114 @@ +//go:build linux + +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +package pathrs + +import ( + "fmt" + "os" + + "cyphar.com/go-pathrs/internal/fdutils" + "cyphar.com/go-pathrs/internal/libpathrs" +) + +// Handle is a handle for a path within a given [Root]. 
This handle references +// an already-resolved path which can be used for only one purpose -- to +// "re-open" the handle and get an actual [os.File] which can be used for +// ordinary operations. +// +// If you wish to open a file without having an intermediate [Handle] object, +// you can try to use [Root.Open] or [Root.OpenFile]. +// +// It is critical that you perform all relevant operations through this [Handle] +// (rather than fetching the file descriptor yourself with [Handle.IntoFile]), +// because the security properties of libpathrs depend on users doing all +// relevant filesystem operations through libpathrs. +// +// [os.File]: https://pkg.go.dev/os#File +type Handle struct { + inner *os.File +} + +// HandleFromFile creates a new [Handle] from an existing file handle. The +// handle will be copied by this method, so the original handle should still be +// freed by the caller. +// +// This is effectively the inverse operation of [Handle.IntoFile], and is used +// for "deserialising" pathrs root handles. +func HandleFromFile(file *os.File) (*Handle, error) { + newFile, err := fdutils.DupFile(file) + if err != nil { + return nil, fmt.Errorf("duplicate handle fd: %w", err) + } + return &Handle{inner: newFile}, nil +} + +// Open creates an "upgraded" file handle to the file referenced by the +// [Handle]. Note that the original [Handle] is not consumed by this operation, +// and can be opened multiple times. +// +// The handle returned is only usable for reading, and this method is +// shorthand for [Handle.OpenFile] with os.O_RDONLY. +// +// TODO: Rename these to "Reopen" or something. +func (h *Handle) Open() (*os.File, error) { + return h.OpenFile(os.O_RDONLY) +} + +// OpenFile creates an "upgraded" file handle to the file referenced by the +// [Handle]. Note that the original [Handle] is not consumed by this operation, +// and can be opened multiple times. +// +// The provided flags indicate which open(2) flags are used to create the new +// handle.
+// +// TODO: Rename these to "Reopen" or something. +func (h *Handle) OpenFile(flags int) (*os.File, error) { + return fdutils.WithFileFd(h.inner, func(fd uintptr) (*os.File, error) { + newFd, err := libpathrs.Reopen(fd, flags) + if err != nil { + return nil, err + } + return os.NewFile(newFd, h.inner.Name()), nil + }) +} + +// IntoFile unwraps the [Handle] into its underlying [os.File]. +// +// You almost certainly want to use [Handle.OpenFile] to get a non-O_PATH +// version of this [Handle]. +// +// This operation returns the internal [os.File] of the [Handle] directly, so +// calling [Handle.Close] will also close any copies of the returned [os.File]. +// If you want to get an independent copy, use [Handle.Clone] followed by +// [Handle.IntoFile] on the cloned [Handle]. +// +// [os.File]: https://pkg.go.dev/os#File +func (h *Handle) IntoFile() *os.File { + // TODO: Figure out if we really don't want to make a copy. + // TODO: We almost certainly want to clear h.inner here, but we can't do + // that easily atomically (we could use atomic.Value but that'll make + // things quite a bit uglier). + return h.inner +} + +// Clone creates a copy of a [Handle], such that it has a separate lifetime to +// the original (while referring to the same underlying file). +func (h *Handle) Clone() (*Handle, error) { + return HandleFromFile(h.inner) +} + +// Close frees all of the resources used by the [Handle].
+func (h *Handle) Close() error { + return h.inner.Close() +} diff --git a/vendor/cyphar.com/go-pathrs/internal/fdutils/fd_linux.go b/vendor/cyphar.com/go-pathrs/internal/fdutils/fd_linux.go new file mode 100644 index 0000000000..41aea3e4b3 --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/internal/fdutils/fd_linux.go @@ -0,0 +1,75 @@ +//go:build linux + +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +// Package fdutils contains a few helper functions for dealing with *os.File and +// file descriptors. +package fdutils + +import ( + "fmt" + "os" + + "golang.org/x/sys/unix" + + "cyphar.com/go-pathrs/internal/libpathrs" +) + +// DupFd makes a close-on-exec duplicate of the given fd. +func DupFd(fd uintptr, name string) (*os.File, error) { + newFd, err := unix.FcntlInt(fd, unix.F_DUPFD_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("fcntl(F_DUPFD_CLOEXEC): %w", err) + } + return os.NewFile(uintptr(newFd), name), nil +} + +// WithFileFd is a more ergonomic wrapper around file.SyscallConn().Control(). +func WithFileFd[T any](file *os.File, fn func(fd uintptr) (T, error)) (T, error) { + conn, err := file.SyscallConn() + if err != nil { + return *new(T), err + } + var ( + ret T + innerErr error + ) + if err := conn.Control(func(fd uintptr) { + ret, innerErr = fn(fd) + }); err != nil { + return *new(T), err + } + return ret, innerErr +} + +// DupFile makes a duplicate of the given file. +func DupFile(file *os.File) (*os.File, error) { + return WithFileFd(file, func(fd uintptr) (*os.File, error) { + return DupFd(fd, file.Name()) + }) +} + +// MkFile creates a new *os.File from the provided file descriptor.
However, +// unlike os.NewFile, the file's Name is based on the real path (provided by +// /proc/thread-self/fd/$n). If the name cannot be fetched, fd is closed. +func MkFile(fd uintptr) (*os.File, error) { + fdPath := fmt.Sprintf("fd/%d", fd) + fdName, err := libpathrs.ProcReadlinkat(libpathrs.ProcDefaultRootFd, libpathrs.ProcThreadSelf, fdPath) + if err != nil { + _ = unix.Close(int(fd)) + return nil, fmt.Errorf("failed to fetch real name of fd %d: %w", fd, err) + } + // TODO: Maybe we should prefix this name with something to indicate to + // users that they must not use this path as a "safe" path. Something like + // "//pathrs-handle:/foo/bar"? + return os.NewFile(fd, fdName), nil +} diff --git a/vendor/cyphar.com/go-pathrs/internal/libpathrs/error_unix.go b/vendor/cyphar.com/go-pathrs/internal/libpathrs/error_unix.go new file mode 100644 index 0000000000..c9f416de01 --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/internal/libpathrs/error_unix.go @@ -0,0 +1,40 @@ +//go:build linux + +// TODO: Use "go:build unix" once we bump the minimum Go version to 1.19. + +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +package libpathrs + +import ( + "syscall" +) + +// Error represents an underlying libpathrs error. +type Error struct { + description string + errno syscall.Errno +} + +// Error returns a textual description of the error. +func (err *Error) Error() string { + return err.description +} + +// Unwrap returns the underlying error which was wrapped by this error (if +// applicable).
+func (err *Error) Unwrap() error { + if err.errno != 0 { + return err.errno + } + return nil +} diff --git a/vendor/cyphar.com/go-pathrs/internal/libpathrs/libpathrs_linux.go b/vendor/cyphar.com/go-pathrs/internal/libpathrs/libpathrs_linux.go new file mode 100644 index 0000000000..c07b80e307 --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/internal/libpathrs/libpathrs_linux.go @@ -0,0 +1,337 @@ +//go:build linux + +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +// Package libpathrs is an internal thin wrapper around the libpathrs C API. +package libpathrs + +import ( + "fmt" + "syscall" + "unsafe" +) + +/* +// TODO: Figure out if we need to add support for linking against libpathrs +// statically even if in dynamically linked builds in order to make +// packaging a bit easier (using "-Wl,-Bstatic -lpathrs -Wl,-Bdynamic" or +// "-l:pathrs.a"). +#cgo pkg-config: pathrs +#include <pathrs.h> + +// This is a workaround for unsafe.Pointer() not working for non-void pointers. +char *cast_ptr(void *ptr) { return ptr; } +*/ +import "C" + +func fetchError(errID C.int) error { + if errID >= C.__PATHRS_MAX_ERR_VALUE { + return nil + } + cErr := C.pathrs_errorinfo(errID) + defer C.pathrs_errorinfo_free(cErr) + + var err error + if cErr != nil { + err = &Error{ + errno: syscall.Errno(cErr.saved_errno), + description: C.GoString(cErr.description), + } + } + return err +} + +// OpenRoot wraps pathrs_open_root. +func OpenRoot(path string) (uintptr, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + fd := C.pathrs_open_root(cPath) + return uintptr(fd), fetchError(fd) +} + +// Reopen wraps pathrs_reopen.
+func Reopen(fd uintptr, flags int) (uintptr, error) { + newFd := C.pathrs_reopen(C.int(fd), C.int(flags)) + return uintptr(newFd), fetchError(newFd) +} + +// InRootResolve wraps pathrs_inroot_resolve. +func InRootResolve(rootFd uintptr, path string) (uintptr, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + fd := C.pathrs_inroot_resolve(C.int(rootFd), cPath) + return uintptr(fd), fetchError(fd) +} + +// InRootResolveNoFollow wraps pathrs_inroot_resolve_nofollow. +func InRootResolveNoFollow(rootFd uintptr, path string) (uintptr, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + fd := C.pathrs_inroot_resolve_nofollow(C.int(rootFd), cPath) + return uintptr(fd), fetchError(fd) +} + +// InRootOpen wraps pathrs_inroot_open. +func InRootOpen(rootFd uintptr, path string, flags int) (uintptr, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + fd := C.pathrs_inroot_open(C.int(rootFd), cPath, C.int(flags)) + return uintptr(fd), fetchError(fd) +} + +// InRootReadlink wraps pathrs_inroot_readlink. +func InRootReadlink(rootFd uintptr, path string) (string, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + size := 128 + for { + linkBuf := make([]byte, size) + n := C.pathrs_inroot_readlink(C.int(rootFd), cPath, C.cast_ptr(unsafe.Pointer(&linkBuf[0])), C.ulong(len(linkBuf))) + switch { + case int(n) < C.__PATHRS_MAX_ERR_VALUE: + return "", fetchError(n) + case int(n) <= len(linkBuf): + return string(linkBuf[:int(n)]), nil + default: + // The contents were truncated. Unlike readlinkat, pathrs returns + // the size of the link when it checked. So use the returned size + // as a basis for the reallocated size (but in order to avoid a DoS + // where a magic-link is growing by a single byte each iteration, + // make sure we are a fair bit larger). + size += int(n) + } + } +} + +// InRootRmdir wraps pathrs_inroot_rmdir. 
+func InRootRmdir(rootFd uintptr, path string) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + err := C.pathrs_inroot_rmdir(C.int(rootFd), cPath) + return fetchError(err) +} + +// InRootUnlink wraps pathrs_inroot_unlink. +func InRootUnlink(rootFd uintptr, path string) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + err := C.pathrs_inroot_unlink(C.int(rootFd), cPath) + return fetchError(err) +} + +// InRootRemoveAll wraps pathrs_inroot_remove_all. +func InRootRemoveAll(rootFd uintptr, path string) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + err := C.pathrs_inroot_remove_all(C.int(rootFd), cPath) + return fetchError(err) +} + +// InRootCreat wraps pathrs_inroot_creat. +func InRootCreat(rootFd uintptr, path string, flags int, mode uint32) (uintptr, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + fd := C.pathrs_inroot_creat(C.int(rootFd), cPath, C.int(flags), C.uint(mode)) + return uintptr(fd), fetchError(fd) +} + +// InRootRename wraps pathrs_inroot_rename. +func InRootRename(rootFd uintptr, src, dst string, flags uint) error { + cSrc := C.CString(src) + defer C.free(unsafe.Pointer(cSrc)) + + cDst := C.CString(dst) + defer C.free(unsafe.Pointer(cDst)) + + err := C.pathrs_inroot_rename(C.int(rootFd), cSrc, cDst, C.uint(flags)) + return fetchError(err) +} + +// InRootMkdir wraps pathrs_inroot_mkdir. +func InRootMkdir(rootFd uintptr, path string, mode uint32) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + err := C.pathrs_inroot_mkdir(C.int(rootFd), cPath, C.uint(mode)) + return fetchError(err) +} + +// InRootMkdirAll wraps pathrs_inroot_mkdir_all. 
+func InRootMkdirAll(rootFd uintptr, path string, mode uint32) (uintptr, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + fd := C.pathrs_inroot_mkdir_all(C.int(rootFd), cPath, C.uint(mode)) + return uintptr(fd), fetchError(fd) +} + +// InRootMknod wraps pathrs_inroot_mknod. +func InRootMknod(rootFd uintptr, path string, mode uint32, dev uint64) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + err := C.pathrs_inroot_mknod(C.int(rootFd), cPath, C.uint(mode), C.dev_t(dev)) + return fetchError(err) +} + +// InRootSymlink wraps pathrs_inroot_symlink. +func InRootSymlink(rootFd uintptr, path, target string) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + cTarget := C.CString(target) + defer C.free(unsafe.Pointer(cTarget)) + + err := C.pathrs_inroot_symlink(C.int(rootFd), cPath, cTarget) + return fetchError(err) +} + +// InRootHardlink wraps pathrs_inroot_hardlink. +func InRootHardlink(rootFd uintptr, path, target string) error { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + cTarget := C.CString(target) + defer C.free(unsafe.Pointer(cTarget)) + + err := C.pathrs_inroot_hardlink(C.int(rootFd), cPath, cTarget) + return fetchError(err) +} + +// ProcBase is pathrs_proc_base_t (uint64_t). +type ProcBase C.pathrs_proc_base_t + +// FIXME: We need to open-code the constants because CGo unfortunately will +// implicitly convert any non-literal constants (i.e. those resolved using gcc) +// to signed integers. TODO(review): the link that followed "See" was lost +// from this comment; restore the reference to the underlying issue. +const ( + // ProcRoot is PATHRS_PROC_ROOT. + ProcRoot ProcBase = 0xFFFF_FFFE_7072_6F63 // C.PATHRS_PROC_ROOT + // ProcSelf is PATHRS_PROC_SELF. + ProcSelf ProcBase = 0xFFFF_FFFE_091D_5E1F // C.PATHRS_PROC_SELF + // ProcThreadSelf is PATHRS_PROC_THREAD_SELF. + ProcThreadSelf ProcBase = 0xFFFF_FFFE_3EAD_5E1F // C.PATHRS_PROC_THREAD_SELF + + // ProcBaseTypeMask is __PATHRS_PROC_TYPE_MASK.
+ ProcBaseTypeMask ProcBase = 0xFFFF_FFFF_0000_0000 // C.__PATHRS_PROC_TYPE_MASK + // ProcBaseTypePid is __PATHRS_PROC_TYPE_PID. + ProcBaseTypePid ProcBase = 0x8000_0000_0000_0000 // C.__PATHRS_PROC_TYPE_PID + + // ProcDefaultRootFd is PATHRS_PROC_DEFAULT_ROOTFD. + ProcDefaultRootFd = -int(syscall.EBADF) // C.PATHRS_PROC_DEFAULT_ROOTFD +) + +func assertEqual[T comparable](a, b T, msg string) { + if a != b { + panic(fmt.Sprintf("%s ((%T) %#v != (%T) %#v)", msg, a, a, b, b)) + } +} + +// Verify that the values above match the actual C values. Unfortunately, Go +// only allows us to forcefully cast int64 to uint64 if you use a temporary +// variable, which means we cannot do it in a const context and thus need to do +// it at runtime (even though it is a check that fundamentally could be done at +// compile-time)... +func init() { + var ( + actualProcRoot int64 = C.PATHRS_PROC_ROOT + actualProcSelf int64 = C.PATHRS_PROC_SELF + actualProcThreadSelf int64 = C.PATHRS_PROC_THREAD_SELF + ) + + assertEqual(ProcRoot, ProcBase(actualProcRoot), "PATHRS_PROC_ROOT") + assertEqual(ProcSelf, ProcBase(actualProcSelf), "PATHRS_PROC_SELF") + assertEqual(ProcThreadSelf, ProcBase(actualProcThreadSelf), "PATHRS_PROC_THREAD_SELF") + + var ( + actualProcBaseTypeMask uint64 = C.__PATHRS_PROC_TYPE_MASK + actualProcBaseTypePid uint64 = C.__PATHRS_PROC_TYPE_PID + ) + + assertEqual(ProcBaseTypeMask, ProcBase(actualProcBaseTypeMask), "__PATHRS_PROC_TYPE_MASK") + assertEqual(ProcBaseTypePid, ProcBase(actualProcBaseTypePid), "__PATHRS_PROC_TYPE_PID") + + assertEqual(ProcDefaultRootFd, int(C.PATHRS_PROC_DEFAULT_ROOTFD), "PATHRS_PROC_DEFAULT_ROOTFD") +} + +// ProcPid reimplements the PROC_PID(x) conversion. +func ProcPid(pid uint32) ProcBase { return ProcBaseTypePid | ProcBase(pid) } + +// ProcOpenat wraps pathrs_proc_openat. 
+func ProcOpenat(procRootFd int, base ProcBase, path string, flags int) (uintptr, error) { + cBase := C.pathrs_proc_base_t(base) + + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + fd := C.pathrs_proc_openat(C.int(procRootFd), cBase, cPath, C.int(flags)) + return uintptr(fd), fetchError(fd) +} + +// ProcReadlinkat wraps pathrs_proc_readlinkat. +func ProcReadlinkat(procRootFd int, base ProcBase, path string) (string, error) { + // TODO: See if we can unify this code with InRootReadlink. + + cBase := C.pathrs_proc_base_t(base) + + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + size := 128 + for { + linkBuf := make([]byte, size) + n := C.pathrs_proc_readlinkat( + C.int(procRootFd), cBase, cPath, + C.cast_ptr(unsafe.Pointer(&linkBuf[0])), C.ulong(len(linkBuf))) + switch { + case int(n) < C.__PATHRS_MAX_ERR_VALUE: + return "", fetchError(n) + case int(n) <= len(linkBuf): + return string(linkBuf[:int(n)]), nil + default: + // The contents were truncated. Unlike readlinkat, pathrs returns + // the size of the link when it checked. So use the returned size + // as a basis for the reallocated size (but in order to avoid a DoS + // where a magic-link is growing by a single byte each iteration, + // make sure we are a fair bit larger). + size += int(n) + } + } +} + +// ProcfsOpenHow is pathrs_procfs_open_how (struct). +type ProcfsOpenHow C.pathrs_procfs_open_how + +const ( + // ProcfsNewUnmasked is PATHRS_PROCFS_NEW_UNMASKED. + ProcfsNewUnmasked = C.PATHRS_PROCFS_NEW_UNMASKED +) + +// Flags returns a pointer to the internal flags field to allow other packages +// to modify structure fields that are internal due to Go's visibility model. +func (how *ProcfsOpenHow) Flags() *C.uint64_t { return &how.flags } + +// ProcfsOpen is pathrs_procfs_open (sizeof(*how) is passed automatically). 
+func ProcfsOpen(how *ProcfsOpenHow) (uintptr, error) { + fd := C.pathrs_procfs_open((*C.pathrs_procfs_open_how)(how), C.size_t(unsafe.Sizeof(*how))) + return uintptr(fd), fetchError(fd) +} diff --git a/vendor/cyphar.com/go-pathrs/procfs/procfs_linux.go b/vendor/cyphar.com/go-pathrs/procfs/procfs_linux.go new file mode 100644 index 0000000000..5533c427cb --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/procfs/procfs_linux.go @@ -0,0 +1,246 @@ +//go:build linux + +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +// Package procfs provides a safe API for operating on /proc on Linux. +package procfs + +import ( + "os" + "runtime" + + "cyphar.com/go-pathrs/internal/fdutils" + "cyphar.com/go-pathrs/internal/libpathrs" +) + +// ProcBase is used with [Handle.Readlink] and related functions to indicate what +// /proc subpath path operations should be done relative to. +type ProcBase struct { + inner libpathrs.ProcBase +} + +var ( + // ProcRoot indicates to use /proc. Note that this mode may be more + // expensive because we have to take steps to try to avoid leaking unmasked + // procfs handles, so you should use [ProcSelf] if you can. + ProcRoot = ProcBase{inner: libpathrs.ProcRoot} + // ProcSelf indicates to use /proc/self. For most programs, this is the + // standard choice. + ProcSelf = ProcBase{inner: libpathrs.ProcSelf} + // ProcThreadSelf indicates to use /proc/thread-self. In multi-threaded + // programs where one thread has a different CLONE_FS, it is possible for + // /proc/self to point to the wrong thread and so /proc/thread-self may be + // necessary.
+ ProcThreadSelf = ProcBase{inner: libpathrs.ProcThreadSelf} +) + +// ProcPid returns a ProcBase which indicates to use /proc/$pid for the given +// PID (or TID). Be aware that due to PID recycling, using this is generally +// not safe except in certain circumstances. Namely: +// +// - PID 1 (the init process), as that PID cannot ever get recycled. +// - Your current PID (though you should just use [ProcSelf]). +// - Your current TID if you have used [runtime.LockOSThread] (though you +// should just use [ProcThreadSelf]). +// - PIDs of child processes (as long as you are sure that no other part of +// your program incorrectly catches or ignores SIGCHLD, and that you do it +// *before* you call wait(2) or any equivalent method that could reap +// zombies). +func ProcPid(pid int) ProcBase { + if pid < 0 || int64(pid) >= 1<<31 { + panic("invalid ProcBasePid value") // TODO: should this be an error? + } + return ProcBase{inner: libpathrs.ProcPid(uint32(pid))} +} + +// ThreadCloser is a callback that needs to be called when you are done +// operating on an [os.File] fetched using [Handle.OpenThreadSelf]. +// +// [os.File]: https://pkg.go.dev/os#File +type ThreadCloser func() + +// Handle is a wrapper around an *os.File handle to "/proc", which can be +// used to do further procfs-related operations in a safe way. +type Handle struct { + inner *os.File +} + +// Close releases all internal resources for this [Handle]. +// +// Note that if the handle is actually the global cached handle, this operation +// is a no-op. +func (proc *Handle) Close() error { + var err error + if proc.inner != nil { + err = proc.inner.Close() + } + return err +} + +// OpenOption is a configuration function passed as an argument to [Open]. +type OpenOption func(*libpathrs.ProcfsOpenHow) error + +// UnmaskedProcRoot can be passed to [Open] to request an unmasked procfs +// handle be created.
+// +// procfs, err := procfs.Open(procfs.UnmaskedProcRoot) +func UnmaskedProcRoot(how *libpathrs.ProcfsOpenHow) error { + *how.Flags() |= libpathrs.ProcfsNewUnmasked + return nil +} + +// Open creates a new [Handle] to a safe "/proc", based on the passed +// configuration options (in the form of a series of [OpenOption]s). +func Open(opts ...OpenOption) (*Handle, error) { + var how libpathrs.ProcfsOpenHow + for _, opt := range opts { + if err := opt(&how); err != nil { + return nil, err + } + } + fd, err := libpathrs.ProcfsOpen(&how) + if err != nil { + return nil, err + } + var procFile *os.File + if int(fd) >= 0 { + procFile = os.NewFile(fd, "/proc") + } + // TODO: Check that fd == PATHRS_PROC_DEFAULT_ROOTFD in the <0 case? + return &Handle{inner: procFile}, nil +} + +// TODO: Switch to something fdutils.WithFileFd-like. +func (proc *Handle) fd() int { + if proc.inner != nil { + return int(proc.inner.Fd()) + } + return libpathrs.ProcDefaultRootFd +} + +// TODO: Should we expose open? +func (proc *Handle) open(base ProcBase, path string, flags int) (_ *os.File, Closer ThreadCloser, Err error) { + var closer ThreadCloser + if base == ProcThreadSelf { + runtime.LockOSThread() + closer = runtime.UnlockOSThread + } + defer func() { + if closer != nil && Err != nil { + closer() + Closer = nil + } + }() + + fd, err := libpathrs.ProcOpenat(proc.fd(), base.inner, path, flags) + if err != nil { + return nil, nil, err + } + file, err := fdutils.MkFile(fd) + return file, closer, err +} + +// OpenRoot safely opens a given path from inside /proc/. +// +// This function must only be used for accessing global information from procfs +// (such as /proc/cpuinfo) or information about other processes (such as +// /proc/1). Accessing your own process information should be done using +// [Handle.OpenSelf] or [Handle.OpenThreadSelf].
+func (proc *Handle) OpenRoot(path string, flags int) (*os.File, error) { + file, closer, err := proc.open(ProcRoot, path, flags) + if closer != nil { + // should not happen + panic("non-zero closer returned from procOpen(ProcRoot)") + } + return file, err +} + +// OpenSelf safely opens a given path from inside /proc/self/. +// +// This method is recommended for getting process information about the current +// process for almost all Go processes *except* for cases where there are +// [runtime.LockOSThread] threads that have changed some aspect of their state +// (such as through unshare(CLONE_FS) or changing namespaces). +// +// For such non-homogeneous processes, /proc/self may refer to a task +// that has different state from the current goroutine and so it may be +// preferable to use [Handle.OpenThreadSelf]. The same is true if a user +// really wants to inspect the current OS thread's information (such as +// /proc/thread-self/stack or /proc/thread-self/status which is always uniquely +// per-thread). +// +// Unlike [Handle.OpenThreadSelf], this method does not involve locking +// the goroutine to the current OS thread and so is simpler to use and +// theoretically has slightly less overhead. +// +// [runtime.LockOSThread]: https://pkg.go.dev/runtime#LockOSThread +func (proc *Handle) OpenSelf(path string, flags int) (*os.File, error) { + file, closer, err := proc.open(ProcSelf, path, flags) + if closer != nil { + // should not happen + panic("non-zero closer returned from procOpen(ProcSelf)") + } + return file, err +} + +// OpenPid safely opens a given path from inside /proc/$pid/, where pid can be +// either a PID or TID. +// +// This is effectively equivalent to calling [Handle.OpenRoot] with the +// pid prefixed to the subpath. +// +// Be aware that due to PID recycling, using this is generally not safe except +// in certain circumstances. See the documentation of [ProcPid] for more +// details.
+func (proc *Handle) OpenPid(pid int, path string, flags int) (*os.File, error) { + file, closer, err := proc.open(ProcPid(pid), path, flags) + if closer != nil { + // should not happen + panic("non-zero closer returned from procOpen(ProcPidOpen)") + } + return file, err +} + +// OpenThreadSelf safely opens a given path from inside /proc/thread-self/. +// +// Most Go processes have homogeneous threads (all threads have most of the +// same kernel state such as CLONE_FS) and so [Handle.OpenSelf] is +// preferable for most users. +// +// For non-homogeneous threads, or users that actually want thread-specific +// information (such as /proc/thread-self/stack or /proc/thread-self/status), +// this method is necessary. +// +// Because Go can change the running OS thread of your goroutine without notice +// (and then subsequently kill the old thread), this method will lock the +// current goroutine to the OS thread (with [runtime.LockOSThread]) and the +// caller is responsible for unlocking the OS thread with the +// [ThreadCloser] callback once they are done using the returned file. This +// callback MUST be called AFTER you have finished using the returned +// [os.File]. This callback is completely separate to [os.File.Close], so it +// must be called regardless of how you close the handle. +// +// [runtime.LockOSThread]: https://pkg.go.dev/runtime#LockOSThread +// [os.File]: https://pkg.go.dev/os#File +// [os.File.Close]: https://pkg.go.dev/os#File.Close +func (proc *Handle) OpenThreadSelf(path string, flags int) (*os.File, ThreadCloser, error) { + return proc.open(ProcThreadSelf, path, flags) +} + +// Readlink safely reads the contents of a symlink from the given procfs base. +// +// This is effectively equivalent to doing an Open*(O_PATH|O_NOFOLLOW) of the +// path and then doing unix.Readlinkat(fd, ""), but with the benefit that +// thread locking is not necessary for [ProcThreadSelf].
+func (proc *Handle) Readlink(base ProcBase, path string) (string, error) { + return libpathrs.ProcReadlinkat(proc.fd(), base.inner, path) +} diff --git a/vendor/cyphar.com/go-pathrs/root_linux.go b/vendor/cyphar.com/go-pathrs/root_linux.go new file mode 100644 index 0000000000..edc9e4c87f --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/root_linux.go @@ -0,0 +1,367 @@ +//go:build linux + +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +package pathrs + +import ( + "errors" + "fmt" + "os" + "syscall" + + "cyphar.com/go-pathrs/internal/fdutils" + "cyphar.com/go-pathrs/internal/libpathrs" +) + +// Root is a handle to the root of a directory tree to resolve within. The only +// purpose of this "root handle" is to perform operations within the directory +// tree, or to get a [Handle] to inodes within the directory tree. +// +// At time of writing, it is considered a *VERY BAD IDEA* to open a [Root] +// inside a possibly-attacker-controlled directory tree. While we do have +// protections that should defend against it, it's far more dangerous than just +// opening a directory tree which is not inside a potentially-untrusted +// directory. +type Root struct { + inner *os.File +} + +// OpenRoot creates a new [Root] handle to the directory at the given path. +func OpenRoot(path string) (*Root, error) { + fd, err := libpathrs.OpenRoot(path) + if err != nil { + return nil, err + } + file, err := fdutils.MkFile(fd) + if err != nil { + return nil, err + } + return &Root{inner: file}, nil +} + +// RootFromFile creates a new [Root] handle from an [os.File] referencing a +// directory. 
The provided file will be duplicated, so the original file should +// still be closed by the caller. +// +// This is effectively the inverse operation of [Root.IntoFile]. +// +// [os.File]: https://pkg.go.dev/os#File +func RootFromFile(file *os.File) (*Root, error) { + newFile, err := fdutils.DupFile(file) + if err != nil { + return nil, fmt.Errorf("duplicate root fd: %w", err) + } + return &Root{inner: newFile}, nil +} + +// Resolve resolves the given path within the [Root]'s directory tree, and +// returns a [Handle] to the resolved path. The path must already exist, +// otherwise an error will occur. +// +// All symlinks (including trailing symlinks) are followed, but they are +// resolved within the rootfs. If you wish to open a handle to the symlink +// itself, use [Root.ResolveNoFollow]. +func (r *Root) Resolve(path string) (*Handle, error) { + return fdutils.WithFileFd(r.inner, func(rootFd uintptr) (*Handle, error) { + handleFd, err := libpathrs.InRootResolve(rootFd, path) + if err != nil { + return nil, err + } + handleFile, err := fdutils.MkFile(handleFd) + if err != nil { + return nil, err + } + return &Handle{inner: handleFile}, nil + }) +} + +// ResolveNoFollow is effectively an O_NOFOLLOW version of [Root.Resolve]. Their +// behaviour is identical, except that *trailing* symlinks will not be +// followed. If the final component is a trailing symlink, an O_PATH|O_NOFOLLOW +// handle to the symlink itself is returned.
+func (r *Root) ResolveNoFollow(path string) (*Handle, error) { + return fdutils.WithFileFd(r.inner, func(rootFd uintptr) (*Handle, error) { + handleFd, err := libpathrs.InRootResolveNoFollow(rootFd, path) + if err != nil { + return nil, err + } + handleFile, err := fdutils.MkFile(handleFd) + if err != nil { + return nil, err + } + return &Handle{inner: handleFile}, nil + }) +} + +// Open is effectively shorthand for [Root.Resolve] followed by [Handle.Open], but +// can be slightly more efficient (it reduces CGo overhead and the number of +// syscalls used when using the openat2-based resolver) and is arguably more +// ergonomic to use. +// +// This is effectively equivalent to [os.Open]. +// +// [os.Open]: https://pkg.go.dev/os#Open +func (r *Root) Open(path string) (*os.File, error) { + return r.OpenFile(path, os.O_RDONLY) +} + +// OpenFile is effectively shorthand for [Root.Resolve] followed by +// [Handle.OpenFile], but can be slightly more efficient (it reduces CGo +// overhead and the number of syscalls used when using the openat2-based +// resolver) and is arguably more ergonomic to use. +// +// However, if flags contains os.O_NOFOLLOW and the path is a symlink, then +// OpenFile's behaviour will match that of openat2. In most cases an error will +// be returned, but if os.O_PATH is provided along with os.O_NOFOLLOW then a +// file equivalent to [Root.ResolveNoFollow] will be returned instead. +// +// This is effectively equivalent to [os.OpenFile], except that os.O_CREAT is +// not supported. +// +// [os.OpenFile]: https://pkg.go.dev/os#OpenFile +func (r *Root) OpenFile(path string, flags int) (*os.File, error) { + return fdutils.WithFileFd(r.inner, func(rootFd uintptr) (*os.File, error) { + fd, err := libpathrs.InRootOpen(rootFd, path, flags) + if err != nil { + return nil, err + } + return fdutils.MkFile(fd) + }) +} + +// Create creates a file within the [Root]'s directory tree at the given path, +// and returns a handle to the file.
The provided mode is used for the new file +// (the process's umask applies). +// +// Unlike [os.Create], if the file already exists an error is returned rather +// than the file being opened and truncated. +// +// [os.Create]: https://pkg.go.dev/os#Create +func (r *Root) Create(path string, flags int, mode os.FileMode) (*os.File, error) { + unixMode, err := toUnixMode(mode, false) + if err != nil { + return nil, err + } + return fdutils.WithFileFd(r.inner, func(rootFd uintptr) (*os.File, error) { + handleFd, err := libpathrs.InRootCreat(rootFd, path, flags, unixMode) + if err != nil { + return nil, err + } + return fdutils.MkFile(handleFd) + }) +} + +// Rename two paths within a [Root]'s directory tree. The flags argument is +// identical to the RENAME_* flags to the renameat2(2) system call. +func (r *Root) Rename(src, dst string, flags uint) error { + _, err := fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootRename(rootFd, src, dst, flags) + return struct{}{}, err + }) + return err +} + +// RemoveDir removes the named empty directory within a [Root]'s directory +// tree. +func (r *Root) RemoveDir(path string) error { + _, err := fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootRmdir(rootFd, path) + return struct{}{}, err + }) + return err +} + +// RemoveFile removes the named file within a [Root]'s directory tree. +func (r *Root) RemoveFile(path string) error { + _, err := fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootUnlink(rootFd, path) + return struct{}{}, err + }) + return err +} + +// Remove removes the named file or (empty) directory within a [Root]'s +// directory tree. +// +// This is effectively equivalent to [os.Remove].
+// +// [os.Remove]: https://pkg.go.dev/os#Remove +func (r *Root) Remove(path string) error { + // In order to match os.Remove's implementation we try unlink first and + // fall back to rmdir (without checking the file type beforehand), and adjust + // the error based on whether pathrs_inroot_rmdir() returned ENOTDIR. + unlinkErr := r.RemoveFile(path) + if unlinkErr == nil { + return nil + } + rmdirErr := r.RemoveDir(path) + if rmdirErr == nil { + return nil + } + // Both failed, adjust the error in the same way that os.Remove does. + err := rmdirErr + if errors.Is(err, syscall.ENOTDIR) { + err = unlinkErr + } + return err +} + +// RemoveAll recursively deletes a path and all of its children. +// +// This is effectively equivalent to [os.RemoveAll]. +// +// [os.RemoveAll]: https://pkg.go.dev/os#RemoveAll +func (r *Root) RemoveAll(path string) error { + _, err := fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootRemoveAll(rootFd, path) + return struct{}{}, err + }) + return err +} + +// Mkdir creates a directory within a [Root]'s directory tree. The provided +// mode is used for the new directory (the process's umask applies). +// +// This is effectively equivalent to [os.Mkdir]. +// +// [os.Mkdir]: https://pkg.go.dev/os#Mkdir +func (r *Root) Mkdir(path string, mode os.FileMode) error { + unixMode, err := toUnixMode(mode, false) + if err != nil { + return err + } + + _, err = fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootMkdir(rootFd, path, unixMode) + return struct{}{}, err + }) + return err +} + +// MkdirAll creates a directory (and any parent path components if they don't +// exist) within a [Root]'s directory tree. The provided mode is used for any +// directories created by this function (the process's umask applies). +// +// This is effectively equivalent to [os.MkdirAll].
+// +// [os.MkdirAll]: https://pkg.go.dev/os#MkdirAll +func (r *Root) MkdirAll(path string, mode os.FileMode) (*Handle, error) { + unixMode, err := toUnixMode(mode, false) + if err != nil { + return nil, err + } + + return fdutils.WithFileFd(r.inner, func(rootFd uintptr) (*Handle, error) { + handleFd, err := libpathrs.InRootMkdirAll(rootFd, path, unixMode) + if err != nil { + return nil, err + } + handleFile, err := fdutils.MkFile(handleFd) + if err != nil { + return nil, err + } + return &Handle{inner: handleFile}, err + }) +} + +// Mknod creates a new device inode of the given type within a [Root]'s +// directory tree. The provided mode is used for the new inode (the +// process's umask applies). +// +// This is effectively equivalent to [unix.Mknod]. +// +// [unix.Mknod]: https://pkg.go.dev/golang.org/x/sys/unix#Mknod +func (r *Root) Mknod(path string, mode os.FileMode, dev uint64) error { + unixMode, err := toUnixMode(mode, true) + if err != nil { + return err + } + + _, err = fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootMknod(rootFd, path, unixMode, dev) + return struct{}{}, err + }) + return err +} + +// Symlink creates a symlink within a [Root]'s directory tree. The symlink is +// created at path and is a link to target. +// +// This is effectively equivalent to [os.Symlink]. +// +// [os.Symlink]: https://pkg.go.dev/os#Symlink +func (r *Root) Symlink(path, target string) error { + _, err := fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootSymlink(rootFd, path, target) + return struct{}{}, err + }) + return err +} + +// Hardlink creates a hardlink within a [Root]'s directory tree. The hardlink +// is created at path and is a link to target. Both paths are within the +// [Root]'s directory tree (you cannot hardlink to a different [Root] or the +// host). +// +// This is effectively equivalent to [os.Link].
+// +// [os.Link]: https://pkg.go.dev/os#Link +func (r *Root) Hardlink(path, target string) error { + _, err := fdutils.WithFileFd(r.inner, func(rootFd uintptr) (struct{}, error) { + err := libpathrs.InRootHardlink(rootFd, path, target) + return struct{}{}, err + }) + return err +} + +// Readlink returns the target of a symlink within a [Root]'s directory tree. +// +// This is effectively equivalent to [os.Readlink]. +// +// [os.Readlink]: https://pkg.go.dev/os#Readlink +func (r *Root) Readlink(path string) (string, error) { + return fdutils.WithFileFd(r.inner, func(rootFd uintptr) (string, error) { + return libpathrs.InRootReadlink(rootFd, path) + }) +} + +// IntoFile unwraps the [Root] into its underlying [os.File]. +// +// It is critical that you do not operate on this file descriptor yourself, +// because the security properties of libpathrs depend on users doing all +// relevant filesystem operations through libpathrs. +// +// This operation returns the internal [os.File] of the [Root] directly, so +// calling [Root.Close] will also close any copies of the returned [os.File]. +// If you want to get an independent copy, use [Root.Clone] followed by +// [Root.IntoFile] on the cloned [Root]. +// +// [os.File]: https://pkg.go.dev/os#File +func (r *Root) IntoFile() *os.File { + // TODO: Figure out if we really don't want to make a copy. + // TODO: We almost certainly want to clear r.inner here, but we can't do + // that easily atomically (we could use atomic.Value but that'll make + // things quite a bit uglier). + return r.inner +} + +// Clone creates a copy of a [Root] handle, such that it has a separate +// lifetime to the original (while referring to the same underlying directory). +func (r *Root) Clone() (*Root, error) { + return RootFromFile(r.inner) +} + +// Close frees all of the resources used by the [Root] handle.
+func (r *Root) Close() error { + return r.inner.Close() +} diff --git a/vendor/cyphar.com/go-pathrs/utils_linux.go b/vendor/cyphar.com/go-pathrs/utils_linux.go new file mode 100644 index 0000000000..2208d608f8 --- /dev/null +++ b/vendor/cyphar.com/go-pathrs/utils_linux.go @@ -0,0 +1,56 @@ +//go:build linux + +// SPDX-License-Identifier: MPL-2.0 +/* + * libpathrs: safe path resolution on Linux + * Copyright (C) 2019-2025 Aleksa Sarai + * Copyright (C) 2019-2025 SUSE LLC + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +package pathrs + +import ( + "fmt" + "os" + + "golang.org/x/sys/unix" +) + +//nolint:cyclop // this function needs to handle a lot of cases +func toUnixMode(mode os.FileMode, needsType bool) (uint32, error) { + sysMode := uint32(mode.Perm()) + switch mode & os.ModeType { //nolint:exhaustive // we only care about ModeType bits + case 0: + if needsType { + sysMode |= unix.S_IFREG + } + case os.ModeDir: + sysMode |= unix.S_IFDIR + case os.ModeSymlink: + sysMode |= unix.S_IFLNK + case os.ModeCharDevice | os.ModeDevice: + sysMode |= unix.S_IFCHR + case os.ModeDevice: + sysMode |= unix.S_IFBLK + case os.ModeNamedPipe: + sysMode |= unix.S_IFIFO + case os.ModeSocket: + sysMode |= unix.S_IFSOCK + default: + return 0, fmt.Errorf("invalid mode filetype %+o", mode) + } + if mode&os.ModeSetuid != 0 { + sysMode |= unix.S_ISUID + } + if mode&os.ModeSetgid != 0 { + sysMode |= unix.S_ISGID + } + if mode&os.ModeSticky != 0 { + sysMode |= unix.S_ISVTX + } + return sysMode, nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/.gitignore b/vendor/github.com/checkpoint-restore/go-criu/v6/.gitignore new file mode 100644 index 0000000000..5518060133 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/.gitignore @@ -0,0 +1,13 @@ +test/test +test/test.coverage +test/piggie/piggie 
+test/phaul/phaul +test/phaul/phaul.coverage +test/loop/loop +test/crit/crit-test +test/crit/test-imgs +image +scripts/*.h +scripts/expected.go +scripts/output.go +crit/bin diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/.golangci.yml b/vendor/github.com/checkpoint-restore/go-criu/v6/.golangci.yml new file mode 100644 index 0000000000..c4515109b1 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/.golangci.yml @@ -0,0 +1,10 @@ +linters: + presets: + - bugs + - performance + - unused + - format + +linters-settings: + exhaustive: + default-signifies-exhaustive: true diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/LICENSE b/vendor/github.com/checkpoint-restore/go-criu/v6/LICENSE new file mode 100644 index 0000000000..8dada3edaf --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/Makefile b/vendor/github.com/checkpoint-restore/go-criu/v6/Makefile new file mode 100644 index 0000000000..0c2916001e --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/Makefile @@ -0,0 +1,41 @@ +SHELL = /bin/bash +GO ?= go +CC ?= gcc + +all: build + +lint: + golangci-lint run ./... + +build: rpc/rpc.pb.go stats/stats.pb.go + $(GO) build -v ./... + # Build crit binary + $(MAKE) -C crit bin/crit + +test: build + $(MAKE) -C test + +coverage: + $(MAKE) -C test coverage + +codecov: + $(MAKE) -C test codecov + +rpc/rpc.proto: + curl -sSL https://raw.githubusercontent.com/checkpoint-restore/criu/master/images/rpc.proto -o $@ + +rpc/rpc.pb.go: rpc/rpc.proto + protoc --go_out=. --go_opt=M$^=rpc/ $^ + +stats/stats.proto: + curl -sSL https://raw.githubusercontent.com/checkpoint-restore/criu/master/images/stats.proto -o $@ + +stats/stats.pb.go: stats/stats.proto + protoc --go_out=. 
--go_opt=M$^=stats/ $^ + +vendor: + GO111MODULE=on $(GO) mod tidy + GO111MODULE=on $(GO) mod vendor + GO111MODULE=on $(GO) mod verify + +.PHONY: build test lint vendor coverage codecov diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/README.md b/vendor/github.com/checkpoint-restore/go-criu/v6/README.md new file mode 100644 index 0000000000..d186cb8960 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/README.md @@ -0,0 +1,105 @@ +[![test](https://github.com/checkpoint-restore/go-criu/workflows/ci/badge.svg?branch=master)](https://github.com/checkpoint-restore/go-criu/actions?query=workflow%3Aci) +[![verify](https://github.com/checkpoint-restore/go-criu/workflows/verify/badge.svg?branch=master)](https://github.com/checkpoint-restore/go-criu/actions?query=workflow%3Averify) +[![Go Reference](https://pkg.go.dev/badge/github.com/checkpoint-restore/go-criu.svg)](https://pkg.go.dev/github.com/checkpoint-restore/go-criu) + +## go-criu -- Go bindings for CRIU + +This repository provides Go bindings for [CRIU](https://criu.org/). +The code is based on the Go-based PHaul implementation from the CRIU repository. +For easier inclusion into other Go projects, the CRIU Go bindings have been moved to this repository. + +### CRIU +The Go bindings provide an easy way to use the CRIU RPC calls from Go without +the need to set up all the infrastructure to make the actual RPC connection to CRIU. + +The following example would print the version of CRIU: +```go +import ( + "log" + + "github.com/checkpoint-restore/go-criu/v6" +) + +func main() { + c := criu.MakeCriu() + version, err := c.GetCriuVersion() + if err != nil { + log.Fatalln(err) + } + log.Println(version) +} +``` + +or to just check if at least a certain CRIU version is installed: + +```go + c := criu.MakeCriu() + result, err := c.IsCriuAtLeast(31100) +``` + +### CRIT + +The `crit` package provides bindings to decode, encode, and manipulate +CRIU image files natively within Go. 
It also provides a CLI tool similar +to the original CRIT Python tool. To get started with this, see the docs +at https://criu.org/CRIT_(Go_library). + +## Releases + +The first go-criu release was 3.11 based on CRIU 3.11. The initial plan +was to follow CRIU so that go-criu would carry the same version number as +CRIU. + +As go-criu is imported in other projects and as Go modules are expected +to follow Semantic Versioning go-criu will also follow Semantic Versioning +starting with the 4.0.0 release. + +The following table shows the relation between go-criu and criu versions: + +| Major version | Latest release | CRIU version | +| -------------- | -------------- | ------------ | +| v6             | 6.2.0         | 3.17         | +| v5             | 5.3.0         | 3.16         | +| v5             | 5.0.0         | 3.15         | +| v4             | 4.1.0         | 3.14         | + +## How to contribute + +While bug fixes can first be identified via an "issue", that is not required. +It's ok to just open up a PR with the fix, but make sure you include the same +information you would have included in an issue - like how to reproduce it. + +PRs for new features should include some background on what use cases the +new code is trying to address. When possible and when it makes sense, try to +break-up larger PRs into smaller ones - it's easier to review smaller +code changes. But only if those smaller ones make sense as stand-alone PRs. + +Regardless of the type of PR, all PRs should include: +* well documented code changes +* additional testcases. Ideally, they should fail w/o your code change applied +* documentation changes + +Squash your commits into logical pieces of work that might want to be reviewed +separate from the rest of the PRs. Ideally, each commit should implement a +single idea, and the PR branch should pass the tests at every commit. GitHub +makes it easy to review the cumulative effect of many commits; so, when in +doubt, use smaller commits. 
+ +PRs that fix issues should include a reference like `Closes #XXXX` in the +commit message so that github will automatically close the referenced issue +when the PR is merged. + +Contributors must assert that they are in compliance with the [Developer +Certificate of Origin 1.1](http://developercertificate.org/). This is achieved +by adding a "Signed-off-by" line containing the contributor's name and e-mail +to every commit message. Your signature certifies that you wrote the patch or +otherwise have the right to pass it on as an open-source patch. + +## License and copyright + +Unless mentioned otherwise in a specific file's header, all code in +this project is released under the Apache 2.0 license. + +The author of a change remains the copyright holder of their code +(no copyright assignment). The list of authors and contributors can be +retrieved from the git commit history and in some cases, the file headers. diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/features.go b/vendor/github.com/checkpoint-restore/go-criu/v6/features.go new file mode 100644 index 0000000000..4e779d95bc --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/features.go @@ -0,0 +1,45 @@ +package criu + +import ( + "fmt" + + "github.com/checkpoint-restore/go-criu/v6/rpc" +) + +// Feature checking in go-criu is based on the libcriu feature checking function. + +// Feature checking allows the user to check if CRIU supports +// certain features. There are CRIU features which do not depend +// on the version of CRIU but on kernel features or architecture. +// +// One example is memory tracking. Memory tracking can be disabled +// in the kernel or there are architectures which do not support +// it (aarch64 for example). By using the feature check a libcriu +// user can easily query CRIU if a certain feature is available. +// +// The features which should be checked can be marked in the +// structure 'struct criu_feature_check'. 
Each structure member +// that is set to true will result in CRIU checking for the +// availability of that feature in the current combination of +// CRIU/kernel/architecture. +// +// Available features will be set to true in the structure returned +// on success (taken from CRIU's response). Missing features will be set to false. + +func (c *Criu) FeatureCheck(features *rpc.CriuFeatures) (*rpc.CriuFeatures, error) { + resp, err := c.doSwrkWithResp( + rpc.CriuReqType_FEATURE_CHECK, + nil, + nil, + features, + ) + if err != nil { + return nil, err + } + + if resp.GetType() != rpc.CriuReqType_FEATURE_CHECK { + return nil, fmt.Errorf("Unexpected CRIU RPC response") + } + + return resp.GetFeatures(), nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/main.go b/vendor/github.com/checkpoint-restore/go-criu/v6/main.go new file mode 100644 index 0000000000..2e099c859c --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/main.go @@ -0,0 +1,264 @@ +package criu + +import ( + "errors" + "fmt" + "os" + "os/exec" + "strconv" + "syscall" + + "github.com/checkpoint-restore/go-criu/v6/rpc" + "google.golang.org/protobuf/proto" +) + +// Criu struct +type Criu struct { + swrkCmd *exec.Cmd + swrkSk *os.File + swrkPath string +} + +// MakeCriu returns the Criu object required for most operations +func MakeCriu() *Criu { + return &Criu{ + swrkPath: "criu", + } +} + +// SetCriuPath allows setting the path to the CRIU binary +// if it is in a non standard location +func (c *Criu) SetCriuPath(path string) { + c.swrkPath = path +} + +// Prepare sets up everything for the RPC communication to CRIU +func (c *Criu) Prepare() error { + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET, 0) + if err != nil { + return err + } + + cln := os.NewFile(uintptr(fds[0]), "criu-xprt-cln") + syscall.CloseOnExec(fds[0]) + srv := os.NewFile(uintptr(fds[1]), "criu-xprt-srv") + defer srv.Close() + + args := []string{"swrk", strconv.Itoa(fds[1])} + // #nosec G204 + cmd :=
exec.Command(c.swrkPath, args...) + + err = cmd.Start() + if err != nil { + cln.Close() + return err + } + + c.swrkCmd = cmd + c.swrkSk = cln + + return nil +} + +// Cleanup cleans up +func (c *Criu) Cleanup() { + if c.swrkCmd != nil { + c.swrkSk.Close() + c.swrkSk = nil + _ = c.swrkCmd.Wait() + c.swrkCmd = nil + } +} + +func (c *Criu) sendAndRecv(reqB []byte) ([]byte, int, error) { + cln := c.swrkSk + _, err := cln.Write(reqB) + if err != nil { + return nil, 0, err + } + + respB := make([]byte, 2*4096) + n, err := cln.Read(respB) + if err != nil { + return nil, 0, err + } + + return respB, n, nil +} + +func (c *Criu) doSwrk(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) error { + resp, err := c.doSwrkWithResp(reqType, opts, nfy, nil) + if err != nil { + return err + } + respType := resp.GetType() + if respType != reqType { + return errors.New("unexpected CRIU RPC response") + } + + return nil +} + +func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify, features *rpc.CriuFeatures) (*rpc.CriuResp, error) { + var resp *rpc.CriuResp + + req := rpc.CriuReq{ + Type: &reqType, + Opts: opts, + } + + if nfy != nil { + opts.NotifyScripts = proto.Bool(true) + } + + if features != nil { + req.Features = features + } + + if c.swrkCmd == nil { + err := c.Prepare() + if err != nil { + return nil, err + } + + defer c.Cleanup() + } + + for { + reqB, err := proto.Marshal(&req) + if err != nil { + return nil, err + } + + respB, respS, err := c.sendAndRecv(reqB) + if err != nil { + return nil, err + } + + resp = &rpc.CriuResp{} + err = proto.Unmarshal(respB[:respS], resp) + if err != nil { + return nil, err + } + + if !resp.GetSuccess() { + return resp, fmt.Errorf("operation failed (msg:%s err:%d)", + resp.GetCrErrmsg(), resp.GetCrErrno()) + } + + respType := resp.GetType() + if respType != rpc.CriuReqType_NOTIFY { + break + } + if nfy == nil { + return resp, errors.New("unexpected notify") + } + + notify := resp.GetNotify() + switch 
notify.GetScript() { + case "pre-dump": + err = nfy.PreDump() + case "post-dump": + err = nfy.PostDump() + case "pre-restore": + err = nfy.PreRestore() + case "post-restore": + err = nfy.PostRestore(notify.GetPid()) + case "network-lock": + err = nfy.NetworkLock() + case "network-unlock": + err = nfy.NetworkUnlock() + case "setup-namespaces": + err = nfy.SetupNamespaces(notify.GetPid()) + case "post-setup-namespaces": + err = nfy.PostSetupNamespaces() + case "post-resume": + err = nfy.PostResume() + default: + err = nil + } + + if err != nil { + return resp, err + } + + req = rpc.CriuReq{ + Type: &respType, + NotifySuccess: proto.Bool(true), + } + } + + return resp, nil +} + +// Dump dumps a process +func (c *Criu) Dump(opts *rpc.CriuOpts, nfy Notify) error { + return c.doSwrk(rpc.CriuReqType_DUMP, opts, nfy) +} + +// Restore restores a process +func (c *Criu) Restore(opts *rpc.CriuOpts, nfy Notify) error { + return c.doSwrk(rpc.CriuReqType_RESTORE, opts, nfy) +} + +// PreDump does a pre-dump +func (c *Criu) PreDump(opts *rpc.CriuOpts, nfy Notify) error { + return c.doSwrk(rpc.CriuReqType_PRE_DUMP, opts, nfy) +} + +// StartPageServer starts the page server +func (c *Criu) StartPageServer(opts *rpc.CriuOpts) error { + return c.doSwrk(rpc.CriuReqType_PAGE_SERVER, opts, nil) +} + +// StartPageServerChld starts the page server and returns PID and port +func (c *Criu) StartPageServerChld(opts *rpc.CriuOpts) (int, int, error) { + resp, err := c.doSwrkWithResp(rpc.CriuReqType_PAGE_SERVER_CHLD, opts, nil, nil) + if err != nil { + return 0, 0, err + } + + return int(resp.Ps.GetPid()), int(resp.Ps.GetPort()), nil +} + +// GetCriuVersion executes the VERSION RPC call and returns the version +// as an integer. 
Major * 10000 + Minor * 100 + SubLevel +func (c *Criu) GetCriuVersion() (int, error) { + resp, err := c.doSwrkWithResp(rpc.CriuReqType_VERSION, nil, nil, nil) + if err != nil { + return 0, err + } + + if resp.GetType() != rpc.CriuReqType_VERSION || resp.GetVersion() == nil { + return 0, fmt.Errorf("Unexpected CRIU RPC response") + } + + version := int(resp.GetVersion().GetMajorNumber()) * 10000 + version += int(resp.GetVersion().GetMinorNumber()) * 100 + if resp.GetVersion().Sublevel != nil { + version += int(*resp.GetVersion().Sublevel) + } + + if resp.GetVersion().Gitid != nil { + // taken from runc: if it is a git release -> increase minor by 1 + version -= (version % 100) + version += 100 + } + + return version, nil +} + +// IsCriuAtLeast checks if the version is at least the same +// as the parameter version +func (c *Criu) IsCriuAtLeast(version int) (bool, error) { + criuVersion, err := c.GetCriuVersion() + if err != nil { + return false, err + } + + if criuVersion >= version { + return true, nil + } + + return false, nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/notify.go b/vendor/github.com/checkpoint-restore/go-criu/v6/notify.go new file mode 100644 index 0000000000..a177f2bb5c --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/notify.go @@ -0,0 +1,62 @@ +package criu + +// Notify interface +type Notify interface { + PreDump() error + PostDump() error + PreRestore() error + PostRestore(pid int32) error + NetworkLock() error + NetworkUnlock() error + SetupNamespaces(pid int32) error + PostSetupNamespaces() error + PostResume() error +} + +// NoNotify struct +type NoNotify struct{} + +// PreDump NoNotify +func (c NoNotify) PreDump() error { + return nil +} + +// PostDump NoNotify +func (c NoNotify) PostDump() error { + return nil +} + +// PreRestore NoNotify +func (c NoNotify) PreRestore() error { + return nil +} + +// PostRestore NoNotify +func (c NoNotify) PostRestore(pid int32) error { + return nil +} + +// NetworkLock NoNotify +func (c NoNotify)
NetworkLock() error { + return nil +} + +// NetworkUnlock NoNotify +func (c NoNotify) NetworkUnlock() error { + return nil +} + +// SetupNamespaces NoNotify +func (c NoNotify) SetupNamespaces(pid int32) error { + return nil +} + +// PostSetupNamespaces NoNotify +func (c NoNotify) PostSetupNamespaces() error { + return nil +} + +// PostResume NoNotify +func (c NoNotify) PostResume() error { + return nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.pb.go b/vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.pb.go new file mode 100644 index 0000000000..67bd8593e8 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.pb.go @@ -0,0 +1,2327 @@ +// SPDX-License-Identifier: MIT + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.28.1 +// protoc v3.19.4 +// source: rpc/rpc.proto + +package rpc + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type CriuCgMode int32 + +const ( + CriuCgMode_IGNORE CriuCgMode = 0 + CriuCgMode_CG_NONE CriuCgMode = 1 + CriuCgMode_PROPS CriuCgMode = 2 + CriuCgMode_SOFT CriuCgMode = 3 + CriuCgMode_FULL CriuCgMode = 4 + CriuCgMode_STRICT CriuCgMode = 5 + CriuCgMode_DEFAULT CriuCgMode = 6 +) + +// Enum value maps for CriuCgMode. 
+var ( + CriuCgMode_name = map[int32]string{ + 0: "IGNORE", + 1: "CG_NONE", + 2: "PROPS", + 3: "SOFT", + 4: "FULL", + 5: "STRICT", + 6: "DEFAULT", + } + CriuCgMode_value = map[string]int32{ + "IGNORE": 0, + "CG_NONE": 1, + "PROPS": 2, + "SOFT": 3, + "FULL": 4, + "STRICT": 5, + "DEFAULT": 6, + } +) + +func (x CriuCgMode) Enum() *CriuCgMode { + p := new(CriuCgMode) + *p = x + return p +} + +func (x CriuCgMode) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CriuCgMode) Descriptor() protoreflect.EnumDescriptor { + return file_rpc_rpc_proto_enumTypes[0].Descriptor() +} + +func (CriuCgMode) Type() protoreflect.EnumType { + return &file_rpc_rpc_proto_enumTypes[0] +} + +func (x CriuCgMode) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *CriuCgMode) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = CriuCgMode(num) + return nil +} + +// Deprecated: Use CriuCgMode.Descriptor instead. +func (CriuCgMode) EnumDescriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{0} +} + +type CriuNetworkLockMethod int32 + +const ( + CriuNetworkLockMethod_IPTABLES CriuNetworkLockMethod = 1 + CriuNetworkLockMethod_NFTABLES CriuNetworkLockMethod = 2 +) + +// Enum value maps for CriuNetworkLockMethod. 
+var ( + CriuNetworkLockMethod_name = map[int32]string{ + 1: "IPTABLES", + 2: "NFTABLES", + } + CriuNetworkLockMethod_value = map[string]int32{ + "IPTABLES": 1, + "NFTABLES": 2, + } +) + +func (x CriuNetworkLockMethod) Enum() *CriuNetworkLockMethod { + p := new(CriuNetworkLockMethod) + *p = x + return p +} + +func (x CriuNetworkLockMethod) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CriuNetworkLockMethod) Descriptor() protoreflect.EnumDescriptor { + return file_rpc_rpc_proto_enumTypes[1].Descriptor() +} + +func (CriuNetworkLockMethod) Type() protoreflect.EnumType { + return &file_rpc_rpc_proto_enumTypes[1] +} + +func (x CriuNetworkLockMethod) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *CriuNetworkLockMethod) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = CriuNetworkLockMethod(num) + return nil +} + +// Deprecated: Use CriuNetworkLockMethod.Descriptor instead. +func (CriuNetworkLockMethod) EnumDescriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{1} +} + +type CriuPreDumpMode int32 + +const ( + CriuPreDumpMode_SPLICE CriuPreDumpMode = 1 + CriuPreDumpMode_VM_READ CriuPreDumpMode = 2 +) + +// Enum value maps for CriuPreDumpMode. 
+var ( + CriuPreDumpMode_name = map[int32]string{ + 1: "SPLICE", + 2: "VM_READ", + } + CriuPreDumpMode_value = map[string]int32{ + "SPLICE": 1, + "VM_READ": 2, + } +) + +func (x CriuPreDumpMode) Enum() *CriuPreDumpMode { + p := new(CriuPreDumpMode) + *p = x + return p +} + +func (x CriuPreDumpMode) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CriuPreDumpMode) Descriptor() protoreflect.EnumDescriptor { + return file_rpc_rpc_proto_enumTypes[2].Descriptor() +} + +func (CriuPreDumpMode) Type() protoreflect.EnumType { + return &file_rpc_rpc_proto_enumTypes[2] +} + +func (x CriuPreDumpMode) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *CriuPreDumpMode) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = CriuPreDumpMode(num) + return nil +} + +// Deprecated: Use CriuPreDumpMode.Descriptor instead. +func (CriuPreDumpMode) EnumDescriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{2} +} + +type CriuReqType int32 + +const ( + CriuReqType_EMPTY CriuReqType = 0 + CriuReqType_DUMP CriuReqType = 1 + CriuReqType_RESTORE CriuReqType = 2 + CriuReqType_CHECK CriuReqType = 3 + CriuReqType_PRE_DUMP CriuReqType = 4 + CriuReqType_PAGE_SERVER CriuReqType = 5 + CriuReqType_NOTIFY CriuReqType = 6 + CriuReqType_CPUINFO_DUMP CriuReqType = 7 + CriuReqType_CPUINFO_CHECK CriuReqType = 8 + CriuReqType_FEATURE_CHECK CriuReqType = 9 + CriuReqType_VERSION CriuReqType = 10 + CriuReqType_WAIT_PID CriuReqType = 11 + CriuReqType_PAGE_SERVER_CHLD CriuReqType = 12 + CriuReqType_SINGLE_PRE_DUMP CriuReqType = 13 +) + +// Enum value maps for CriuReqType. 
+var ( + CriuReqType_name = map[int32]string{ + 0: "EMPTY", + 1: "DUMP", + 2: "RESTORE", + 3: "CHECK", + 4: "PRE_DUMP", + 5: "PAGE_SERVER", + 6: "NOTIFY", + 7: "CPUINFO_DUMP", + 8: "CPUINFO_CHECK", + 9: "FEATURE_CHECK", + 10: "VERSION", + 11: "WAIT_PID", + 12: "PAGE_SERVER_CHLD", + 13: "SINGLE_PRE_DUMP", + } + CriuReqType_value = map[string]int32{ + "EMPTY": 0, + "DUMP": 1, + "RESTORE": 2, + "CHECK": 3, + "PRE_DUMP": 4, + "PAGE_SERVER": 5, + "NOTIFY": 6, + "CPUINFO_DUMP": 7, + "CPUINFO_CHECK": 8, + "FEATURE_CHECK": 9, + "VERSION": 10, + "WAIT_PID": 11, + "PAGE_SERVER_CHLD": 12, + "SINGLE_PRE_DUMP": 13, + } +) + +func (x CriuReqType) Enum() *CriuReqType { + p := new(CriuReqType) + *p = x + return p +} + +func (x CriuReqType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (CriuReqType) Descriptor() protoreflect.EnumDescriptor { + return file_rpc_rpc_proto_enumTypes[3].Descriptor() +} + +func (CriuReqType) Type() protoreflect.EnumType { + return &file_rpc_rpc_proto_enumTypes[3] +} + +func (x CriuReqType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *CriuReqType) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = CriuReqType(num) + return nil +} + +// Deprecated: Use CriuReqType.Descriptor instead. 
+func (CriuReqType) EnumDescriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{3} +} + +type CriuPageServerInfo struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"` + Port *int32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` + Pid *int32 `protobuf:"varint,3,opt,name=pid" json:"pid,omitempty"` + Fd *int32 `protobuf:"varint,4,opt,name=fd" json:"fd,omitempty"` +} + +func (x *CriuPageServerInfo) Reset() { + *x = CriuPageServerInfo{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuPageServerInfo) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuPageServerInfo) ProtoMessage() {} + +func (x *CriuPageServerInfo) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuPageServerInfo.ProtoReflect.Descriptor instead. 
+func (*CriuPageServerInfo) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{0} +} + +func (x *CriuPageServerInfo) GetAddress() string { + if x != nil && x.Address != nil { + return *x.Address + } + return "" +} + +func (x *CriuPageServerInfo) GetPort() int32 { + if x != nil && x.Port != nil { + return *x.Port + } + return 0 +} + +func (x *CriuPageServerInfo) GetPid() int32 { + if x != nil && x.Pid != nil { + return *x.Pid + } + return 0 +} + +func (x *CriuPageServerInfo) GetFd() int32 { + if x != nil && x.Fd != nil { + return *x.Fd + } + return 0 +} + +type CriuVethPair struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + IfIn *string `protobuf:"bytes,1,req,name=if_in,json=ifIn" json:"if_in,omitempty"` + IfOut *string `protobuf:"bytes,2,req,name=if_out,json=ifOut" json:"if_out,omitempty"` +} + +func (x *CriuVethPair) Reset() { + *x = CriuVethPair{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuVethPair) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuVethPair) ProtoMessage() {} + +func (x *CriuVethPair) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuVethPair.ProtoReflect.Descriptor instead. 
+func (*CriuVethPair) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{1} +} + +func (x *CriuVethPair) GetIfIn() string { + if x != nil && x.IfIn != nil { + return *x.IfIn + } + return "" +} + +func (x *CriuVethPair) GetIfOut() string { + if x != nil && x.IfOut != nil { + return *x.IfOut + } + return "" +} + +type ExtMountMap struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` + Val *string `protobuf:"bytes,2,req,name=val" json:"val,omitempty"` +} + +func (x *ExtMountMap) Reset() { + *x = ExtMountMap{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ExtMountMap) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ExtMountMap) ProtoMessage() {} + +func (x *ExtMountMap) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ExtMountMap.ProtoReflect.Descriptor instead. 
+func (*ExtMountMap) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{2} +} + +func (x *ExtMountMap) GetKey() string { + if x != nil && x.Key != nil { + return *x.Key + } + return "" +} + +func (x *ExtMountMap) GetVal() string { + if x != nil && x.Val != nil { + return *x.Val + } + return "" +} + +type JoinNamespace struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Ns *string `protobuf:"bytes,1,req,name=ns" json:"ns,omitempty"` + NsFile *string `protobuf:"bytes,2,req,name=ns_file,json=nsFile" json:"ns_file,omitempty"` + ExtraOpt *string `protobuf:"bytes,3,opt,name=extra_opt,json=extraOpt" json:"extra_opt,omitempty"` +} + +func (x *JoinNamespace) Reset() { + *x = JoinNamespace{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *JoinNamespace) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*JoinNamespace) ProtoMessage() {} + +func (x *JoinNamespace) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use JoinNamespace.ProtoReflect.Descriptor instead. 
+func (*JoinNamespace) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{3} +} + +func (x *JoinNamespace) GetNs() string { + if x != nil && x.Ns != nil { + return *x.Ns + } + return "" +} + +func (x *JoinNamespace) GetNsFile() string { + if x != nil && x.NsFile != nil { + return *x.NsFile + } + return "" +} + +func (x *JoinNamespace) GetExtraOpt() string { + if x != nil && x.ExtraOpt != nil { + return *x.ExtraOpt + } + return "" +} + +type InheritFd struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"` + Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"` +} + +func (x *InheritFd) Reset() { + *x = InheritFd{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *InheritFd) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*InheritFd) ProtoMessage() {} + +func (x *InheritFd) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use InheritFd.ProtoReflect.Descriptor instead. 
+func (*InheritFd) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{4} +} + +func (x *InheritFd) GetKey() string { + if x != nil && x.Key != nil { + return *x.Key + } + return "" +} + +func (x *InheritFd) GetFd() int32 { + if x != nil && x.Fd != nil { + return *x.Fd + } + return 0 +} + +type CgroupRoot struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Ctrl *string `protobuf:"bytes,1,opt,name=ctrl" json:"ctrl,omitempty"` + Path *string `protobuf:"bytes,2,req,name=path" json:"path,omitempty"` +} + +func (x *CgroupRoot) Reset() { + *x = CgroupRoot{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CgroupRoot) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CgroupRoot) ProtoMessage() {} + +func (x *CgroupRoot) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CgroupRoot.ProtoReflect.Descriptor instead. 
+func (*CgroupRoot) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{5} +} + +func (x *CgroupRoot) GetCtrl() string { + if x != nil && x.Ctrl != nil { + return *x.Ctrl + } + return "" +} + +func (x *CgroupRoot) GetPath() string { + if x != nil && x.Path != nil { + return *x.Path + } + return "" +} + +type UnixSk struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Inode *uint32 `protobuf:"varint,1,req,name=inode" json:"inode,omitempty"` +} + +func (x *UnixSk) Reset() { + *x = UnixSk{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *UnixSk) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UnixSk) ProtoMessage() {} + +func (x *UnixSk) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UnixSk.ProtoReflect.Descriptor instead. 
+func (*UnixSk) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{6} +} + +func (x *UnixSk) GetInode() uint32 { + if x != nil && x.Inode != nil { + return *x.Inode + } + return 0 +} + +type CriuOpts struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd,json=imagesDirFd" json:"images_dir_fd,omitempty"` + Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` // if not set on dump, will dump requesting process + LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running,json=leaveRunning" json:"leave_running,omitempty"` + ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk,json=extUnixSk" json:"ext_unix_sk,omitempty"` + TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established,json=tcpEstablished" json:"tcp_established,omitempty"` + EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices,json=evasiveDevices" json:"evasive_devices,omitempty"` + ShellJob *bool `protobuf:"varint,7,opt,name=shell_job,json=shellJob" json:"shell_job,omitempty"` + FileLocks *bool `protobuf:"varint,8,opt,name=file_locks,json=fileLocks" json:"file_locks,omitempty"` + LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,json=logLevel,def=2" json:"log_level,omitempty"` + LogFile *string `protobuf:"bytes,10,opt,name=log_file,json=logFile" json:"log_file,omitempty"` // No subdirs are allowed. 
Consider using work-dir + Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"` + NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts,json=notifyScripts" json:"notify_scripts,omitempty"` + Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"` + ParentImg *string `protobuf:"bytes,14,opt,name=parent_img,json=parentImg" json:"parent_img,omitempty"` + TrackMem *bool `protobuf:"varint,15,opt,name=track_mem,json=trackMem" json:"track_mem,omitempty"` + AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup,json=autoDedup" json:"auto_dedup,omitempty"` + WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd,json=workDirFd" json:"work_dir_fd,omitempty"` + LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap,json=linkRemap" json:"link_remap,omitempty"` + Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"` // DEPRECATED, use external instead + CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,json=cpuCap,def=4294967295" json:"cpu_cap,omitempty"` + ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap,json=forceIrmap" json:"force_irmap,omitempty"` + ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd,json=execCmd" json:"exec_cmd,omitempty"` + ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt,json=extMnt" json:"ext_mnt,omitempty"` // DEPRECATED, use external instead + ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups,json=manageCgroups" json:"manage_cgroups,omitempty"` // backward compatibility + CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root,json=cgRoot" json:"cg_root,omitempty"` + RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling,json=rstSibling" json:"rst_sibling,omitempty"` // swrk only + InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd,json=inheritFd" json:"inherit_fd,omitempty"` // swrk only + AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt,json=autoExtMnt" json:"auto_ext_mnt,omitempty"` + ExtSharing 
*bool `protobuf:"varint,29,opt,name=ext_sharing,json=extSharing" json:"ext_sharing,omitempty"` + ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters,json=extMasters" json:"ext_masters,omitempty"` + SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt,json=skipMnt" json:"skip_mnt,omitempty"` + EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs,json=enableFs" json:"enable_fs,omitempty"` + UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino,json=unixSkIno" json:"unix_sk_ino,omitempty"` // DEPRECATED, use external instead + ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,json=manageCgroupsMode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"` + GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,json=ghostLimit,def=1048576" json:"ghost_limit,omitempty"` + IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths,json=irmapScanPaths" json:"irmap_scan_paths,omitempty"` + External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"` + EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns,json=emptyNs" json:"empty_ns,omitempty"` + JoinNs []*JoinNamespace `protobuf:"bytes,39,rep,name=join_ns,json=joinNs" json:"join_ns,omitempty"` + CgroupProps *string `protobuf:"bytes,41,opt,name=cgroup_props,json=cgroupProps" json:"cgroup_props,omitempty"` + CgroupPropsFile *string `protobuf:"bytes,42,opt,name=cgroup_props_file,json=cgroupPropsFile" json:"cgroup_props_file,omitempty"` + CgroupDumpController []string `protobuf:"bytes,43,rep,name=cgroup_dump_controller,json=cgroupDumpController" json:"cgroup_dump_controller,omitempty"` + FreezeCgroup *string `protobuf:"bytes,44,opt,name=freeze_cgroup,json=freezeCgroup" json:"freeze_cgroup,omitempty"` + Timeout *uint32 `protobuf:"varint,45,opt,name=timeout" json:"timeout,omitempty"` + TcpSkipInFlight *bool `protobuf:"varint,46,opt,name=tcp_skip_in_flight,json=tcpSkipInFlight" json:"tcp_skip_in_flight,omitempty"` + WeakSysctls *bool 
`protobuf:"varint,47,opt,name=weak_sysctls,json=weakSysctls" json:"weak_sysctls,omitempty"` + LazyPages *bool `protobuf:"varint,48,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"` + StatusFd *int32 `protobuf:"varint,49,opt,name=status_fd,json=statusFd" json:"status_fd,omitempty"` + OrphanPtsMaster *bool `protobuf:"varint,50,opt,name=orphan_pts_master,json=orphanPtsMaster" json:"orphan_pts_master,omitempty"` + ConfigFile *string `protobuf:"bytes,51,opt,name=config_file,json=configFile" json:"config_file,omitempty"` + TcpClose *bool `protobuf:"varint,52,opt,name=tcp_close,json=tcpClose" json:"tcp_close,omitempty"` + LsmProfile *string `protobuf:"bytes,53,opt,name=lsm_profile,json=lsmProfile" json:"lsm_profile,omitempty"` + TlsCacert *string `protobuf:"bytes,54,opt,name=tls_cacert,json=tlsCacert" json:"tls_cacert,omitempty"` + TlsCacrl *string `protobuf:"bytes,55,opt,name=tls_cacrl,json=tlsCacrl" json:"tls_cacrl,omitempty"` + TlsCert *string `protobuf:"bytes,56,opt,name=tls_cert,json=tlsCert" json:"tls_cert,omitempty"` + TlsKey *string `protobuf:"bytes,57,opt,name=tls_key,json=tlsKey" json:"tls_key,omitempty"` + Tls *bool `protobuf:"varint,58,opt,name=tls" json:"tls,omitempty"` + TlsNoCnVerify *bool `protobuf:"varint,59,opt,name=tls_no_cn_verify,json=tlsNoCnVerify" json:"tls_no_cn_verify,omitempty"` + CgroupYard *string `protobuf:"bytes,60,opt,name=cgroup_yard,json=cgroupYard" json:"cgroup_yard,omitempty"` + PreDumpMode *CriuPreDumpMode `protobuf:"varint,61,opt,name=pre_dump_mode,json=preDumpMode,enum=CriuPreDumpMode,def=1" json:"pre_dump_mode,omitempty"` + PidfdStoreSk *int32 `protobuf:"varint,62,opt,name=pidfd_store_sk,json=pidfdStoreSk" json:"pidfd_store_sk,omitempty"` + LsmMountContext *string `protobuf:"bytes,63,opt,name=lsm_mount_context,json=lsmMountContext" json:"lsm_mount_context,omitempty"` + NetworkLock *CriuNetworkLockMethod `protobuf:"varint,64,opt,name=network_lock,json=networkLock,enum=CriuNetworkLockMethod,def=1" 
json:"network_lock,omitempty"` + MntnsCompatMode *bool `protobuf:"varint,65,opt,name=mntns_compat_mode,json=mntnsCompatMode" json:"mntns_compat_mode,omitempty"` // optional bool check_mounts = 128; +} + +// Default values for CriuOpts fields. +const ( + Default_CriuOpts_LogLevel = int32(2) + Default_CriuOpts_CpuCap = uint32(4294967295) + Default_CriuOpts_GhostLimit = uint32(1048576) + Default_CriuOpts_PreDumpMode = CriuPreDumpMode_SPLICE + Default_CriuOpts_NetworkLock = CriuNetworkLockMethod_IPTABLES +) + +func (x *CriuOpts) Reset() { + *x = CriuOpts{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuOpts) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuOpts) ProtoMessage() {} + +func (x *CriuOpts) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuOpts.ProtoReflect.Descriptor instead. 
+func (*CriuOpts) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{7} +} + +func (x *CriuOpts) GetImagesDirFd() int32 { + if x != nil && x.ImagesDirFd != nil { + return *x.ImagesDirFd + } + return 0 +} + +func (x *CriuOpts) GetPid() int32 { + if x != nil && x.Pid != nil { + return *x.Pid + } + return 0 +} + +func (x *CriuOpts) GetLeaveRunning() bool { + if x != nil && x.LeaveRunning != nil { + return *x.LeaveRunning + } + return false +} + +func (x *CriuOpts) GetExtUnixSk() bool { + if x != nil && x.ExtUnixSk != nil { + return *x.ExtUnixSk + } + return false +} + +func (x *CriuOpts) GetTcpEstablished() bool { + if x != nil && x.TcpEstablished != nil { + return *x.TcpEstablished + } + return false +} + +func (x *CriuOpts) GetEvasiveDevices() bool { + if x != nil && x.EvasiveDevices != nil { + return *x.EvasiveDevices + } + return false +} + +func (x *CriuOpts) GetShellJob() bool { + if x != nil && x.ShellJob != nil { + return *x.ShellJob + } + return false +} + +func (x *CriuOpts) GetFileLocks() bool { + if x != nil && x.FileLocks != nil { + return *x.FileLocks + } + return false +} + +func (x *CriuOpts) GetLogLevel() int32 { + if x != nil && x.LogLevel != nil { + return *x.LogLevel + } + return Default_CriuOpts_LogLevel +} + +func (x *CriuOpts) GetLogFile() string { + if x != nil && x.LogFile != nil { + return *x.LogFile + } + return "" +} + +func (x *CriuOpts) GetPs() *CriuPageServerInfo { + if x != nil { + return x.Ps + } + return nil +} + +func (x *CriuOpts) GetNotifyScripts() bool { + if x != nil && x.NotifyScripts != nil { + return *x.NotifyScripts + } + return false +} + +func (x *CriuOpts) GetRoot() string { + if x != nil && x.Root != nil { + return *x.Root + } + return "" +} + +func (x *CriuOpts) GetParentImg() string { + if x != nil && x.ParentImg != nil { + return *x.ParentImg + } + return "" +} + +func (x *CriuOpts) GetTrackMem() bool { + if x != nil && x.TrackMem != nil { + return *x.TrackMem + } + return false +} + +func 
(x *CriuOpts) GetAutoDedup() bool { + if x != nil && x.AutoDedup != nil { + return *x.AutoDedup + } + return false +} + +func (x *CriuOpts) GetWorkDirFd() int32 { + if x != nil && x.WorkDirFd != nil { + return *x.WorkDirFd + } + return 0 +} + +func (x *CriuOpts) GetLinkRemap() bool { + if x != nil && x.LinkRemap != nil { + return *x.LinkRemap + } + return false +} + +func (x *CriuOpts) GetVeths() []*CriuVethPair { + if x != nil { + return x.Veths + } + return nil +} + +func (x *CriuOpts) GetCpuCap() uint32 { + if x != nil && x.CpuCap != nil { + return *x.CpuCap + } + return Default_CriuOpts_CpuCap +} + +func (x *CriuOpts) GetForceIrmap() bool { + if x != nil && x.ForceIrmap != nil { + return *x.ForceIrmap + } + return false +} + +func (x *CriuOpts) GetExecCmd() []string { + if x != nil { + return x.ExecCmd + } + return nil +} + +func (x *CriuOpts) GetExtMnt() []*ExtMountMap { + if x != nil { + return x.ExtMnt + } + return nil +} + +func (x *CriuOpts) GetManageCgroups() bool { + if x != nil && x.ManageCgroups != nil { + return *x.ManageCgroups + } + return false +} + +func (x *CriuOpts) GetCgRoot() []*CgroupRoot { + if x != nil { + return x.CgRoot + } + return nil +} + +func (x *CriuOpts) GetRstSibling() bool { + if x != nil && x.RstSibling != nil { + return *x.RstSibling + } + return false +} + +func (x *CriuOpts) GetInheritFd() []*InheritFd { + if x != nil { + return x.InheritFd + } + return nil +} + +func (x *CriuOpts) GetAutoExtMnt() bool { + if x != nil && x.AutoExtMnt != nil { + return *x.AutoExtMnt + } + return false +} + +func (x *CriuOpts) GetExtSharing() bool { + if x != nil && x.ExtSharing != nil { + return *x.ExtSharing + } + return false +} + +func (x *CriuOpts) GetExtMasters() bool { + if x != nil && x.ExtMasters != nil { + return *x.ExtMasters + } + return false +} + +func (x *CriuOpts) GetSkipMnt() []string { + if x != nil { + return x.SkipMnt + } + return nil +} + +func (x *CriuOpts) GetEnableFs() []string { + if x != nil { + return x.EnableFs + } + 
return nil +} + +func (x *CriuOpts) GetUnixSkIno() []*UnixSk { + if x != nil { + return x.UnixSkIno + } + return nil +} + +func (x *CriuOpts) GetManageCgroupsMode() CriuCgMode { + if x != nil && x.ManageCgroupsMode != nil { + return *x.ManageCgroupsMode + } + return CriuCgMode_IGNORE +} + +func (x *CriuOpts) GetGhostLimit() uint32 { + if x != nil && x.GhostLimit != nil { + return *x.GhostLimit + } + return Default_CriuOpts_GhostLimit +} + +func (x *CriuOpts) GetIrmapScanPaths() []string { + if x != nil { + return x.IrmapScanPaths + } + return nil +} + +func (x *CriuOpts) GetExternal() []string { + if x != nil { + return x.External + } + return nil +} + +func (x *CriuOpts) GetEmptyNs() uint32 { + if x != nil && x.EmptyNs != nil { + return *x.EmptyNs + } + return 0 +} + +func (x *CriuOpts) GetJoinNs() []*JoinNamespace { + if x != nil { + return x.JoinNs + } + return nil +} + +func (x *CriuOpts) GetCgroupProps() string { + if x != nil && x.CgroupProps != nil { + return *x.CgroupProps + } + return "" +} + +func (x *CriuOpts) GetCgroupPropsFile() string { + if x != nil && x.CgroupPropsFile != nil { + return *x.CgroupPropsFile + } + return "" +} + +func (x *CriuOpts) GetCgroupDumpController() []string { + if x != nil { + return x.CgroupDumpController + } + return nil +} + +func (x *CriuOpts) GetFreezeCgroup() string { + if x != nil && x.FreezeCgroup != nil { + return *x.FreezeCgroup + } + return "" +} + +func (x *CriuOpts) GetTimeout() uint32 { + if x != nil && x.Timeout != nil { + return *x.Timeout + } + return 0 +} + +func (x *CriuOpts) GetTcpSkipInFlight() bool { + if x != nil && x.TcpSkipInFlight != nil { + return *x.TcpSkipInFlight + } + return false +} + +func (x *CriuOpts) GetWeakSysctls() bool { + if x != nil && x.WeakSysctls != nil { + return *x.WeakSysctls + } + return false +} + +func (x *CriuOpts) GetLazyPages() bool { + if x != nil && x.LazyPages != nil { + return *x.LazyPages + } + return false +} + +func (x *CriuOpts) GetStatusFd() int32 { + if x != nil && 
x.StatusFd != nil { + return *x.StatusFd + } + return 0 +} + +func (x *CriuOpts) GetOrphanPtsMaster() bool { + if x != nil && x.OrphanPtsMaster != nil { + return *x.OrphanPtsMaster + } + return false +} + +func (x *CriuOpts) GetConfigFile() string { + if x != nil && x.ConfigFile != nil { + return *x.ConfigFile + } + return "" +} + +func (x *CriuOpts) GetTcpClose() bool { + if x != nil && x.TcpClose != nil { + return *x.TcpClose + } + return false +} + +func (x *CriuOpts) GetLsmProfile() string { + if x != nil && x.LsmProfile != nil { + return *x.LsmProfile + } + return "" +} + +func (x *CriuOpts) GetTlsCacert() string { + if x != nil && x.TlsCacert != nil { + return *x.TlsCacert + } + return "" +} + +func (x *CriuOpts) GetTlsCacrl() string { + if x != nil && x.TlsCacrl != nil { + return *x.TlsCacrl + } + return "" +} + +func (x *CriuOpts) GetTlsCert() string { + if x != nil && x.TlsCert != nil { + return *x.TlsCert + } + return "" +} + +func (x *CriuOpts) GetTlsKey() string { + if x != nil && x.TlsKey != nil { + return *x.TlsKey + } + return "" +} + +func (x *CriuOpts) GetTls() bool { + if x != nil && x.Tls != nil { + return *x.Tls + } + return false +} + +func (x *CriuOpts) GetTlsNoCnVerify() bool { + if x != nil && x.TlsNoCnVerify != nil { + return *x.TlsNoCnVerify + } + return false +} + +func (x *CriuOpts) GetCgroupYard() string { + if x != nil && x.CgroupYard != nil { + return *x.CgroupYard + } + return "" +} + +func (x *CriuOpts) GetPreDumpMode() CriuPreDumpMode { + if x != nil && x.PreDumpMode != nil { + return *x.PreDumpMode + } + return Default_CriuOpts_PreDumpMode +} + +func (x *CriuOpts) GetPidfdStoreSk() int32 { + if x != nil && x.PidfdStoreSk != nil { + return *x.PidfdStoreSk + } + return 0 +} + +func (x *CriuOpts) GetLsmMountContext() string { + if x != nil && x.LsmMountContext != nil { + return *x.LsmMountContext + } + return "" +} + +func (x *CriuOpts) GetNetworkLock() CriuNetworkLockMethod { + if x != nil && x.NetworkLock != nil { + return 
*x.NetworkLock + } + return Default_CriuOpts_NetworkLock +} + +func (x *CriuOpts) GetMntnsCompatMode() bool { + if x != nil && x.MntnsCompatMode != nil { + return *x.MntnsCompatMode + } + return false +} + +type CriuDumpResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Restored *bool `protobuf:"varint,1,opt,name=restored" json:"restored,omitempty"` +} + +func (x *CriuDumpResp) Reset() { + *x = CriuDumpResp{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuDumpResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuDumpResp) ProtoMessage() {} + +func (x *CriuDumpResp) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuDumpResp.ProtoReflect.Descriptor instead. 
+func (*CriuDumpResp) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{8} +} + +func (x *CriuDumpResp) GetRestored() bool { + if x != nil && x.Restored != nil { + return *x.Restored + } + return false +} + +type CriuRestoreResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Pid *int32 `protobuf:"varint,1,req,name=pid" json:"pid,omitempty"` +} + +func (x *CriuRestoreResp) Reset() { + *x = CriuRestoreResp{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuRestoreResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuRestoreResp) ProtoMessage() {} + +func (x *CriuRestoreResp) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuRestoreResp.ProtoReflect.Descriptor instead. 
+func (*CriuRestoreResp) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{9} +} + +func (x *CriuRestoreResp) GetPid() int32 { + if x != nil && x.Pid != nil { + return *x.Pid + } + return 0 +} + +type CriuNotify struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Script *string `protobuf:"bytes,1,opt,name=script" json:"script,omitempty"` + Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"` +} + +func (x *CriuNotify) Reset() { + *x = CriuNotify{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuNotify) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuNotify) ProtoMessage() {} + +func (x *CriuNotify) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuNotify.ProtoReflect.Descriptor instead. 
+func (*CriuNotify) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{10} +} + +func (x *CriuNotify) GetScript() string { + if x != nil && x.Script != nil { + return *x.Script + } + return "" +} + +func (x *CriuNotify) GetPid() int32 { + if x != nil && x.Pid != nil { + return *x.Pid + } + return 0 +} + +// +// List of features which can queried via +// CRIU_REQ_TYPE__FEATURE_CHECK +type CriuFeatures struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + MemTrack *bool `protobuf:"varint,1,opt,name=mem_track,json=memTrack" json:"mem_track,omitempty"` + LazyPages *bool `protobuf:"varint,2,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"` + PidfdStore *bool `protobuf:"varint,3,opt,name=pidfd_store,json=pidfdStore" json:"pidfd_store,omitempty"` +} + +func (x *CriuFeatures) Reset() { + *x = CriuFeatures{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuFeatures) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuFeatures) ProtoMessage() {} + +func (x *CriuFeatures) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuFeatures.ProtoReflect.Descriptor instead. 
+func (*CriuFeatures) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{11} +} + +func (x *CriuFeatures) GetMemTrack() bool { + if x != nil && x.MemTrack != nil { + return *x.MemTrack + } + return false +} + +func (x *CriuFeatures) GetLazyPages() bool { + if x != nil && x.LazyPages != nil { + return *x.LazyPages + } + return false +} + +func (x *CriuFeatures) GetPidfdStore() bool { + if x != nil && x.PidfdStore != nil { + return *x.PidfdStore + } + return false +} + +type CriuReq struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"` + Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"` + NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success,json=notifySuccess" json:"notify_success,omitempty"` + // + // When set service won't close the connection but + // will wait for more req-s to appear. Works not + // for all request types. + KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open,json=keepOpen" json:"keep_open,omitempty"` + // + // 'features' can be used to query which features + // are supported by the installed criu/kernel + // via RPC. 
+ Features *CriuFeatures `protobuf:"bytes,5,opt,name=features" json:"features,omitempty"` + // 'pid' is used for WAIT_PID + Pid *uint32 `protobuf:"varint,6,opt,name=pid" json:"pid,omitempty"` +} + +func (x *CriuReq) Reset() { + *x = CriuReq{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuReq) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuReq) ProtoMessage() {} + +func (x *CriuReq) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuReq.ProtoReflect.Descriptor instead. +func (*CriuReq) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{12} +} + +func (x *CriuReq) GetType() CriuReqType { + if x != nil && x.Type != nil { + return *x.Type + } + return CriuReqType_EMPTY +} + +func (x *CriuReq) GetOpts() *CriuOpts { + if x != nil { + return x.Opts + } + return nil +} + +func (x *CriuReq) GetNotifySuccess() bool { + if x != nil && x.NotifySuccess != nil { + return *x.NotifySuccess + } + return false +} + +func (x *CriuReq) GetKeepOpen() bool { + if x != nil && x.KeepOpen != nil { + return *x.KeepOpen + } + return false +} + +func (x *CriuReq) GetFeatures() *CriuFeatures { + if x != nil { + return x.Features + } + return nil +} + +func (x *CriuReq) GetPid() uint32 { + if x != nil && x.Pid != nil { + return *x.Pid + } + return 0 +} + +type CriuResp struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"` + Success *bool `protobuf:"varint,2,req,name=success" 
json:"success,omitempty"` + Dump *CriuDumpResp `protobuf:"bytes,3,opt,name=dump" json:"dump,omitempty"` + Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"` + Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"` + Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"` + CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno,json=crErrno" json:"cr_errno,omitempty"` + Features *CriuFeatures `protobuf:"bytes,8,opt,name=features" json:"features,omitempty"` + CrErrmsg *string `protobuf:"bytes,9,opt,name=cr_errmsg,json=crErrmsg" json:"cr_errmsg,omitempty"` + Version *CriuVersion `protobuf:"bytes,10,opt,name=version" json:"version,omitempty"` + Status *int32 `protobuf:"varint,11,opt,name=status" json:"status,omitempty"` +} + +func (x *CriuResp) Reset() { + *x = CriuResp{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuResp) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuResp) ProtoMessage() {} + +func (x *CriuResp) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuResp.ProtoReflect.Descriptor instead. 
+func (*CriuResp) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{13} +} + +func (x *CriuResp) GetType() CriuReqType { + if x != nil && x.Type != nil { + return *x.Type + } + return CriuReqType_EMPTY +} + +func (x *CriuResp) GetSuccess() bool { + if x != nil && x.Success != nil { + return *x.Success + } + return false +} + +func (x *CriuResp) GetDump() *CriuDumpResp { + if x != nil { + return x.Dump + } + return nil +} + +func (x *CriuResp) GetRestore() *CriuRestoreResp { + if x != nil { + return x.Restore + } + return nil +} + +func (x *CriuResp) GetNotify() *CriuNotify { + if x != nil { + return x.Notify + } + return nil +} + +func (x *CriuResp) GetPs() *CriuPageServerInfo { + if x != nil { + return x.Ps + } + return nil +} + +func (x *CriuResp) GetCrErrno() int32 { + if x != nil && x.CrErrno != nil { + return *x.CrErrno + } + return 0 +} + +func (x *CriuResp) GetFeatures() *CriuFeatures { + if x != nil { + return x.Features + } + return nil +} + +func (x *CriuResp) GetCrErrmsg() string { + if x != nil && x.CrErrmsg != nil { + return *x.CrErrmsg + } + return "" +} + +func (x *CriuResp) GetVersion() *CriuVersion { + if x != nil { + return x.Version + } + return nil +} + +func (x *CriuResp) GetStatus() int32 { + if x != nil && x.Status != nil { + return *x.Status + } + return 0 +} + +// Answer for criu_req_type.VERSION requests +type CriuVersion struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + MajorNumber *int32 `protobuf:"varint,1,req,name=major_number,json=majorNumber" json:"major_number,omitempty"` + MinorNumber *int32 `protobuf:"varint,2,req,name=minor_number,json=minorNumber" json:"minor_number,omitempty"` + Gitid *string `protobuf:"bytes,3,opt,name=gitid" json:"gitid,omitempty"` + Sublevel *int32 `protobuf:"varint,4,opt,name=sublevel" json:"sublevel,omitempty"` + Extra *int32 `protobuf:"varint,5,opt,name=extra" json:"extra,omitempty"` + Name *string 
`protobuf:"bytes,6,opt,name=name" json:"name,omitempty"` +} + +func (x *CriuVersion) Reset() { + *x = CriuVersion{} + if protoimpl.UnsafeEnabled { + mi := &file_rpc_rpc_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CriuVersion) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CriuVersion) ProtoMessage() {} + +func (x *CriuVersion) ProtoReflect() protoreflect.Message { + mi := &file_rpc_rpc_proto_msgTypes[14] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CriuVersion.ProtoReflect.Descriptor instead. +func (*CriuVersion) Descriptor() ([]byte, []int) { + return file_rpc_rpc_proto_rawDescGZIP(), []int{14} +} + +func (x *CriuVersion) GetMajorNumber() int32 { + if x != nil && x.MajorNumber != nil { + return *x.MajorNumber + } + return 0 +} + +func (x *CriuVersion) GetMinorNumber() int32 { + if x != nil && x.MinorNumber != nil { + return *x.MinorNumber + } + return 0 +} + +func (x *CriuVersion) GetGitid() string { + if x != nil && x.Gitid != nil { + return *x.Gitid + } + return "" +} + +func (x *CriuVersion) GetSublevel() int32 { + if x != nil && x.Sublevel != nil { + return *x.Sublevel + } + return 0 +} + +func (x *CriuVersion) GetExtra() int32 { + if x != nil && x.Extra != nil { + return *x.Extra + } + return 0 +} + +func (x *CriuVersion) GetName() string { + if x != nil && x.Name != nil { + return *x.Name + } + return "" +} + +var File_rpc_rpc_proto protoreflect.FileDescriptor + +var file_rpc_rpc_proto_rawDesc = []byte{ + 0x0a, 0x0d, 0x72, 0x70, 0x63, 0x2f, 0x72, 0x70, 0x63, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, + 0x67, 0x0a, 0x15, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, 0x72, + 0x76, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x12, 0x18, 0x0a, 0x07, 0x61, 
0x64, 0x64, 0x72, + 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x61, 0x64, 0x64, 0x72, 0x65, + 0x73, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x70, 0x69, 0x64, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x03, 0x70, 0x69, 0x64, 0x12, 0x0e, 0x0a, 0x02, 0x66, 0x64, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x66, 0x64, 0x22, 0x3c, 0x0a, 0x0e, 0x63, 0x72, 0x69, 0x75, + 0x5f, 0x76, 0x65, 0x74, 0x68, 0x5f, 0x70, 0x61, 0x69, 0x72, 0x12, 0x13, 0x0a, 0x05, 0x69, 0x66, + 0x5f, 0x69, 0x6e, 0x18, 0x01, 0x20, 0x02, 0x28, 0x09, 0x52, 0x04, 0x69, 0x66, 0x49, 0x6e, 0x12, + 0x15, 0x0a, 0x06, 0x69, 0x66, 0x5f, 0x6f, 0x75, 0x74, 0x18, 0x02, 0x20, 0x02, 0x28, 0x09, 0x52, + 0x05, 0x69, 0x66, 0x4f, 0x75, 0x74, 0x22, 0x33, 0x0a, 0x0d, 0x65, 0x78, 0x74, 0x5f, 0x6d, 0x6f, + 0x75, 0x6e, 0x74, 0x5f, 0x6d, 0x61, 0x70, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x02, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x76, 0x61, 0x6c, + 0x18, 0x02, 0x20, 0x02, 0x28, 0x09, 0x52, 0x03, 0x76, 0x61, 0x6c, 0x22, 0x56, 0x0a, 0x0e, 0x6a, + 0x6f, 0x69, 0x6e, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x0e, 0x0a, + 0x02, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x02, 0x28, 0x09, 0x52, 0x02, 0x6e, 0x73, 0x12, 0x17, 0x0a, + 0x07, 0x6e, 0x73, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x02, 0x28, 0x09, 0x52, 0x06, + 0x6e, 0x73, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x65, 0x78, 0x74, 0x72, 0x61, 0x5f, + 0x6f, 0x70, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x78, 0x74, 0x72, 0x61, + 0x4f, 0x70, 0x74, 0x22, 0x2e, 0x0a, 0x0a, 0x69, 0x6e, 0x68, 0x65, 0x72, 0x69, 0x74, 0x5f, 0x66, + 0x64, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x02, 0x28, 0x09, 0x52, 0x03, + 0x6b, 0x65, 0x79, 0x12, 0x0e, 0x0a, 0x02, 0x66, 0x64, 0x18, 0x02, 0x20, 0x02, 0x28, 0x05, 0x52, + 0x02, 0x66, 0x64, 
0x22, 0x35, 0x0a, 0x0b, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x5f, 0x72, 0x6f, + 0x6f, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x74, 0x72, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x63, 0x74, 0x72, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x02, + 0x20, 0x02, 0x28, 0x09, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, 0x22, 0x1f, 0x0a, 0x07, 0x75, 0x6e, + 0x69, 0x78, 0x5f, 0x73, 0x6b, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18, 0x01, + 0x20, 0x02, 0x28, 0x0d, 0x52, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x22, 0x80, 0x12, 0x0a, 0x09, + 0x63, 0x72, 0x69, 0x75, 0x5f, 0x6f, 0x70, 0x74, 0x73, 0x12, 0x22, 0x0a, 0x0d, 0x69, 0x6d, 0x61, + 0x67, 0x65, 0x73, 0x5f, 0x64, 0x69, 0x72, 0x5f, 0x66, 0x64, 0x18, 0x01, 0x20, 0x02, 0x28, 0x05, + 0x52, 0x0b, 0x69, 0x6d, 0x61, 0x67, 0x65, 0x73, 0x44, 0x69, 0x72, 0x46, 0x64, 0x12, 0x10, 0x0a, + 0x03, 0x70, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x70, 0x69, 0x64, 0x12, + 0x23, 0x0a, 0x0d, 0x6c, 0x65, 0x61, 0x76, 0x65, 0x5f, 0x72, 0x75, 0x6e, 0x6e, 0x69, 0x6e, 0x67, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x6c, 0x65, 0x61, 0x76, 0x65, 0x52, 0x75, 0x6e, + 0x6e, 0x69, 0x6e, 0x67, 0x12, 0x1e, 0x0a, 0x0b, 0x65, 0x78, 0x74, 0x5f, 0x75, 0x6e, 0x69, 0x78, + 0x5f, 0x73, 0x6b, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x65, 0x78, 0x74, 0x55, 0x6e, + 0x69, 0x78, 0x53, 0x6b, 0x12, 0x27, 0x0a, 0x0f, 0x74, 0x63, 0x70, 0x5f, 0x65, 0x73, 0x74, 0x61, + 0x62, 0x6c, 0x69, 0x73, 0x68, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x74, + 0x63, 0x70, 0x45, 0x73, 0x74, 0x61, 0x62, 0x6c, 0x69, 0x73, 0x68, 0x65, 0x64, 0x12, 0x27, 0x0a, + 0x0f, 0x65, 0x76, 0x61, 0x73, 0x69, 0x76, 0x65, 0x5f, 0x64, 0x65, 0x76, 0x69, 0x63, 0x65, 0x73, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x65, 0x76, 0x61, 0x73, 0x69, 0x76, 0x65, 0x44, + 0x65, 0x76, 0x69, 0x63, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x68, 0x65, 0x6c, 0x6c, 0x5f, + 0x6a, 0x6f, 0x62, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 
0x52, 0x08, 0x73, 0x68, 0x65, 0x6c, 0x6c, + 0x4a, 0x6f, 0x62, 0x12, 0x1d, 0x0a, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6c, 0x6f, 0x63, 0x6b, + 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x4c, 0x6f, 0x63, + 0x6b, 0x73, 0x12, 0x1e, 0x0a, 0x09, 0x6c, 0x6f, 0x67, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, + 0x09, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x01, 0x32, 0x52, 0x08, 0x6c, 0x6f, 0x67, 0x4c, 0x65, 0x76, + 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x6c, 0x6f, 0x67, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x0a, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6c, 0x6f, 0x67, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x26, 0x0a, + 0x02, 0x70, 0x73, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x63, 0x72, 0x69, 0x75, + 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x69, 0x6e, 0x66, + 0x6f, 0x52, 0x02, 0x70, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x6e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x5f, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6e, + 0x6f, 0x74, 0x69, 0x66, 0x79, 0x53, 0x63, 0x72, 0x69, 0x70, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, + 0x72, 0x6f, 0x6f, 0x74, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6f, 0x74, + 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x6d, 0x67, 0x18, 0x0e, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x49, 0x6d, 0x67, 0x12, + 0x1b, 0x0a, 0x09, 0x74, 0x72, 0x61, 0x63, 0x6b, 0x5f, 0x6d, 0x65, 0x6d, 0x18, 0x0f, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x08, 0x74, 0x72, 0x61, 0x63, 0x6b, 0x4d, 0x65, 0x6d, 0x12, 0x1d, 0x0a, 0x0a, + 0x61, 0x75, 0x74, 0x6f, 0x5f, 0x64, 0x65, 0x64, 0x75, 0x70, 0x18, 0x10, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x09, 0x61, 0x75, 0x74, 0x6f, 0x44, 0x65, 0x64, 0x75, 0x70, 0x12, 0x1e, 0x0a, 0x0b, 0x77, + 0x6f, 0x72, 0x6b, 0x5f, 0x64, 0x69, 0x72, 0x5f, 0x66, 0x64, 0x18, 0x11, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x09, 0x77, 0x6f, 0x72, 0x6b, 0x44, 0x69, 0x72, 0x46, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 
0x6c, + 0x69, 0x6e, 0x6b, 0x5f, 0x72, 0x65, 0x6d, 0x61, 0x70, 0x18, 0x12, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x09, 0x6c, 0x69, 0x6e, 0x6b, 0x52, 0x65, 0x6d, 0x61, 0x70, 0x12, 0x25, 0x0a, 0x05, 0x76, 0x65, + 0x74, 0x68, 0x73, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x63, 0x72, 0x69, 0x75, + 0x5f, 0x76, 0x65, 0x74, 0x68, 0x5f, 0x70, 0x61, 0x69, 0x72, 0x52, 0x05, 0x76, 0x65, 0x74, 0x68, + 0x73, 0x12, 0x23, 0x0a, 0x07, 0x63, 0x70, 0x75, 0x5f, 0x63, 0x61, 0x70, 0x18, 0x14, 0x20, 0x01, + 0x28, 0x0d, 0x3a, 0x0a, 0x34, 0x32, 0x39, 0x34, 0x39, 0x36, 0x37, 0x32, 0x39, 0x35, 0x52, 0x06, + 0x63, 0x70, 0x75, 0x43, 0x61, 0x70, 0x12, 0x1f, 0x0a, 0x0b, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x5f, + 0x69, 0x72, 0x6d, 0x61, 0x70, 0x18, 0x15, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x66, 0x6f, 0x72, + 0x63, 0x65, 0x49, 0x72, 0x6d, 0x61, 0x70, 0x12, 0x19, 0x0a, 0x08, 0x65, 0x78, 0x65, 0x63, 0x5f, + 0x63, 0x6d, 0x64, 0x18, 0x16, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x65, 0x78, 0x65, 0x63, 0x43, + 0x6d, 0x64, 0x12, 0x27, 0x0a, 0x07, 0x65, 0x78, 0x74, 0x5f, 0x6d, 0x6e, 0x74, 0x18, 0x17, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x0e, 0x2e, 0x65, 0x78, 0x74, 0x5f, 0x6d, 0x6f, 0x75, 0x6e, 0x74, 0x5f, + 0x6d, 0x61, 0x70, 0x52, 0x06, 0x65, 0x78, 0x74, 0x4d, 0x6e, 0x74, 0x12, 0x25, 0x0a, 0x0e, 0x6d, + 0x61, 0x6e, 0x61, 0x67, 0x65, 0x5f, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x73, 0x18, 0x18, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x61, 0x6e, 0x61, 0x67, 0x65, 0x43, 0x67, 0x72, 0x6f, 0x75, + 0x70, 0x73, 0x12, 0x25, 0x0a, 0x07, 0x63, 0x67, 0x5f, 0x72, 0x6f, 0x6f, 0x74, 0x18, 0x19, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x0c, 0x2e, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x5f, 0x72, 0x6f, 0x6f, + 0x74, 0x52, 0x06, 0x63, 0x67, 0x52, 0x6f, 0x6f, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x72, 0x73, 0x74, + 0x5f, 0x73, 0x69, 0x62, 0x6c, 0x69, 0x6e, 0x67, 0x18, 0x1a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, + 0x72, 0x73, 0x74, 0x53, 0x69, 0x62, 0x6c, 0x69, 0x6e, 0x67, 0x12, 0x2a, 0x0a, 0x0a, 0x69, 0x6e, + 0x68, 0x65, 0x72, 0x69, 0x74, 
0x5f, 0x66, 0x64, 0x18, 0x1b, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0b, + 0x2e, 0x69, 0x6e, 0x68, 0x65, 0x72, 0x69, 0x74, 0x5f, 0x66, 0x64, 0x52, 0x09, 0x69, 0x6e, 0x68, + 0x65, 0x72, 0x69, 0x74, 0x46, 0x64, 0x12, 0x20, 0x0a, 0x0c, 0x61, 0x75, 0x74, 0x6f, 0x5f, 0x65, + 0x78, 0x74, 0x5f, 0x6d, 0x6e, 0x74, 0x18, 0x1c, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x61, 0x75, + 0x74, 0x6f, 0x45, 0x78, 0x74, 0x4d, 0x6e, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x78, 0x74, 0x5f, + 0x73, 0x68, 0x61, 0x72, 0x69, 0x6e, 0x67, 0x18, 0x1d, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x65, + 0x78, 0x74, 0x53, 0x68, 0x61, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x78, 0x74, + 0x5f, 0x6d, 0x61, 0x73, 0x74, 0x65, 0x72, 0x73, 0x18, 0x1e, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, + 0x65, 0x78, 0x74, 0x4d, 0x61, 0x73, 0x74, 0x65, 0x72, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x6b, + 0x69, 0x70, 0x5f, 0x6d, 0x6e, 0x74, 0x18, 0x1f, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x73, 0x6b, + 0x69, 0x70, 0x4d, 0x6e, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x5f, + 0x66, 0x73, 0x18, 0x20, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, + 0x46, 0x73, 0x12, 0x28, 0x0a, 0x0b, 0x75, 0x6e, 0x69, 0x78, 0x5f, 0x73, 0x6b, 0x5f, 0x69, 0x6e, + 0x6f, 0x18, 0x21, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x08, 0x2e, 0x75, 0x6e, 0x69, 0x78, 0x5f, 0x73, + 0x6b, 0x52, 0x09, 0x75, 0x6e, 0x69, 0x78, 0x53, 0x6b, 0x49, 0x6e, 0x6f, 0x12, 0x3d, 0x0a, 0x13, + 0x6d, 0x61, 0x6e, 0x61, 0x67, 0x65, 0x5f, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x73, 0x5f, 0x6d, + 0x6f, 0x64, 0x65, 0x18, 0x22, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x63, 0x72, 0x69, 0x75, + 0x5f, 0x63, 0x67, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x52, 0x11, 0x6d, 0x61, 0x6e, 0x61, 0x67, 0x65, + 0x43, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x73, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x28, 0x0a, 0x0b, 0x67, + 0x68, 0x6f, 0x73, 0x74, 0x5f, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x23, 0x20, 0x01, 0x28, 0x0d, + 0x3a, 0x07, 0x31, 0x30, 0x34, 0x38, 0x35, 0x37, 0x36, 0x52, 0x0a, 
0x67, 0x68, 0x6f, 0x73, 0x74, + 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x28, 0x0a, 0x10, 0x69, 0x72, 0x6d, 0x61, 0x70, 0x5f, 0x73, + 0x63, 0x61, 0x6e, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, 0x18, 0x24, 0x20, 0x03, 0x28, 0x09, 0x52, + 0x0e, 0x69, 0x72, 0x6d, 0x61, 0x70, 0x53, 0x63, 0x61, 0x6e, 0x50, 0x61, 0x74, 0x68, 0x73, 0x12, + 0x1a, 0x0a, 0x08, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x18, 0x25, 0x20, 0x03, 0x28, + 0x09, 0x52, 0x08, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x65, + 0x6d, 0x70, 0x74, 0x79, 0x5f, 0x6e, 0x73, 0x18, 0x26, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x65, + 0x6d, 0x70, 0x74, 0x79, 0x4e, 0x73, 0x12, 0x28, 0x0a, 0x07, 0x6a, 0x6f, 0x69, 0x6e, 0x5f, 0x6e, + 0x73, 0x18, 0x27, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x6a, 0x6f, 0x69, 0x6e, 0x5f, 0x6e, + 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x52, 0x06, 0x6a, 0x6f, 0x69, 0x6e, 0x4e, 0x73, + 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x73, + 0x18, 0x29, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x50, 0x72, + 0x6f, 0x70, 0x73, 0x12, 0x2a, 0x0a, 0x11, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x5f, 0x70, 0x72, + 0x6f, 0x70, 0x73, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x2a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, + 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x50, 0x72, 0x6f, 0x70, 0x73, 0x46, 0x69, 0x6c, 0x65, 0x12, + 0x34, 0x0a, 0x16, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x5f, 0x64, 0x75, 0x6d, 0x70, 0x5f, 0x63, + 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x18, 0x2b, 0x20, 0x03, 0x28, 0x09, 0x52, + 0x14, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x44, 0x75, 0x6d, 0x70, 0x43, 0x6f, 0x6e, 0x74, 0x72, + 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x72, 0x65, 0x65, 0x7a, 0x65, 0x5f, + 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x18, 0x2c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x66, 0x72, + 0x65, 0x65, 0x7a, 0x65, 0x43, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x69, + 0x6d, 
0x65, 0x6f, 0x75, 0x74, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x69, 0x6d, + 0x65, 0x6f, 0x75, 0x74, 0x12, 0x2b, 0x0a, 0x12, 0x74, 0x63, 0x70, 0x5f, 0x73, 0x6b, 0x69, 0x70, + 0x5f, 0x69, 0x6e, 0x5f, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x18, 0x2e, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x0f, 0x74, 0x63, 0x70, 0x53, 0x6b, 0x69, 0x70, 0x49, 0x6e, 0x46, 0x6c, 0x69, 0x67, 0x68, + 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x77, 0x65, 0x61, 0x6b, 0x5f, 0x73, 0x79, 0x73, 0x63, 0x74, 0x6c, + 0x73, 0x18, 0x2f, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0b, 0x77, 0x65, 0x61, 0x6b, 0x53, 0x79, 0x73, + 0x63, 0x74, 0x6c, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x6c, 0x61, 0x7a, 0x79, 0x5f, 0x70, 0x61, 0x67, + 0x65, 0x73, 0x18, 0x30, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x6c, 0x61, 0x7a, 0x79, 0x50, 0x61, + 0x67, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x5f, 0x66, 0x64, + 0x18, 0x31, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x46, 0x64, + 0x12, 0x2a, 0x0a, 0x11, 0x6f, 0x72, 0x70, 0x68, 0x61, 0x6e, 0x5f, 0x70, 0x74, 0x73, 0x5f, 0x6d, + 0x61, 0x73, 0x74, 0x65, 0x72, 0x18, 0x32, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x6f, 0x72, 0x70, + 0x68, 0x61, 0x6e, 0x50, 0x74, 0x73, 0x4d, 0x61, 0x73, 0x74, 0x65, 0x72, 0x12, 0x1f, 0x0a, 0x0b, + 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x33, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x1b, 0x0a, + 0x09, 0x74, 0x63, 0x70, 0x5f, 0x63, 0x6c, 0x6f, 0x73, 0x65, 0x18, 0x34, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x74, 0x63, 0x70, 0x43, 0x6c, 0x6f, 0x73, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x6c, 0x73, + 0x6d, 0x5f, 0x70, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x35, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0a, 0x6c, 0x73, 0x6d, 0x50, 0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x74, + 0x6c, 0x73, 0x5f, 0x63, 0x61, 0x63, 0x65, 0x72, 0x74, 0x18, 0x36, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x09, 0x74, 0x6c, 0x73, 0x43, 0x61, 0x63, 
0x65, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x6c, + 0x73, 0x5f, 0x63, 0x61, 0x63, 0x72, 0x6c, 0x18, 0x37, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, + 0x6c, 0x73, 0x43, 0x61, 0x63, 0x72, 0x6c, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x6c, 0x73, 0x5f, 0x63, + 0x65, 0x72, 0x74, 0x18, 0x38, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x74, 0x6c, 0x73, 0x43, 0x65, + 0x72, 0x74, 0x12, 0x17, 0x0a, 0x07, 0x74, 0x6c, 0x73, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x39, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x6c, 0x73, 0x4b, 0x65, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x74, + 0x6c, 0x73, 0x18, 0x3a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x03, 0x74, 0x6c, 0x73, 0x12, 0x27, 0x0a, + 0x10, 0x74, 0x6c, 0x73, 0x5f, 0x6e, 0x6f, 0x5f, 0x63, 0x6e, 0x5f, 0x76, 0x65, 0x72, 0x69, 0x66, + 0x79, 0x18, 0x3b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x74, 0x6c, 0x73, 0x4e, 0x6f, 0x43, 0x6e, + 0x56, 0x65, 0x72, 0x69, 0x66, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x67, 0x72, 0x6f, 0x75, 0x70, + 0x5f, 0x79, 0x61, 0x72, 0x64, 0x18, 0x3c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x63, 0x67, 0x72, + 0x6f, 0x75, 0x70, 0x59, 0x61, 0x72, 0x64, 0x12, 0x3f, 0x0a, 0x0d, 0x70, 0x72, 0x65, 0x5f, 0x64, + 0x75, 0x6d, 0x70, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x3d, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x13, + 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x70, 0x72, 0x65, 0x5f, 0x64, 0x75, 0x6d, 0x70, 0x5f, 0x6d, + 0x6f, 0x64, 0x65, 0x3a, 0x06, 0x53, 0x50, 0x4c, 0x49, 0x43, 0x45, 0x52, 0x0b, 0x70, 0x72, 0x65, + 0x44, 0x75, 0x6d, 0x70, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x70, 0x69, 0x64, 0x66, + 0x64, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, 0x73, 0x6b, 0x18, 0x3e, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x0c, 0x70, 0x69, 0x64, 0x66, 0x64, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x53, 0x6b, 0x12, 0x2a, + 0x0a, 0x11, 0x6c, 0x73, 0x6d, 0x5f, 0x6d, 0x6f, 0x75, 0x6e, 0x74, 0x5f, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x18, 0x3f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x6c, 0x73, 0x6d, 0x4d, 0x6f, + 0x75, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x46, 0x0a, 
0x0c, 0x6e, 0x65, + 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x5f, 0x6c, 0x6f, 0x63, 0x6b, 0x18, 0x40, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x19, 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x5f, + 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x3a, 0x08, 0x49, 0x50, 0x54, + 0x41, 0x42, 0x4c, 0x45, 0x53, 0x52, 0x0b, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x4c, 0x6f, + 0x63, 0x6b, 0x12, 0x2a, 0x0a, 0x11, 0x6d, 0x6e, 0x74, 0x6e, 0x73, 0x5f, 0x63, 0x6f, 0x6d, 0x70, + 0x61, 0x74, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x41, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0f, 0x6d, + 0x6e, 0x74, 0x6e, 0x73, 0x43, 0x6f, 0x6d, 0x70, 0x61, 0x74, 0x4d, 0x6f, 0x64, 0x65, 0x22, 0x2c, + 0x0a, 0x0e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x64, 0x75, 0x6d, 0x70, 0x5f, 0x72, 0x65, 0x73, 0x70, + 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x64, 0x22, 0x25, 0x0a, 0x11, + 0x63, 0x72, 0x69, 0x75, 0x5f, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, 0x72, 0x65, 0x73, + 0x70, 0x12, 0x10, 0x0a, 0x03, 0x70, 0x69, 0x64, 0x18, 0x01, 0x20, 0x02, 0x28, 0x05, 0x52, 0x03, + 0x70, 0x69, 0x64, 0x22, 0x37, 0x0a, 0x0b, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x6e, 0x6f, 0x74, 0x69, + 0x66, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x70, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x70, 0x69, 0x64, 0x22, 0x6c, 0x0a, 0x0d, + 0x63, 0x72, 0x69, 0x75, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x6d, 0x65, 0x6d, 0x5f, 0x74, 0x72, 0x61, 0x63, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x6d, 0x65, 0x6d, 0x54, 0x72, 0x61, 0x63, 0x6b, 0x12, 0x1d, 0x0a, 0x0a, 0x6c, 0x61, + 0x7a, 0x79, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, + 0x6c, 0x61, 0x7a, 
0x79, 0x50, 0x61, 0x67, 0x65, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x70, 0x69, 0x64, + 0x66, 0x64, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, + 0x70, 0x69, 0x64, 0x66, 0x64, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x22, 0xd0, 0x01, 0x0a, 0x08, 0x63, + 0x72, 0x69, 0x75, 0x5f, 0x72, 0x65, 0x71, 0x12, 0x22, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, + 0x01, 0x20, 0x02, 0x28, 0x0e, 0x32, 0x0e, 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x72, 0x65, 0x71, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x1e, 0x0a, 0x04, 0x6f, + 0x70, 0x74, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0a, 0x2e, 0x63, 0x72, 0x69, 0x75, + 0x5f, 0x6f, 0x70, 0x74, 0x73, 0x52, 0x04, 0x6f, 0x70, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x6e, + 0x6f, 0x74, 0x69, 0x66, 0x79, 0x5f, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x53, 0x75, 0x63, 0x63, 0x65, + 0x73, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x6b, 0x65, 0x65, 0x70, 0x5f, 0x6f, 0x70, 0x65, 0x6e, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6b, 0x65, 0x65, 0x70, 0x4f, 0x70, 0x65, 0x6e, 0x12, + 0x2a, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x0e, 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x70, + 0x69, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x03, 0x70, 0x69, 0x64, 0x22, 0x8f, 0x03, + 0x0a, 0x09, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x72, 0x65, 0x73, 0x70, 0x12, 0x22, 0x0a, 0x04, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x02, 0x28, 0x0e, 0x32, 0x0e, 0x2e, 0x63, 0x72, 0x69, 0x75, + 0x5f, 0x72, 0x65, 0x71, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, + 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x02, 0x28, 0x08, + 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 
0x12, 0x23, 0x0a, 0x04, 0x64, 0x75, 0x6d, + 0x70, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x64, + 0x75, 0x6d, 0x70, 0x5f, 0x72, 0x65, 0x73, 0x70, 0x52, 0x04, 0x64, 0x75, 0x6d, 0x70, 0x12, 0x2c, + 0x0a, 0x07, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x12, 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x5f, 0x72, + 0x65, 0x73, 0x70, 0x52, 0x07, 0x72, 0x65, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x12, 0x24, 0x0a, 0x06, + 0x6e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0c, 0x2e, 0x63, + 0x72, 0x69, 0x75, 0x5f, 0x6e, 0x6f, 0x74, 0x69, 0x66, 0x79, 0x52, 0x06, 0x6e, 0x6f, 0x74, 0x69, + 0x66, 0x79, 0x12, 0x26, 0x0a, 0x02, 0x70, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, + 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, + 0x72, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x52, 0x02, 0x70, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x63, 0x72, + 0x5f, 0x65, 0x72, 0x72, 0x6e, 0x6f, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x63, 0x72, + 0x45, 0x72, 0x72, 0x6e, 0x6f, 0x12, 0x2a, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0e, 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x66, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x63, 0x72, 0x5f, 0x65, 0x72, 0x72, 0x6d, 0x73, 0x67, 0x18, 0x09, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, 0x72, 0x45, 0x72, 0x72, 0x6d, 0x73, 0x67, 0x12, 0x27, + 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x0d, 0x2e, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x07, + 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 
0x22, + 0xb0, 0x01, 0x0a, 0x0c, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x12, 0x21, 0x0a, 0x0c, 0x6d, 0x61, 0x6a, 0x6f, 0x72, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, + 0x18, 0x01, 0x20, 0x02, 0x28, 0x05, 0x52, 0x0b, 0x6d, 0x61, 0x6a, 0x6f, 0x72, 0x4e, 0x75, 0x6d, + 0x62, 0x65, 0x72, 0x12, 0x21, 0x0a, 0x0c, 0x6d, 0x69, 0x6e, 0x6f, 0x72, 0x5f, 0x6e, 0x75, 0x6d, + 0x62, 0x65, 0x72, 0x18, 0x02, 0x20, 0x02, 0x28, 0x05, 0x52, 0x0b, 0x6d, 0x69, 0x6e, 0x6f, 0x72, + 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x14, 0x0a, 0x05, 0x67, 0x69, 0x74, 0x69, 0x64, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x67, 0x69, 0x74, 0x69, 0x64, 0x12, 0x1a, 0x0a, 0x08, + 0x73, 0x75, 0x62, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, + 0x73, 0x75, 0x62, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x78, 0x74, 0x72, + 0x61, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x65, 0x78, 0x74, 0x72, 0x61, 0x12, 0x12, + 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, + 0x6d, 0x65, 0x2a, 0x5f, 0x0a, 0x0c, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x63, 0x67, 0x5f, 0x6d, 0x6f, + 0x64, 0x65, 0x12, 0x0a, 0x0a, 0x06, 0x49, 0x47, 0x4e, 0x4f, 0x52, 0x45, 0x10, 0x00, 0x12, 0x0b, + 0x0a, 0x07, 0x43, 0x47, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x50, + 0x52, 0x4f, 0x50, 0x53, 0x10, 0x02, 0x12, 0x08, 0x0a, 0x04, 0x53, 0x4f, 0x46, 0x54, 0x10, 0x03, + 0x12, 0x08, 0x0a, 0x04, 0x46, 0x55, 0x4c, 0x4c, 0x10, 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x54, + 0x52, 0x49, 0x43, 0x54, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x46, 0x41, 0x55, 0x4c, + 0x54, 0x10, 0x06, 0x2a, 0x36, 0x0a, 0x18, 0x63, 0x72, 0x69, 0x75, 0x5f, 0x6e, 0x65, 0x74, 0x77, + 0x6f, 0x72, 0x6b, 0x5f, 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, + 0x0c, 0x0a, 0x08, 0x49, 0x50, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x53, 0x10, 0x01, 0x12, 0x0c, 0x0a, + 0x08, 0x4e, 0x46, 0x54, 0x41, 
0x42, 0x4c, 0x45, 0x53, 0x10, 0x02, 0x2a, 0x2d, 0x0a, 0x12, 0x63, + 0x72, 0x69, 0x75, 0x5f, 0x70, 0x72, 0x65, 0x5f, 0x64, 0x75, 0x6d, 0x70, 0x5f, 0x6d, 0x6f, 0x64, + 0x65, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x50, 0x4c, 0x49, 0x43, 0x45, 0x10, 0x01, 0x12, 0x0b, 0x0a, + 0x07, 0x56, 0x4d, 0x5f, 0x52, 0x45, 0x41, 0x44, 0x10, 0x02, 0x2a, 0xe5, 0x01, 0x0a, 0x0d, 0x63, + 0x72, 0x69, 0x75, 0x5f, 0x72, 0x65, 0x71, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x12, 0x09, 0x0a, 0x05, + 0x45, 0x4d, 0x50, 0x54, 0x59, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x55, 0x4d, 0x50, 0x10, + 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x45, 0x53, 0x54, 0x4f, 0x52, 0x45, 0x10, 0x02, 0x12, 0x09, + 0x0a, 0x05, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x10, 0x03, 0x12, 0x0c, 0x0a, 0x08, 0x50, 0x52, 0x45, + 0x5f, 0x44, 0x55, 0x4d, 0x50, 0x10, 0x04, 0x12, 0x0f, 0x0a, 0x0b, 0x50, 0x41, 0x47, 0x45, 0x5f, + 0x53, 0x45, 0x52, 0x56, 0x45, 0x52, 0x10, 0x05, 0x12, 0x0a, 0x0a, 0x06, 0x4e, 0x4f, 0x54, 0x49, + 0x46, 0x59, 0x10, 0x06, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x50, 0x55, 0x49, 0x4e, 0x46, 0x4f, 0x5f, + 0x44, 0x55, 0x4d, 0x50, 0x10, 0x07, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x50, 0x55, 0x49, 0x4e, 0x46, + 0x4f, 0x5f, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x10, 0x08, 0x12, 0x11, 0x0a, 0x0d, 0x46, 0x45, 0x41, + 0x54, 0x55, 0x52, 0x45, 0x5f, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x10, 0x09, 0x12, 0x0b, 0x0a, 0x07, + 0x56, 0x45, 0x52, 0x53, 0x49, 0x4f, 0x4e, 0x10, 0x0a, 0x12, 0x0c, 0x0a, 0x08, 0x57, 0x41, 0x49, + 0x54, 0x5f, 0x50, 0x49, 0x44, 0x10, 0x0b, 0x12, 0x14, 0x0a, 0x10, 0x50, 0x41, 0x47, 0x45, 0x5f, + 0x53, 0x45, 0x52, 0x56, 0x45, 0x52, 0x5f, 0x43, 0x48, 0x4c, 0x44, 0x10, 0x0c, 0x12, 0x13, 0x0a, + 0x0f, 0x53, 0x49, 0x4e, 0x47, 0x4c, 0x45, 0x5f, 0x50, 0x52, 0x45, 0x5f, 0x44, 0x55, 0x4d, 0x50, + 0x10, 0x0d, +} + +var ( + file_rpc_rpc_proto_rawDescOnce sync.Once + file_rpc_rpc_proto_rawDescData = file_rpc_rpc_proto_rawDesc +) + +func file_rpc_rpc_proto_rawDescGZIP() []byte { + file_rpc_rpc_proto_rawDescOnce.Do(func() { + file_rpc_rpc_proto_rawDescData = 
protoimpl.X.CompressGZIP(file_rpc_rpc_proto_rawDescData) + }) + return file_rpc_rpc_proto_rawDescData +} + +var file_rpc_rpc_proto_enumTypes = make([]protoimpl.EnumInfo, 4) +var file_rpc_rpc_proto_msgTypes = make([]protoimpl.MessageInfo, 15) +var file_rpc_rpc_proto_goTypes = []interface{}{ + (CriuCgMode)(0), // 0: criu_cg_mode + (CriuNetworkLockMethod)(0), // 1: criu_network_lock_method + (CriuPreDumpMode)(0), // 2: criu_pre_dump_mode + (CriuReqType)(0), // 3: criu_req_type + (*CriuPageServerInfo)(nil), // 4: criu_page_server_info + (*CriuVethPair)(nil), // 5: criu_veth_pair + (*ExtMountMap)(nil), // 6: ext_mount_map + (*JoinNamespace)(nil), // 7: join_namespace + (*InheritFd)(nil), // 8: inherit_fd + (*CgroupRoot)(nil), // 9: cgroup_root + (*UnixSk)(nil), // 10: unix_sk + (*CriuOpts)(nil), // 11: criu_opts + (*CriuDumpResp)(nil), // 12: criu_dump_resp + (*CriuRestoreResp)(nil), // 13: criu_restore_resp + (*CriuNotify)(nil), // 14: criu_notify + (*CriuFeatures)(nil), // 15: criu_features + (*CriuReq)(nil), // 16: criu_req + (*CriuResp)(nil), // 17: criu_resp + (*CriuVersion)(nil), // 18: criu_version +} +var file_rpc_rpc_proto_depIdxs = []int32{ + 4, // 0: criu_opts.ps:type_name -> criu_page_server_info + 5, // 1: criu_opts.veths:type_name -> criu_veth_pair + 6, // 2: criu_opts.ext_mnt:type_name -> ext_mount_map + 9, // 3: criu_opts.cg_root:type_name -> cgroup_root + 8, // 4: criu_opts.inherit_fd:type_name -> inherit_fd + 10, // 5: criu_opts.unix_sk_ino:type_name -> unix_sk + 0, // 6: criu_opts.manage_cgroups_mode:type_name -> criu_cg_mode + 7, // 7: criu_opts.join_ns:type_name -> join_namespace + 2, // 8: criu_opts.pre_dump_mode:type_name -> criu_pre_dump_mode + 1, // 9: criu_opts.network_lock:type_name -> criu_network_lock_method + 3, // 10: criu_req.type:type_name -> criu_req_type + 11, // 11: criu_req.opts:type_name -> criu_opts + 15, // 12: criu_req.features:type_name -> criu_features + 3, // 13: criu_resp.type:type_name -> criu_req_type + 12, // 14: 
criu_resp.dump:type_name -> criu_dump_resp + 13, // 15: criu_resp.restore:type_name -> criu_restore_resp + 14, // 16: criu_resp.notify:type_name -> criu_notify + 4, // 17: criu_resp.ps:type_name -> criu_page_server_info + 15, // 18: criu_resp.features:type_name -> criu_features + 18, // 19: criu_resp.version:type_name -> criu_version + 20, // [20:20] is the sub-list for method output_type + 20, // [20:20] is the sub-list for method input_type + 20, // [20:20] is the sub-list for extension type_name + 20, // [20:20] is the sub-list for extension extendee + 0, // [0:20] is the sub-list for field type_name +} + +func init() { file_rpc_rpc_proto_init() } +func file_rpc_rpc_proto_init() { + if File_rpc_rpc_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_rpc_rpc_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuPageServerInfo); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuVethPair); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ExtMountMap); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*JoinNamespace); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*InheritFd); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + 
default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CgroupRoot); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*UnixSk); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuOpts); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuDumpResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuRestoreResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuNotify); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuFeatures); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuReq); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + 
return nil + } + } + file_rpc_rpc_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuResp); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_rpc_rpc_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CriuVersion); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_rpc_rpc_proto_rawDesc, + NumEnums: 4, + NumMessages: 15, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_rpc_rpc_proto_goTypes, + DependencyIndexes: file_rpc_rpc_proto_depIdxs, + EnumInfos: file_rpc_rpc_proto_enumTypes, + MessageInfos: file_rpc_rpc_proto_msgTypes, + }.Build() + File_rpc_rpc_proto = out.File + file_rpc_rpc_proto_rawDesc = nil + file_rpc_rpc_proto_goTypes = nil + file_rpc_rpc_proto_depIdxs = nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.proto b/vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.proto new file mode 100644 index 0000000000..a6cc5da487 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v6/rpc/rpc.proto @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: MIT + +syntax = "proto2"; + +message criu_page_server_info { + optional string address = 1; + optional int32 port = 2; + optional int32 pid = 3; + optional int32 fd = 4; +} + +message criu_veth_pair { + required string if_in = 1; + required string if_out = 2; +}; + +message ext_mount_map { + required string key = 1; + required string val = 2; +}; + +message join_namespace { + required string ns = 1; + required string ns_file = 2; + optional string extra_opt = 3; +} + +message inherit_fd { + required string key = 1; + required int32 fd = 2; +}; + +message cgroup_root { + 
optional string ctrl = 1; + required string path = 2; +}; + +message unix_sk { + required uint32 inode = 1; +}; + +enum criu_cg_mode { + IGNORE = 0; + CG_NONE = 1; + PROPS = 2; + SOFT = 3; + FULL = 4; + STRICT = 5; + DEFAULT = 6; +}; + +enum criu_network_lock_method { + IPTABLES = 1; + NFTABLES = 2; +}; + +enum criu_pre_dump_mode { + SPLICE = 1; + VM_READ = 2; +}; + +message criu_opts { + required int32 images_dir_fd = 1; + optional int32 pid = 2; /* if not set on dump, will dump requesting process */ + + optional bool leave_running = 3; + optional bool ext_unix_sk = 4; + optional bool tcp_established = 5; + optional bool evasive_devices = 6; + optional bool shell_job = 7; + optional bool file_locks = 8; + optional int32 log_level = 9 [default = 2]; + optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */ + + optional criu_page_server_info ps = 11; + + optional bool notify_scripts = 12; + + optional string root = 13; + optional string parent_img = 14; + optional bool track_mem = 15; + optional bool auto_dedup = 16; + + optional int32 work_dir_fd = 17; + optional bool link_remap = 18; + repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */ + + optional uint32 cpu_cap = 20 [default = 0xffffffff]; + optional bool force_irmap = 21; + repeated string exec_cmd = 22; + + repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */ + optional bool manage_cgroups = 24; /* backward compatibility */ + repeated cgroup_root cg_root = 25; + + optional bool rst_sibling = 26; /* swrk only */ + repeated inherit_fd inherit_fd = 27; /* swrk only */ + + optional bool auto_ext_mnt = 28; + optional bool ext_sharing = 29; + optional bool ext_masters = 30; + + repeated string skip_mnt = 31; + repeated string enable_fs = 32; + + repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */ + + optional criu_cg_mode manage_cgroups_mode = 34; + optional uint32 ghost_limit = 35 [default = 0x100000]; + repeated 
string irmap_scan_paths = 36; + repeated string external = 37; + optional uint32 empty_ns = 38; + repeated join_namespace join_ns = 39; + + optional string cgroup_props = 41; + optional string cgroup_props_file = 42; + repeated string cgroup_dump_controller = 43; + + optional string freeze_cgroup = 44; + optional uint32 timeout = 45; + optional bool tcp_skip_in_flight = 46; + optional bool weak_sysctls = 47; + optional bool lazy_pages = 48; + optional int32 status_fd = 49; + optional bool orphan_pts_master = 50; + optional string config_file = 51; + optional bool tcp_close = 52; + optional string lsm_profile = 53; + optional string tls_cacert = 54; + optional string tls_cacrl = 55; + optional string tls_cert = 56; + optional string tls_key = 57; + optional bool tls = 58; + optional bool tls_no_cn_verify = 59; + optional string cgroup_yard = 60; + optional criu_pre_dump_mode pre_dump_mode = 61 [default = SPLICE]; + optional int32 pidfd_store_sk = 62; + optional string lsm_mount_context = 63; + optional criu_network_lock_method network_lock = 64 [default = IPTABLES]; + optional bool mntns_compat_mode = 65; +/* optional bool check_mounts = 128; */ +} + +message criu_dump_resp { + optional bool restored = 1; +} + +message criu_restore_resp { + required int32 pid = 1; +} + +message criu_notify { + optional string script = 1; + optional int32 pid = 2; +} + +enum criu_req_type { + EMPTY = 0; + DUMP = 1; + RESTORE = 2; + CHECK = 3; + PRE_DUMP = 4; + PAGE_SERVER = 5; + + NOTIFY = 6; + + CPUINFO_DUMP = 7; + CPUINFO_CHECK = 8; + + FEATURE_CHECK = 9; + + VERSION = 10; + + WAIT_PID = 11; + PAGE_SERVER_CHLD = 12; + + SINGLE_PRE_DUMP = 13; +} + +/* + * List of features which can queried via + * CRIU_REQ_TYPE__FEATURE_CHECK + */ +message criu_features { + optional bool mem_track = 1; + optional bool lazy_pages = 2; + optional bool pidfd_store = 3; +} + +/* + * Request -- each type corresponds to must-be-there + * request arguments of respective type + */ + +message criu_req { + 
required criu_req_type type = 1; + + optional criu_opts opts = 2; + optional bool notify_success = 3; + + /* + * When set service won't close the connection but + * will wait for more req-s to appear. Works not + * for all request types. + */ + optional bool keep_open = 4; + /* + * 'features' can be used to query which features + * are supported by the installed criu/kernel + * via RPC. + */ + optional criu_features features = 5; + + /* 'pid' is used for WAIT_PID */ + optional uint32 pid = 6; +} + +/* + * Response -- it states whether the request was served + * and additional request-specific information + */ + +message criu_resp { + required criu_req_type type = 1; + required bool success = 2; + + optional criu_dump_resp dump = 3; + optional criu_restore_resp restore = 4; + optional criu_notify notify = 5; + optional criu_page_server_info ps = 6; + + optional int32 cr_errno = 7; + optional criu_features features = 8; + optional string cr_errmsg = 9; + optional criu_version version = 10; + + optional int32 status = 11; +} + +/* Answer for criu_req_type.VERSION requests */ +message criu_version { + required int32 major_number = 1; + required int32 minor_number = 2; + optional string gitid = 3; + optional int32 sublevel = 4; + optional int32 extra = 5; + optional string name = 6; +} diff --git a/vendor/github.com/containerd/console/console_other.go b/vendor/github.com/containerd/console/console_other.go index 933dfaddda..968c5771c8 100644 --- a/vendor/github.com/containerd/console/console_other.go +++ b/vendor/github.com/containerd/console/console_other.go @@ -1,5 +1,5 @@ -//go:build !darwin && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos -// +build !darwin,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos +//go:build !darwin && !freebsd && !linux && !netbsd && !openbsd && !windows && !zos +// +build !darwin,!freebsd,!linux,!netbsd,!openbsd,!windows,!zos /* Copyright The containerd Authors. 
diff --git a/vendor/github.com/containerd/console/console_unix.go b/vendor/github.com/containerd/console/console_unix.go index 161f5d126c..aa4c696234 100644 --- a/vendor/github.com/containerd/console/console_unix.go +++ b/vendor/github.com/containerd/console/console_unix.go @@ -31,6 +31,15 @@ func NewPty() (Console, string, error) { if err != nil { return nil, "", err } + return NewPtyFromFile(f) +} + +// NewPtyFromFile creates a new pty pair, just like [NewPty] except that the +// provided [os.File] is used as the master rather than automatically creating +// a new master from /dev/ptmx. The ownership of [os.File] is passed to the +// returned [Console], so the caller must be careful to not call Close on the +// underlying file. +func NewPtyFromFile(f File) (Console, string, error) { slave, err := ptsname(f) if err != nil { return nil, "", err diff --git a/vendor/github.com/containerd/console/tc_darwin.go b/vendor/github.com/containerd/console/tc_darwin.go index 787154580f..77c695a40f 100644 --- a/vendor/github.com/containerd/console/tc_darwin.go +++ b/vendor/github.com/containerd/console/tc_darwin.go @@ -18,7 +18,6 @@ package console import ( "fmt" - "os" "golang.org/x/sys/unix" ) @@ -30,12 +29,12 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { return unix.IoctlSetPointerInt(int(f.Fd()), unix.TIOCPTYUNLK, 0) } // ptsname retrieves the name of the first available pts for the given master. 
-func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCPTYGNAME) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_freebsd_cgo.go b/vendor/github.com/containerd/console/tc_freebsd_cgo.go index 3328257941..627f7d55a9 100644 --- a/vendor/github.com/containerd/console/tc_freebsd_cgo.go +++ b/vendor/github.com/containerd/console/tc_freebsd_cgo.go @@ -21,7 +21,6 @@ package console import ( "fmt" - "os" "golang.org/x/sys/unix" ) @@ -39,7 +38,7 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { fd := C.int(f.Fd()) if _, err := C.unlockpt(fd); err != nil { C.close(fd) @@ -49,7 +48,7 @@ func unlockpt(f *os.File) error { } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_freebsd_nocgo.go b/vendor/github.com/containerd/console/tc_freebsd_nocgo.go index 18a9b9cbea..434ba46efc 100644 --- a/vendor/github.com/containerd/console/tc_freebsd_nocgo.go +++ b/vendor/github.com/containerd/console/tc_freebsd_nocgo.go @@ -21,7 +21,6 @@ package console import ( "fmt" - "os" "golang.org/x/sys/unix" ) @@ -42,12 +41,12 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { panic("unlockpt() support requires cgo.") } // ptsname retrieves the name of the first available pts for the given master. 
-func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_linux.go b/vendor/github.com/containerd/console/tc_linux.go index 7d552ea4ba..e98dc022dc 100644 --- a/vendor/github.com/containerd/console/tc_linux.go +++ b/vendor/github.com/containerd/console/tc_linux.go @@ -18,7 +18,6 @@ package console import ( "fmt" - "os" "unsafe" "golang.org/x/sys/unix" @@ -31,7 +30,7 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { +func unlockpt(f File) error { var u int32 // XXX do not use unix.IoctlSetPointerInt here, see commit dbd69c59b81. if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))); err != 0 { @@ -41,7 +40,7 @@ func unlockpt(f *os.File) error { } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { var u uint32 // XXX do not use unix.IoctlGetInt here, see commit dbd69c59b81. if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCGPTN, uintptr(unsafe.Pointer(&u))); err != 0 { diff --git a/vendor/github.com/containerd/console/tc_netbsd.go b/vendor/github.com/containerd/console/tc_netbsd.go index 71227aefdf..73cf439777 100644 --- a/vendor/github.com/containerd/console/tc_netbsd.go +++ b/vendor/github.com/containerd/console/tc_netbsd.go @@ -18,7 +18,6 @@ package console import ( "bytes" - "os" "golang.org/x/sys/unix" ) @@ -31,12 +30,12 @@ const ( // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. 
// This does not exist on NetBSD, it does not allocate controlling terminals on open -func unlockpt(f *os.File) error { +func unlockpt(f File) error { return nil } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { ptm, err := unix.IoctlGetPtmget(int(f.Fd()), unix.TIOCPTSNAME) if err != nil { return "", err diff --git a/vendor/github.com/containerd/console/tc_openbsd_cgo.go b/vendor/github.com/containerd/console/tc_openbsd_cgo.go index 0e76f6cc3e..46f4250c4d 100644 --- a/vendor/github.com/containerd/console/tc_openbsd_cgo.go +++ b/vendor/github.com/containerd/console/tc_openbsd_cgo.go @@ -20,8 +20,6 @@ package console import ( - "os" - "golang.org/x/sys/unix" ) @@ -34,7 +32,7 @@ const ( ) // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { ptspath, err := C.ptsname(C.int(f.Fd())) if err != nil { return "", err @@ -44,7 +42,7 @@ func ptsname(f *os.File) (string, error) { // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. // unlockpt should be called before opening the slave side of a pty. 
-func unlockpt(f *os.File) error { +func unlockpt(f File) error { if _, err := C.grantpt(C.int(f.Fd())); err != nil { return err } diff --git a/vendor/github.com/containerd/console/tc_openbsd_nocgo.go b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go index dca92418b0..a8f9f6c25c 100644 --- a/vendor/github.com/containerd/console/tc_openbsd_nocgo.go +++ b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go @@ -29,8 +29,6 @@ package console import ( - "os" - "golang.org/x/sys/unix" ) @@ -39,10 +37,10 @@ const ( cmdTcSet = unix.TIOCSETA ) -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { panic("ptsname() support requires cgo.") } -func unlockpt(f *os.File) error { +func unlockpt(f File) error { panic("unlockpt() support requires cgo.") } diff --git a/vendor/github.com/containerd/console/tc_zos.go b/vendor/github.com/containerd/console/tc_zos.go index fc90ba5fb8..23b0bd2820 100644 --- a/vendor/github.com/containerd/console/tc_zos.go +++ b/vendor/github.com/containerd/console/tc_zos.go @@ -17,7 +17,6 @@ package console import ( - "os" "strings" "golang.org/x/sys/unix" @@ -29,11 +28,11 @@ const ( ) // unlockpt is a no-op on zos. -func unlockpt(_ *os.File) error { +func unlockpt(File) error { return nil } // ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { +func ptsname(f File) (string, error) { return "/dev/ttyp" + strings.TrimPrefix(f.Name(), "/dev/ptyp"), nil } diff --git a/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml b/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml new file mode 100644 index 0000000000..3e8dd99bd7 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: MPL-2.0 + +# Copyright (C) 2025 Aleksa Sarai +# Copyright (C) 2025 SUSE LLC +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +version: "2" + +run: + build-tags: + - libpathrs + +linters: + enable: + - asasalint + - asciicheck + - containedctx + - contextcheck + - errcheck + - errorlint + - exhaustive + - forcetypeassert + - godot + - goprintffuncname + - govet + - importas + - ineffassign + - makezero + - misspell + - musttag + - nilerr + - nilnesserr + - nilnil + - noctx + - prealloc + - revive + - staticcheck + - testifylint + - unconvert + - unparam + - unused + - usetesting + settings: + govet: + enable: + - nilness + testifylint: + enable-all: true + +formatters: + enable: + - gofumpt + - goimports + settings: + goimports: + local-prefixes: + - github.com/cyphar/filepath-securejoin diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md new file mode 100644 index 0000000000..734cf61e32 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md @@ -0,0 +1,461 @@ +# Changelog # +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/) +and this project adheres to [Semantic Versioning](http://semver.org/). + +## [Unreleased] ## + +## [0.6.0] - 2025-11-03 ## + +> By the Power of Greyskull! + +While quite small code-wise, this release marks a very key point in the +development of filepath-securejoin. + +filepath-securejoin was originally intended (back in 2017) to simply be a +single-purpose library that would take some common code used in container +runtimes (specifically, Docker's `FollowSymlinksInScope`) and make it more +general-purpose (with the eventual goals of it ending up in the Go stdlib). + +Of course, I quickly discovered that this problem was actually far more +complicated to solve when dealing with racing attackers, which led to me +developing `openat2(2)` and [libpathrs][]. 
I had originally planned for +libpathrs to completely replace filepath-securejoin "once it was ready" but in +the interim we needed to fix several race attacks in runc as part of security +advisories. Obviously we couldn't require the usage of a pre-0.1 Rust library +in runc so it was necessary to port bits of libpathrs into filepath-securejoin. +(Ironically the first prototypes of libpathrs were originally written in Go and +then rewritten to Rust, so the code in filepath-securejoin is actually Go code +that was rewritten to Rust then re-rewritten to Go.) + +It then became clear that pure-Go libraries will likely not be willing to +require CGo for all of their builds, so it was necessary to accept that +filepath-securejoin will need to stay. As such, in v0.5.0 we provided more +pure-Go implementations of features from libpathrs but moved them into +`pathrs-lite` subpackage to clarify what purpose these helpers serve. + +This release finally closes the loop and makes it so that pathrs-lite can +transparently use libpathrs (via a `libpathrs` build-tag). This means that +upstream libraries can use the pure Go version if they prefer, but downstreams +(either downstream library users or even downstream distributions) are able to +migrate to libpathrs for all usages of pathrs-lite in an entire Go binary. + +I should make it clear that I do not plan to port the rest of libpathrs to Go, +as I do not wish to maintain two copies of the same codebase. pathrs-lite +already provides the core essentials necessary to operate on paths safely for +most modern systems. Users who want additional hardening or more ergonomic APIs +are free to use [`cyphar.com/go-pathrs`][go-pathrs] (libpathrs's Go bindings). + +[libpathrs]: https://github.com/cyphar/libpathrs +[go-pathrs]: https://cyphar.com/go-pathrs + +### Breaking ### +- The deprecated `MkdirAll`, `MkdirAllHandle`, `OpenInRoot`, `OpenatInRoot` and + `Reopen` wrappers have been removed. 
Please switch to using `pathrs-lite` + directly. + +### Added ### +- `pathrs-lite` now has support for using [libpathrs][libpathrs] as a backend. + This is opt-in and can be enabled at build time with the `libpathrs` build + tag. The intention is to allow for downstream libraries and other projects to + make use of the pure-Go `github.com/cyphar/filepath-securejoin/pathrs-lite` + package and distributors can then opt-in to using `libpathrs` for the entire + binary if they wish. + +## [0.5.1] - 2025-10-31 ## + +> Spooky scary skeletons send shivers down your spine! + +### Changed ### +- `openat2` can return `-EAGAIN` if it detects a possible attack in certain + scenarios (namely if there was a rename or mount while walking a path with a + `..` component). While this is necessary to avoid a denial-of-service in the + kernel, it does require retry loops in userspace. + + In previous versions, `pathrs-lite` would retry `openat2` 32 times before + returning an error, but we've received user reports that this limit can be + hit on systems with very heavy load. In some synthetic benchmarks (testing + the worst-case of an attacker doing renames in a tight loop on every core of + a 16-core machine) we managed to get a ~3% failure rate in runc. We have + improved this situation in two ways: + + * We have now increased this limit to 128, which should be good enough for + most use-cases without becoming a denial-of-service vector (the number of + syscalls called by the `O_PATH` resolver in a typical case is within the + same ballpark). The same benchmarks show a failure rate of ~0.12% which + (while not zero) is probably sufficient for most users. + + * In addition, we now return a `unix.EAGAIN` error that is bubbled up and can + be detected by callers. 
This means that callers with stricter requirements + to avoid spurious errors can choose to do their own infinite `EAGAIN` retry + loop (though we would strongly recommend users use time-based deadlines in + such retry loops to avoid potentially unbounded denials-of-service). + +## [0.5.0] - 2025-09-26 ## + +> Let the past die. Kill it if you have to. + +> **NOTE**: With this release, some parts of +> `github.com/cyphar/filepath-securejoin` are now licensed under the Mozilla +> Public License (version 2). Please see [COPYING.md][] as well as the +> license header in each file for more details. + +[COPYING.md]: ./COPYING.md + +### Breaking ### +- The new API introduced in the [0.3.0][] release has been moved to a new + subpackage called `pathrs-lite`. This was primarily done to better indicate + the split between the new and old APIs, as well as indicate to users the + purpose of this subpackage (it is a less complete version of [libpathrs][]). + + We have added some wrappers to the top-level package to ease the transition, + but those are deprecated and will be removed in the next minor release of + filepath-securejoin. Users should update their import paths. + + This new subpackage has also been relicensed under the Mozilla Public License + (version 2), please see [COPYING.md][] for more details. + +### Added ### +- Most of the key bits of the safe `procfs` API have now been exported and are + available in `github.com/cyphar/filepath-securejoin/pathrs-lite/procfs`. At + the moment this primarily consists of a new `procfs.Handle` API: + + * `OpenProcRoot` returns a new handle to `/proc`, endeavouring to make it + safe if possible (`subset=pid` to protect against mistaken write attacks + and leaks, as well as using `fsopen(2)` to avoid racing mount attacks). + + `OpenUnsafeProcRoot` returns a handle without attempting to create one + with `subset=pid`, which makes it more dangerous to leak.
Most users + should use `OpenProcRoot` (even if you need to use `ProcRoot` as the base + of an operation, as filepath-securejoin will internally open a handle when + necessary). + + * The `(*procfs.Handle).Open*` family of methods lets you get a safe + `O_PATH` handle to subpaths within `/proc` for certain subpaths. + + For `OpenThreadSelf`, the returned `ProcThreadSelfCloser` needs to be + called after you completely finish using the handle (this is necessary + because Go is multi-threaded and `ProcThreadSelf` references + `/proc/thread-self` which may disappear if we do not + `runtime.LockOSThread` -- `ProcThreadSelfCloser` is currently equivalent + to `runtime.UnlockOSThread`). + + Note that you cannot open any `procfs` symlinks (most notably magic-links) + using this API. At the moment, filepath-securejoin does not support this + feature (but [libpathrs][] does). + + * `ProcSelfFdReadlink` lets you get the in-kernel path representation of a + file descriptor (think `readlink("/proc/self/fd/...")`), except that we + verify that there aren't any tricky overmounts that could fool the + process. + + Please be aware that the returned string is simply a snapshot at that + particular moment, and an attacker could move the file being pointed to. + In addition, complex namespace configurations could result in nonsensical + or confusing paths being returned. The value received from this function + should only be used as secondary verification of some security property, + not as proof that a particular handle has a particular path. + + The procfs handle used internally by the API is the same as the rest of + `filepath-securejoin` (for privileged programs this is usually a private + in-process `procfs` instance created with `fsopen(2)`). + + As before, this is intended as a stop-gap before users migrate to + [libpathrs][], which provides a far more extensive safe `procfs` API and is + generally more robust.
+ +- Previously, the hardened procfs implementation (used internally within + `Reopen` and `Open(at)InRoot`) only protected against overmount attacks on + systems with `openat2(2)` (Linux 5.6) or systems with `fsopen(2)` or + `open_tree(2)` (Linux 5.2) and programs with privileges to use them (with + some caveats about locked mounts that probably affect very few users). For + other users, an attacker with the ability to create malicious mounts (on most + systems, a sysadmin) could trick you into operating on files you didn't + expect. This attack only really makes sense in the context of container + runtime implementations. + + This was considered a reasonable trade-off, as the long-term intention was to + get all users to just switch to [libpathrs][] if they wanted to use the safe + `procfs` API (which had more extensive protections, and is what these new + protections in `filepath-securejoin` are based on). However, as the API + is now being exported it seems unwise to advertise the API as "safe" if we do + not protect against known attacks. + + The procfs API is now more protected against attackers on systems lacking the + aforementioned protections. However, the most comprehensive of these + protections effectively rely on [`statx(STATX_MNT_ID)`][statx.2] (Linux 5.8). + On older kernel versions, there is no effective protection (there is some + minimal protection against non-`procfs` filesystem components but a + sufficiently clever attacker can work around those). In addition, + `STATX_MNT_ID` is vulnerable to mount ID reuse attacks by sufficiently + motivated and privileged attackers -- this problem is mitigated with + `STATX_MNT_ID_UNIQUE` (Linux 6.8) but that raises the minimum kernel version + for more protection. 
+ + The fact that these protections are quite limited despite needing a fair bit + of extra code to handle was one of the primary reasons we did not initially + implement this in `filepath-securejoin` ([libpathrs][] supports all of this, + of course). + +### Fixed ### +- RHEL 8 kernels have backports of `fsopen(2)` but in some testing we've found + that it has very bad (and very difficult to debug) performance issues, and so + we will explicitly refuse to use `fsopen(2)` if the running kernel version is + pre-5.2 and will instead fall back to `open("/proc")`. + +[CVE-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv +[libpathrs]: https://github.com/cyphar/libpathrs +[statx.2]: https://www.man7.org/linux/man-pages/man2/statx.2.html + +## [0.4.1] - 2025-01-28 ## + +### Fixed ### +- The restriction added for `root` paths passed to `SecureJoin` in 0.4.0 was + found to be too strict and caused some regressions when folks tried to + update, so this restriction has been relaxed to only return an error if the + path contains a `..` component. We still recommend users use `filepath.Clean` + (and even `filepath.EvalSymlinks`) on the `root` path they are using, but at + least you will no longer be punished for "trivial" unclean paths. + +## [0.4.0] - 2025-01-13 ## + +### Breaking ### +- `SecureJoin(VFS)` will now return an error if the provided `root` is not a + `filepath.Clean`'d path. + + While it is ultimately the responsibility of the caller to ensure the root is + a safe path to use, passing a path like `/symlink/..` as a root would result + in the `SecureJoin`'d path being placed in `/` even though `/symlink/..` + might be a different directory, and so we should more strongly discourage + such usage. + + All major users of `securejoin.SecureJoin` already ensure that the paths they + provide are safe (and this is ultimately a question of user error), but + removing this foot-gun is probably a good idea.
Of course, this is + necessarily a breaking API change (though we expect no real users to be + affected by it). + + Thanks to [Erik Sjölund](https://github.com/eriksjolund), who initially + reported this issue as a possible security issue. + +- `MkdirAll` and `MkdirAllHandle` now take an `os.FileMode`-style mode argument + instead of a raw `unix.S_*`-style mode argument, which may cause compile-time + type errors depending on how you use `filepath-securejoin`. For most users, + there will be no change in behaviour aside from the type change (as the + bottom `0o777` bits are the same in both formats, and most users are probably + only using those bits). + + However, if you were using `unix.S_ISVTX` to set the sticky bit with + `MkdirAll(Handle)` you will need to switch to `os.ModeSticky` otherwise you + will get a runtime error with this update. In addition, the error message you + will get from passing `unix.S_ISUID` and `unix.S_ISGID` will be different as + they are treated as invalid bits now (note that previously passing said bits + was also an error). + +## [0.3.6] - 2024-12-17 ## + +### Compatibility ### +- The minimum Go version requirement for `filepath-securejoin` is now Go 1.18 + (we use generics internally). + + For reference, `filepath-securejoin@v0.3.0` somewhat-arbitrarily bumped the + Go version requirement to 1.21. + + While we did make some use of Go 1.21 stdlib features (and in principle Go + versions <= 1.21 are no longer even supported by upstream anymore), some + downstreams have complained that the version bump has meant that they have to + do workarounds when backporting fixes that use the new `filepath-securejoin` + API onto old branches. This is not an ideal situation, but since using this + library is probably better for most downstreams than a hand-rolled + workaround, we now have compatibility shims that allow us to build on older + Go versions.
+- Lower minimum version requirement for `golang.org/x/sys` to `v0.18.0` (we + need the wrappers for `fsconfig(2)`), which should also make backporting + patches to older branches easier. + +## [0.3.5] - 2024-12-06 ## + +### Fixed ### +- `MkdirAll` will now no longer return an `EEXIST` error if two racing + processes are creating the same directory. We will still verify that the path + is a directory, but this will avoid spurious errors when multiple threads or + programs are trying to `MkdirAll` the same path. opencontainers/runc#4543 + +## [0.3.4] - 2024-10-09 ## + +### Fixed ### +- Previously, some testing mocks we had resulted in us doing `import "testing"` + in non-`_test.go` code, which made some downstreams like Kubernetes unhappy. + This has been fixed. (#32) + +## [0.3.3] - 2024-09-30 ## + +### Fixed ### +- The mode and owner verification logic in `MkdirAll` has been removed. This + was originally intended to protect against some theoretical attacks but upon + further consideration these protections don't actually buy us anything and + they were causing spurious errors with more complicated filesystem setups. +- The "is the created directory empty" logic in `MkdirAll` has also been + removed. This was not causing us issues yet, but some pseudofilesystems (such + as `cgroup`) create non-empty directories and so this logic would've been + wrong for such cases. + +## [0.3.2] - 2024-09-13 ## + +### Changed ### +- Passing the `S_ISUID` or `S_ISGID` modes to `MkdirAllInRoot` will now return + an explicit error saying that those bits are ignored by `mkdirat(2)`. In the + past a different error was returned, but since the silent ignoring behaviour + is codified in the man pages a more explicit error seems apt. While silently + ignoring these bits would be the most compatible option, it could lead to + users thinking their code sets these bits when it doesn't. Programs that need + to deal with compatibility can mask the bits themselves. 
(#23, #25) + +### Fixed ### +- If a directory has `S_ISGID` set, then all child directories will have + `S_ISGID` set when created and a different gid will be used for any inode + created under the directory. Previously, the "expected owner and mode" + validation in `securejoin.MkdirAll` did not correctly handle this. We now + correctly handle this case. (#24, #25) + +## [0.3.1] - 2024-07-23 ## + +### Changed ### +- By allowing `Open(at)InRoot` to opt-out of the extra work done by `MkdirAll` + to do the necessary "partial lookups", `Open(at)InRoot` now does less work + for both implementations (resulting in a many-fold decrease in the number of + operations for `openat2`, and a modest improvement for non-`openat2`) and is + far more guaranteed to match the correct `openat2(RESOLVE_IN_ROOT)` + behaviour. +- We now use `readlinkat(fd, "")` where possible. For `Open(at)InRoot` this + effectively just means that we no longer risk getting spurious errors during + rename races. However, for our hardened procfs handler, this in theory should + prevent mount attacks from tricking us when doing magic-link readlinks (even + when using the unsafe host `/proc` handle). Unfortunately `Reopen` is still + potentially vulnerable to those kinds of somewhat-esoteric attacks. + + Technically this [will only work on post-2.6.39 kernels][linux-readlinkat-emptypath] + but it seems incredibly unlikely anyone is using `filepath-securejoin` on a + pre-2011 kernel. + +### Fixed ### +- Several improvements were made to the errors returned by `Open(at)InRoot` and + `MkdirAll` when dealing with invalid paths under the emulated (ie. + non-`openat2`) implementation. Previously, some paths would return the wrong + error (`ENOENT` when the last component was a non-directory), and other paths + would be returned as though they were acceptable (trailing-slash components + after a non-directory would be ignored by `Open(at)InRoot`). 
+ + These changes were done to match `openat2`'s behaviour and are purely a + consistency fix (most users are going to be using `openat2` anyway). + +[linux-readlinkat-emptypath]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=65cfc6722361570bfe255698d9cd4dccaf47570d + +## [0.3.0] - 2024-07-11 ## + +### Added ### +- A new set of `*os.File`-based APIs have been added. These are adapted from + [libpathrs][] and we strongly suggest using them if possible (as they provide + far more protection against attacks than `SecureJoin`): + + - `Open(at)InRoot` resolves a path inside a rootfs and returns an `*os.File` + handle to the path. Note that the handle returned is an `O_PATH` handle, + which cannot be used for reading or writing (as well as some other + operations -- [see open(2) for more details][open.2]) + + - `Reopen` takes an `O_PATH` file handle and safely re-opens it to upgrade + it to a regular handle. This can also be used with non-`O_PATH` handles, + but `O_PATH` is the most obvious application. + + - `MkdirAll` is an implementation of `os.MkdirAll` that is safe to use to + create a directory tree within a rootfs. + + As these are new APIs, they may change in the future. However, they should be + safe to start migrating to as we have extensive tests ensuring they behave + correctly and are safe against various races and other attacks. + +[libpathrs]: https://github.com/cyphar/libpathrs +[open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html + +## [0.2.5] - 2024-05-03 ## + +### Changed ### +- Some minor changes were made to how lexical components (like `..` and `.`) + are handled during path generation in `SecureJoin`. There is no behaviour + change as a result of this fix (the resulting paths are the same). + +### Fixed ### +- The error returned when we hit a symlink loop now references the correct + path.
(#10) + +## [0.2.4] - 2023-09-06 ## + +### Security ### +- This release fixes a potential security issue in filepath-securejoin when + used on Windows ([GHSA-6xv5-86q9-7xr8][], which could be used to generate + paths outside of the provided rootfs in certain cases), as well as improving + the overall behaviour of filepath-securejoin when dealing with Windows paths + that contain volume names. Thanks to Paulo Gomes for discovering and fixing + these issues. + +### Fixed ### +- Switch to GitHub Actions for CI so we can test on Windows as well as Linux + and MacOS. + +[GHSA-6xv5-86q9-7xr8]: https://github.com/advisories/GHSA-6xv5-86q9-7xr8 + +## [0.2.3] - 2021-06-04 ## + +### Changed ### +- Switch to Go 1.13-style `%w` error wrapping, letting us drop the dependency + on `github.com/pkg/errors`. + +## [0.2.2] - 2018-09-05 ## + +### Changed ### +- Use `syscall.ELOOP` as the base error for symlink loops, rather than our own + (internal) error. This allows callers to more easily use `errors.Is` to check + for this case. + +## [0.2.1] - 2018-09-05 ## + +### Fixed ### +- Use our own `IsNotExist` implementation, which lets us handle `ENOTDIR` + properly within `SecureJoin`. + +## [0.2.0] - 2017-07-19 ## + +We now have 100% test coverage! + +### Added ### +- Add a `SecureJoinVFS` API that can be used for mocking (as we do in our new + tests) or for implementing custom handling of lookup operations (such as for + rootless containers, where work is necessary to access directories with weird + modes because we don't have `CAP_DAC_READ_SEARCH` or `CAP_DAC_OVERRIDE`). + +## 0.1.0 - 2017-07-19 + +This is our first release of `github.com/cyphar/filepath-securejoin`, +containing a full implementation with a coverage of 93.5% (the only missing +cases are the error cases, which are hard to mocktest at the moment). 
+ +[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.6.0...HEAD +[0.6.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.1...v0.6.0 +[0.5.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.0...v0.5.1 +[0.5.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...v0.5.0 +[0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1 +[0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0 +[0.3.6]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.5...v0.3.6 +[0.3.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.4...v0.3.5 +[0.3.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.3...v0.3.4 +[0.3.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.2...v0.3.3 +[0.3.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...v0.3.2 +[0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1 +[0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0 +[0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5 +[0.2.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.3...v0.2.4 +[0.2.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.2...v0.2.3 +[0.2.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.1...v0.2.2 +[0.2.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.0...v0.2.1 +[0.2.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.1.0...v0.2.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/COPYING.md b/vendor/github.com/cyphar/filepath-securejoin/COPYING.md new file mode 100644 index 0000000000..520e822b18 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/COPYING.md @@ -0,0 +1,447 @@ +## COPYING ## + +`SPDX-License-Identifier: BSD-3-Clause AND MPL-2.0` + +This project is made up of code licensed under different licenses. 
Which code +you use will have an impact on whether only one or both licenses apply to your +usage of this library. + +Note that **each file** in this project individually has a code comment at the +start describing the license of that particular file -- this is the most +accurate license information of this project; in case there is any conflict +between this document and the comment at the start of a file, the comment shall +take precedence. The only purpose of this document is to work around [a known +technical limitation of pkg.go.dev's license checking tool when dealing with +non-trivial project licenses][go75067]. + +[go75067]: https://go.dev/issue/75067 + +### `BSD-3-Clause` ### + +At time of writing, the following files and directories are licensed under the +BSD-3-Clause license: + + * `doc.go` + * `join*.go` + * `vfs.go` + * `internal/consts/*.go` + * `pathrs-lite/internal/gocompat/*.go` + * `pathrs-lite/internal/kernelversion/*.go` + +The text of the BSD-3-Clause license used by this project is the following (the +text is also available from the [`LICENSE.BSD`](./LICENSE.BSD) file): + +``` +Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +Copyright (C) 2017-2024 SUSE LLC. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` + +### `MPL-2.0` ### + +All other files (unless otherwise marked) are licensed under the Mozilla Public +License (version 2.0). + +The text of the Mozilla Public License (version 2.0) is the following (the text +is also available from the [`LICENSE.MPL-2.0`](./LICENSE.MPL-2.0) file): + +``` +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. 
"Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. 
For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. +``` diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD new file mode 100644 index 0000000000..cb1ab88da0 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD @@ -0,0 +1,28 @@ +Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +Copyright (C) 2017-2024 SUSE LLC. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 new file mode 100644 index 0000000000..d0a1fa1482 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. 
"Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/vendor/github.com/cyphar/filepath-securejoin/README.md b/vendor/github.com/cyphar/filepath-securejoin/README.md new file mode 100644 index 0000000000..6673abfc84 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/README.md @@ -0,0 +1,184 @@ +## `filepath-securejoin` ## + +[![Go Documentation](https://pkg.go.dev/badge/github.com/cyphar/filepath-securejoin.svg)](https://pkg.go.dev/github.com/cyphar/filepath-securejoin) +[![Build Status](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml/badge.svg)](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml) + +### Old API ### + +This library was originally just an implementation of `SecureJoin` which was +[intended to be included in the Go standard library][go#20126] as a safer +`filepath.Join` that would restrict the path lookup to be inside a root +directory. + +The implementation was based on code that existed in several container +runtimes. Unfortunately, this API is **fundamentally unsafe** against attackers +that can modify path components after `SecureJoin` returns and before the +caller uses the path, allowing for some fairly trivial TOCTOU attacks. + +`SecureJoin` (and `SecureJoinVFS`) are still provided by this library to +support legacy users, but new users are strongly suggested to avoid using +`SecureJoin` and instead use the [new api](#new-api) or switch to +[libpathrs][libpathrs]. + +With the above limitations in mind, this library guarantees the following: + +* If no error is set, the resulting string **must** be a child path of + `root` and will not contain any symlink path components (they will all be + expanded). 
+ +* When expanding symlinks, all symlink path components **must** be resolved + relative to the provided root. In particular, this can be considered a + userspace implementation of how `chroot(2)` operates on file paths. Note that + these symlinks will **not** be expanded lexically (`filepath.Clean` is not + called on the input before processing). + +* Non-existent path components are unaffected by `SecureJoin` (similar to + `filepath.EvalSymlinks`'s semantics). + +* The returned path will always be `filepath.Clean`ed and thus not contain any + `..` components. + +A (trivial) implementation of this function on GNU/Linux systems could be done +with the following (note that this requires root privileges and is far more +opaque than the implementation in this library, and also requires that +`readlink` is inside the `root` path and is trustworthy): + +```go +package securejoin + +import ( + "os/exec" + "path/filepath" +) + +func SecureJoin(root, unsafePath string) (string, error) { + unsafePath = string(filepath.Separator) + unsafePath + cmd := exec.Command("chroot", root, + "readlink", "--canonicalize-missing", "--no-newline", unsafePath) + output, err := cmd.CombinedOutput() + if err != nil { + return "", err + } + expanded := string(output) + return filepath.Join(root, expanded), nil +} +``` + +[libpathrs]: https://github.com/openSUSE/libpathrs +[go#20126]: https://github.com/golang/go/issues/20126 + +### New API ### +[#new-api]: #new-api + +While we recommend users switch to [libpathrs][libpathrs] as soon as it has a +stable release, some methods implemented by libpathrs have been ported to this +library to ease the transition. These APIs are only supported on Linux. + +These APIs are implemented such that `filepath-securejoin` will +opportunistically use certain newer kernel APIs that make these operations far +more secure. 
In particular: + +* All of the lookup operations will use [`openat2`][openat2.2] on new enough + kernels (Linux 5.6 or later) to restrict lookups through magic-links and + bind-mounts (for certain operations) and to make use of `RESOLVE_IN_ROOT` to + efficiently resolve symlinks within a rootfs. + +* The APIs provide hardening against a malicious `/proc` mount to either detect + or avoid being tricked by a `/proc` that is not legitimate. This is done + using [`openat2`][openat2.2] for all users, and privileged users will also be + further protected by using [`fsopen`][fsopen.2] and [`open_tree`][open_tree.2] + (Linux 5.2 or later). + +[openat2.2]: https://www.man7.org/linux/man-pages/man2/openat2.2.html +[fsopen.2]: https://github.com/brauner/man-pages-md/blob/main/fsopen.md +[open_tree.2]: https://github.com/brauner/man-pages-md/blob/main/open_tree.md + +#### `OpenInRoot` #### + +```go +func OpenInRoot(root, unsafePath string) (*os.File, error) +func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) +func Reopen(handle *os.File, flags int) (*os.File, error) +``` + +`OpenInRoot` is a much safer version of + +```go +path, err := securejoin.SecureJoin(root, unsafePath) +file, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC) +``` + +that protects against various race attacks that could lead to serious security +issues, depending on the application. Note that the returned `*os.File` is an +`O_PATH` file descriptor, which is quite restricted. Callers will probably need +to use `Reopen` to get a more usable handle (this split is done to provide +useful features like PTY spawning and to avoid users accidentally opening bad +inodes that could cause a DoS). + +Callers need to be careful in how they use the returned `*os.File`. Usually it +is only safe to operate on the handle directly, and it is very easy to create a +security issue. 
[libpathrs][libpathrs] provides far more helpers to make using +these handles safer -- there is currently no plan to port them to +`filepath-securejoin`. + +`OpenatInRoot` is like `OpenInRoot` except that the root is provided using an +`*os.File`. This allows you to ensure that multiple `OpenatInRoot` (or +`MkdirAllHandle`) calls are operating on the same rootfs. + +> **NOTE**: Unlike `SecureJoin`, `OpenInRoot` will error out as soon as it hits +> a dangling symlink or non-existent path. This is in contrast to `SecureJoin` +> which treated non-existent components as though they were real directories, +> and would allow for partial resolution of dangling symlinks. These behaviours +> are at odds with how Linux treats non-existent paths and dangling symlinks, +> and so these are no longer allowed. + +#### `MkdirAll` #### + +```go +func MkdirAll(root, unsafePath string, mode int) error +func MkdirAllHandle(root *os.File, unsafePath string, mode int) (*os.File, error) +``` + +`MkdirAll` is a much safer version of + +```go +path, err := securejoin.SecureJoin(root, unsafePath) +err = os.MkdirAll(path, mode) +``` + +that protects against the same kinds of races that `OpenInRoot` protects +against. + +`MkdirAllHandle` is like `MkdirAll` except that the root is provided using an +`*os.File` (the reason for this is the same as with `OpenatInRoot`) and an +`*os.File` of the final created directory is returned (this directory is +guaranteed to be effectively identical to the directory created by +`MkdirAllHandle`, which is not possible to ensure by just using `OpenatInRoot` +after `MkdirAll`). + +> **NOTE**: Unlike `SecureJoin`, `MkdirAll` will error out as soon as it hits +> a dangling symlink or non-existent path. This is in contrast to `SecureJoin` +> which treated non-existent components as though they were real directories, +> and would allow for partial resolution of dangling symlinks. 
These behaviours +> are at odds with how Linux treats non-existent paths and dangling symlinks, +> and so these are no longer allowed. This means that `MkdirAll` will not +> create non-existent directories referenced by a dangling symlink. + +### License ### + +`SPDX-License-Identifier: BSD-3-Clause AND MPL-2.0` + +Some of the code in this project is derived from Go, and is licensed under a +BSD 3-clause license (available in `LICENSE.BSD`). Other files (many of which +are derived from [libpathrs][libpathrs]) are licensed under the Mozilla Public +License version 2.0 (available in `LICENSE.MPL-2.0`). If you are using the +["New API" described above][#new-api], you are probably using code from files +released under this license. + +Every source file in this project has a copyright header describing its +license. Please check the license headers of each file to see what license +applies to it. + +See [COPYING.md](./COPYING.md) for some more details. + +[umoci]: https://github.com/opencontainers/umoci diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION new file mode 100644 index 0000000000..a918a2aa18 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION @@ -0,0 +1 @@ +0.6.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/codecov.yml b/vendor/github.com/cyphar/filepath-securejoin/codecov.yml new file mode 100644 index 0000000000..ff284dbfaf --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/codecov.yml @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: MPL-2.0 + +# Copyright (C) 2025 Aleksa Sarai +# Copyright (C) 2025 SUSE LLC +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +comment: + layout: "condensed_header, reach, diff, components, condensed_files, condensed_footer" + require_changes: true + branches: + - main + +coverage: + range: 60..100 + status: + project: + default: + target: 85% + threshold: 0% + patch: + default: + target: auto + informational: true + +github_checks: + annotations: false diff --git a/vendor/github.com/cyphar/filepath-securejoin/doc.go b/vendor/github.com/cyphar/filepath-securejoin/doc.go new file mode 100644 index 0000000000..1438fc9c09 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/doc.go @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: BSD-3-Clause + +// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +// Copyright (C) 2017-2024 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package securejoin implements a set of helpers to make it easier to write Go +// code that is safe against symlink-related escape attacks. The primary idea +// is to let you resolve a path within a rootfs directory as if the rootfs was +// a chroot. +// +// securejoin has two APIs, a "legacy" API and a "modern" API. +// +// The legacy API is [SecureJoin] and [SecureJoinVFS]. These methods are +// **not** safe against race conditions where an attacker changes the +// filesystem after (or during) the [SecureJoin] operation. +// +// The new API is available in the [pathrs-lite] subpackage, and provides +// protections against racing attackers as well as several other key +// protections against attacks often seen by container runtimes. As the name +// suggests, [pathrs-lite] is a stripped down (pure Go) reimplementation of +// [libpathrs]. The main APIs provided are [OpenInRoot], [MkdirAll], and +// [procfs.Handle] -- other APIs are not planned to be ported. The long-term +// goal is for users to migrate to [libpathrs] which is more fully-featured.
+// +// securejoin has been used by several container runtimes (Docker, runc, +// Kubernetes, etc) for quite a few years as a de-facto standard for operating +// on container filesystem paths "safely". However, most users still use the +// legacy API which is unsafe against various attacks (there is a fairly long +// history of CVEs in dependent projects as a result). Users should switch to the modern +// API as soon as possible (or even better, switch to libpathrs). +// +// This project was initially intended to be included in the Go standard +// library, but it was rejected (see https://go.dev/issue/20126). Much later, +// [os.Root] was added to the Go stdlib that shares some of the goals of +// filepath-securejoin. However, its design is intended to work like +// openat2(RESOLVE_BENEATH) which does not fit the usecase of container +// runtimes and most system tools. +// +// [pathrs-lite]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite +// [libpathrs]: https://github.com/openSUSE/libpathrs +// [OpenInRoot]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite#OpenInRoot +// [MkdirAll]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite#MkdirAll +// [procfs.Handle]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle +// [os.Root]: https://pkg.go.dev/os#Root +package securejoin diff --git a/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go b/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go new file mode 100644 index 0000000000..c69c4da91e --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: BSD-3-Clause + +// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +// Copyright (C) 2017-2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +// Package consts contains the definitions of internal constants used +// throughout filepath-securejoin. +package consts + +// MaxSymlinkLimit is the maximum number of symlinks that can be encountered +// during a single lookup before returning -ELOOP. At time of writing, Linux +// has an internal limit of 40. +const MaxSymlinkLimit = 255 diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go new file mode 100644 index 0000000000..199c1d8392 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/join.go @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: BSD-3-Clause + +// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +// Copyright (C) 2017-2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package securejoin + +import ( + "errors" + "os" + "path/filepath" + "strings" + "syscall" + + "github.com/cyphar/filepath-securejoin/internal/consts" +) + +// IsNotExist tells you if err is an error that implies that either the path +// accessed does not exist (or path components don't exist). This is +// effectively a more broad version of [os.IsNotExist]. +func IsNotExist(err error) bool { + // Check that it's not actually an ENOTDIR, which in some cases is a more + // convoluted case of ENOENT (usually involving weird paths). + return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT) +} + +// errUnsafeRoot is returned if the user provides SecureJoinVFS with a path +// that contains ".." components. +var errUnsafeRoot = errors.New("root path provided to SecureJoin contains '..' components") + +// stripVolume just gets rid of the Windows volume included in a path. Based on +// some godbolt tests, the Go compiler is smart enough to make this a no-op on +// Linux. 
+func stripVolume(path string) string { + return path[len(filepath.VolumeName(path)):] +} + +// hasDotDot checks if the path contains ".." components in a platform-agnostic +// way. +func hasDotDot(path string) bool { + // If we are on Windows, strip any volume letters. It turns out that + // C:..\foo may (or may not) be a valid pathname and we need to handle that + // leading "..". + path = stripVolume(path) + // Look for "/../" in the path, but we need to handle leading and trailing + // ".."s by adding separators. Doing this with filepath.Separator is ugly + // so just convert to Unix-style "/" first. + path = filepath.ToSlash(path) + return strings.Contains("/"+path+"/", "/../") +} + +// SecureJoinVFS joins the two given path components (similar to +// [filepath.Join]) except that the returned path is guaranteed to be scoped +// inside the provided root path (when evaluated). Any symbolic links in the +// path are evaluated with the given root treated as the root of the +// filesystem, similar to a chroot. The filesystem state is evaluated through +// the given [VFS] interface (if nil, the standard [os].* family of functions +// are used). +// +// Note that the guarantees provided by this function only apply if the path +// components in the returned string are not modified (in other words are not +// replaced with symlinks on the filesystem) after this function has returned. +// Such a symlink race is necessarily out-of-scope of SecureJoinVFS. +// +// NOTE: Due to the above limitation, Linux users are strongly encouraged to +// use [OpenInRoot] instead, which does safely protect against these kinds of +// attacks. There is no way to solve this problem with SecureJoinVFS because +// the API is fundamentally wrong (you cannot return a "safe" path string and +// guarantee it won't be modified afterwards). +// +// Volume names in unsafePath are always discarded, regardless if they are +// provided via direct input or when evaluating symlinks. 
Therefore: +// +// "C:\Temp" + "D:\path\to\file.txt" results in "C:\Temp\path\to\file.txt" +// +// If the provided root is not [filepath.Clean] then an error will be returned, +// as such root paths are bordering on somewhat unsafe and using such paths is +// not best practice. We also strongly suggest that any root path is first +// fully resolved using [filepath.EvalSymlinks] or otherwise constructed to +// avoid containing symlink components. Of course, the root also *must not* be +// attacker-controlled. +func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { //nolint:revive // name is part of public API + // The root path must not contain ".." components, otherwise when we join + // the subpath we will end up with a weird path. We could work around this + // in other ways but users shouldn't be giving us non-lexical root paths in + // the first place. + if hasDotDot(root) { + return "", errUnsafeRoot + } + + // Use the os.* VFS implementation if none was specified. + if vfs == nil { + vfs = osVFS{} + } + + unsafePath = filepath.FromSlash(unsafePath) + var ( + currentPath string + remainingPath = unsafePath + linksWalked int + ) + for remainingPath != "" { + // On Windows, if we managed to end up at a path referencing a volume, + // drop the volume to make sure we don't end up with broken paths or + // escaping the root volume. + remainingPath = stripVolume(remainingPath) + + // Get the next path component. + var part string + if i := strings.IndexRune(remainingPath, filepath.Separator); i == -1 { + part, remainingPath = remainingPath, "" + } else { + part, remainingPath = remainingPath[:i], remainingPath[i+1:] + } + + // Apply the component lexically to the path we are building. + // currentPath does not contain any symlinks, and we are lexically + // dealing with a single component, so it's okay to do a filepath.Clean + // here. 
+ nextPath := filepath.Join(string(filepath.Separator), currentPath, part) + if nextPath == string(filepath.Separator) { + currentPath = "" + continue + } + fullPath := root + string(filepath.Separator) + nextPath + + // Figure out whether the path is a symlink. + fi, err := vfs.Lstat(fullPath) + if err != nil && !IsNotExist(err) { + return "", err + } + // Treat non-existent path components the same as non-symlinks (we + // can't do any better here). + if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 { + currentPath = nextPath + continue + } + + // It's a symlink, so get its contents and expand it by prepending it + // to the yet-unparsed path. + linksWalked++ + if linksWalked > consts.MaxSymlinkLimit { + return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP} + } + + dest, err := vfs.Readlink(fullPath) + if err != nil { + return "", err + } + remainingPath = dest + string(filepath.Separator) + remainingPath + // Absolute symlinks reset any work we've already done. + if filepath.IsAbs(dest) { + currentPath = "" + } + } + + // There should be no lexical components like ".." left in the path here, + // but for safety clean up the path before joining it to the root. + finalPath := filepath.Join(string(filepath.Separator), currentPath) + return filepath.Join(root, finalPath), nil +} + +// SecureJoin is a wrapper around [SecureJoinVFS] that just uses the [os].* library +// of functions as the [VFS]. If in doubt, use this function over [SecureJoinVFS]. 
+func SecureJoin(root, unsafePath string) (string, error) { + return SecureJoinVFS(root, unsafePath, nil) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md new file mode 100644 index 0000000000..bb95b028c6 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md @@ -0,0 +1,35 @@ +## `pathrs-lite` ## + +`github.com/cyphar/filepath-securejoin/pathrs-lite` provides a minimal **pure +Go** implementation of the core bits of [libpathrs][]. This is not intended to +be a complete replacement for libpathrs, instead it is mainly intended to be +useful as a transition tool for existing Go projects. + +`pathrs-lite` also provides a very easy way to switch to `libpathrs` (even for +downstreams where `pathrs-lite` is being used in a third-party package and is +not interested in using CGo). At build time, if you use the `libpathrs` build +tag then `pathrs-lite` will use `libpathrs` directly instead of the pure Go +implementation. The two backends are functionally equivalent (and we have +integration tests to verify this), so this migration should be very easy with +no user-visible impact. + +[libpathrs]: https://github.com/cyphar/libpathrs + +### License ### + +Most of this subpackage is licensed under the Mozilla Public License (version +2.0). For more information, see the top-level [COPYING.md][] and +[LICENSE.MPL-2.0][] files, as well as the individual license headers for each +file. + +``` +Copyright (C) 2024-2025 Aleksa Sarai +Copyright (C) 2024-2025 SUSE LLC + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+``` + +[COPYING.md]: ../COPYING.md +[LICENSE.MPL-2.0]: ../LICENSE.MPL-2.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go new file mode 100644 index 0000000000..61411da37a --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package pathrs (pathrs-lite) is a less complete pure Go implementation of +// some of the APIs provided by [libpathrs]. +// +// [libpathrs]: https://github.com/cyphar/libpathrs +package pathrs diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go new file mode 100644 index 0000000000..595dfbf1ac --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Copyright (C) 2025 Aleksa Sarai +// Copyright (C) 2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package assert provides some basic assertion helpers for Go. +package assert + +import ( + "fmt" +) + +// Assert panics if the predicate is false with the provided argument. +func Assert(predicate bool, msg any) { + if !predicate { + panic(msg) + } +} + +// Assertf panics if the predicate is false and formats the message using the +// same formatting as [fmt.Printf]. 
+// +// [fmt.Printf]: https://pkg.go.dev/fmt#Printf +func Assertf(predicate bool, fmtMsg string, args ...any) { + Assert(predicate, fmt.Sprintf(fmtMsg, args...)) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go new file mode 100644 index 0000000000..d0b200f4f9 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package internal contains unexported common code for filepath-securejoin. +package internal + +import ( + "errors" + + "golang.org/x/sys/unix" +) + +type xdevErrorish struct { + description string +} + +func (err xdevErrorish) Error() string { return err.description } +func (err xdevErrorish) Is(target error) bool { return target == unix.EXDEV } + +var ( + // ErrPossibleAttack indicates that some attack was detected. + ErrPossibleAttack error = xdevErrorish{"possible attack detected"} + + // ErrPossibleBreakout indicates that during an operation we ended up in a + // state that could be a breakout but we detected it. + ErrPossibleBreakout error = xdevErrorish{"possible breakout detected"} + + // ErrInvalidDirectory indicates an unlinked directory. + ErrInvalidDirectory = errors.New("wandered into deleted directory") + + // ErrDeletedInode indicates an unlinked file (non-directory). 
+ ErrDeletedInode = errors.New("cannot verify path of deleted inode") +) diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go new file mode 100644 index 0000000000..0910549130 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package fd + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" +) + +// prepareAt returns -EBADF (an invalid fd) if dir is nil, otherwise it uses +// dir.Fd(). We use -EBADF because in filepath-securejoin we generally +// don't want to allow relative-to-cwd paths. The returned path is an +// *informational* string that describes a reasonable pathname for the given +// *at(2) arguments. You must not use the full path for any actual filesystem +// operations. +func prepareAt(dir Fd, path string) (dirFd int, unsafeUnmaskedPath string) { + dirFd, dirPath := -int(unix.EBADF), "." + if dir != nil { + dirFd, dirPath = int(dir.Fd()), dir.Name() + } + if !filepath.IsAbs(path) { + // only prepend the dirfd path for relative paths + path = dirPath + "/" + path + } + // NOTE: If path is "." or "", the returned path won't be filepath.Clean, + // but that's okay since this path is either used for errors (in which case + // a trailing "/" or "/." is important information) or will be + // filepath.Clean'd later (in the case of fd.Openat).
+ return dirFd, path +} + +// Openat is an [Fd]-based wrapper around unix.Openat. +func Openat(dir Fd, path string, flags int, mode int) (*os.File, error) { //nolint:unparam // wrapper func + dirFd, fullPath := prepareAt(dir, path) + // Make sure we always set O_CLOEXEC. + flags |= unix.O_CLOEXEC + fd, err := unix.Openat(dirFd, path, flags, uint32(mode)) + if err != nil { + return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + // openat is only used with lexically-safe paths so we can use + // filepath.Clean here, and also the path itself is not going to be used + // for actual path operations. + fullPath = filepath.Clean(fullPath) + return os.NewFile(uintptr(fd), fullPath), nil +} + +// Fstatat is an [Fd]-based wrapper around unix.Fstatat. +func Fstatat(dir Fd, path string, flags int) (unix.Stat_t, error) { + dirFd, fullPath := prepareAt(dir, path) + var stat unix.Stat_t + if err := unix.Fstatat(dirFd, path, &stat, flags); err != nil { + return stat, &os.PathError{Op: "fstatat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return stat, nil +} + +// Faccessat is an [Fd]-based wrapper around unix.Faccessat. +func Faccessat(dir Fd, path string, mode uint32, flags int) error { + dirFd, fullPath := prepareAt(dir, path) + err := unix.Faccessat(dirFd, path, mode, flags) + if err != nil { + err = &os.PathError{Op: "faccessat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return err +} + +// Readlinkat is an [Fd]-based wrapper around unix.Readlinkat. +func Readlinkat(dir Fd, path string) (string, error) { + dirFd, fullPath := prepareAt(dir, path) + size := 4096 + for { + linkBuf := make([]byte, size) + n, err := unix.Readlinkat(dirFd, path, linkBuf) + if err != nil { + return "", &os.PathError{Op: "readlinkat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + if n != size { + return string(linkBuf[:n]), nil + } + // Possible truncation, resize the buffer. 
+ size *= 2 + } +} + +const ( + // STATX_MNT_ID_UNIQUE is provided in golang.org/x/sys@v0.20.0, but in order to + // avoid bumping the requirement for a single constant we can just define it + // ourselves. + _STATX_MNT_ID_UNIQUE = 0x4000 //nolint:revive // unix.* name + + // We don't care which mount ID we get. The kernel will give us the unique + // one if it is supported. If the kernel doesn't support + // STATX_MNT_ID_UNIQUE, the bit is ignored and the returned request mask + // will only contain STATX_MNT_ID (if supported). + wantStatxMntMask = _STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID +) + +var hasStatxMountID = gocompat.SyncOnceValue(func() bool { + var stx unix.Statx_t + err := unix.Statx(-int(unix.EBADF), "/", 0, wantStatxMntMask, &stx) + return err == nil && stx.Mask&wantStatxMntMask != 0 +}) + +// GetMountID gets the mount identifier associated with the fd and path +// combination. It is effectively a wrapper around fetching +// STATX_MNT_ID{,_UNIQUE} with unix.Statx, but with a fallback to 0 if the +// kernel doesn't support the feature. +func GetMountID(dir Fd, path string) (uint64, error) { + // If we don't have statx(STATX_MNT_ID*) support, we can't do anything. + if !hasStatxMountID() { + return 0, nil + } + + dirFd, fullPath := prepareAt(dir, path) + + var stx unix.Statx_t + err := unix.Statx(dirFd, path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, wantStatxMntMask, &stx) + if stx.Mask&wantStatxMntMask == 0 { + // It's not a kernel limitation, for some reason we couldn't get a + // mount ID. Assume it's some kind of attack. 
+ err = fmt.Errorf("could not get mount id: %w", err) + } + if err != nil { + return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return stx.Mnt_id, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go new file mode 100644 index 0000000000..d2206a386f --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Copyright (C) 2025 Aleksa Sarai +// Copyright (C) 2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package fd provides a drop-in interface-based replacement of [*os.File] that +// allows for things like noop-Close wrappers to be used. +// +// [*os.File]: https://pkg.go.dev/os#File +package fd + +import ( + "io" + "os" +) + +// Fd is an interface that mirrors most of the API of [*os.File], allowing you +// to create wrappers that can be used in place of [*os.File]. +// +// [*os.File]: https://pkg.go.dev/os#File +type Fd interface { + io.Closer + Name() string + Fd() uintptr +} + +// Compile-time interface checks. +var ( + _ Fd = (*os.File)(nil) + _ Fd = noClose{} +) + +type noClose struct{ inner Fd } + +func (f noClose) Name() string { return f.inner.Name() } +func (f noClose) Fd() uintptr { return f.inner.Fd() } + +func (f noClose) Close() error { return nil } + +// NopCloser returns an [*os.File]-like object where the [Close] method is now +// a no-op. +// +// Note that for [*os.File] and similar objects, the Go garbage collector will +// still call [Close] on the underlying file unless you use +// [runtime.SetFinalizer] to disable this behaviour. This is up to the caller +// to do (if necessary). 
+// +// [*os.File]: https://pkg.go.dev/os#File +// [Close]: https://pkg.go.dev/io#Closer +// [runtime.SetFinalizer]: https://pkg.go.dev/runtime#SetFinalizer +func NopCloser(f Fd) Fd { return noClose{inner: f} } diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go new file mode 100644 index 0000000000..e1ec3c0b8e --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package fd + +import ( + "fmt" + "os" + "runtime" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" +) + +// DupWithName creates a new file descriptor referencing the same underlying +// file, but with the provided name instead of fd.Name(). +func DupWithName(fd Fd, name string) (*os.File, error) { + fd2, err := unix.FcntlInt(fd.Fd(), unix.F_DUPFD_CLOEXEC, 0) + if err != nil { + return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) + } + runtime.KeepAlive(fd) + return os.NewFile(uintptr(fd2), name), nil +} + +// Dup creates a new file description referencing the same underlying file. +func Dup(fd Fd) (*os.File, error) { + return DupWithName(fd, fd.Name()) +} + +// Fstat is an [Fd]-based wrapper around unix.Fstat. +func Fstat(fd Fd) (unix.Stat_t, error) { + var stat unix.Stat_t + if err := unix.Fstat(int(fd.Fd()), &stat); err != nil { + return stat, &os.PathError{Op: "fstat", Path: fd.Name(), Err: err} + } + runtime.KeepAlive(fd) + return stat, nil +} + +// Fstatfs is an [Fd]-based wrapper around unix.Fstatfs. 
+func Fstatfs(fd Fd) (unix.Statfs_t, error) { + var statfs unix.Statfs_t + if err := unix.Fstatfs(int(fd.Fd()), &statfs); err != nil { + return statfs, &os.PathError{Op: "fstatfs", Path: fd.Name(), Err: err} + } + runtime.KeepAlive(fd) + return statfs, nil +} + +// IsDeadInode detects whether the file has been unlinked from a filesystem and +// is thus a "dead inode" from the kernel's perspective. +func IsDeadInode(file Fd) error { + // If the nlink of a file drops to 0, there is an attacker deleting + // directories during our walk, which could result in weird /proc values. + // It's better to error out in this case. + stat, err := Fstat(file) + if err != nil { + return fmt.Errorf("check for dead inode: %w", err) + } + if stat.Nlink == 0 { + err := internal.ErrDeletedInode + if stat.Mode&unix.S_IFMT == unix.S_IFDIR { + err = internal.ErrInvalidDirectory + } + return fmt.Errorf("%w %q", err, file.Name()) + } + return nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go new file mode 100644 index 0000000000..77549c7a99 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package fd + +import ( + "os" + "runtime" + + "golang.org/x/sys/unix" +) + +// Fsopen is an [Fd]-based wrapper around unix.Fsopen. +func Fsopen(fsName string, flags int) (*os.File, error) { + // Make sure we always set O_CLOEXEC. 
+ flags |= unix.FSOPEN_CLOEXEC + fd, err := unix.Fsopen(fsName, flags) + if err != nil { + return nil, os.NewSyscallError("fsopen "+fsName, err) + } + return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil +} + +// Fsmount is an [Fd]-based wrapper around unix.Fsmount. +func Fsmount(ctx Fd, flags, mountAttrs int) (*os.File, error) { + // Make sure we always set O_CLOEXEC. + flags |= unix.FSMOUNT_CLOEXEC + fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs) + if err != nil { + return nil, os.NewSyscallError("fsmount "+ctx.Name(), err) + } + return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil +} + +// OpenTree is an [Fd]-based wrapper around unix.OpenTree. +func OpenTree(dir Fd, path string, flags uint) (*os.File, error) { + dirFd, fullPath := prepareAt(dir, path) + // Make sure we always set O_CLOEXEC. + flags |= unix.OPEN_TREE_CLOEXEC + fd, err := unix.OpenTree(dirFd, path, flags) + if err != nil { + return nil, &os.PathError{Op: "open_tree", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go new file mode 100644 index 0000000000..3e937fe3c1 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +package fd + +import ( + "errors" + "os" + "runtime" + + "golang.org/x/sys/unix" +) + +func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { + // RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve + // ".." while a mount or rename occurs anywhere on the system. This could + // happen spuriously, or as the result of an attacker trying to mess with + // us during lookup. + // + // In addition, scoped lookups have a "safety check" at the end of + // complete_walk which will return -EXDEV if the final path is not in the + // root. + return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 && + (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV)) +} + +// This is a fairly arbitrary limit we have just to avoid an attacker being +// able to make us spin in an infinite retry loop -- callers can choose to +// retry on EAGAIN if they prefer. +const scopedLookupMaxRetries = 128 + +// Openat2 is an [Fd]-based wrapper around unix.Openat2, but with some retry +// logic in case of EAGAIN errors. +func Openat2(dir Fd, path string, how *unix.OpenHow) (*os.File, error) { + dirFd, fullPath := prepareAt(dir, path) + // Make sure we always set O_CLOEXEC. + how.Flags |= unix.O_CLOEXEC + var tries int + for { + fd, err := unix.Openat2(dirFd, path, how) + if err != nil { + if scopedLookupShouldRetry(how, err) && tries < scopedLookupMaxRetries { + // We retry a couple of times to avoid the spurious errors, and + // if we are being attacked then returning -EAGAIN is the best + // we can do. 
+ tries++ + continue + } + return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil + } +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md new file mode 100644 index 0000000000..5dcb6ae007 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md @@ -0,0 +1,10 @@ +## gocompat ## + +This directory contains backports of stdlib functions from later Go versions so +that filepath-securejoin can continue to be used by projects that are stuck with +Go 1.18 support. Note that often filepath-securejoin is added in security +patches for old releases, so avoiding the need to bump Go compiler requirements +is a huge plus to downstreams. + +The source code is licensed under the same license as the Go stdlib. See the +source files for the precise license information. diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go new file mode 100644 index 0000000000..4b1803f580 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: BSD-3-Clause +//go:build linux && go1.20 + +// Copyright (C) 2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gocompat includes compatibility shims (backported from future Go +// stdlib versions) to permit filepath-securejoin to be used with older Go +// versions (often filepath-securejoin is added in security patches for old +// releases, so avoiding the need to bump Go compiler requirements is a huge +// plus to downstreams).
+package gocompat diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go new file mode 100644 index 0000000000..4a114bd3da --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: BSD-3-Clause +//go:build linux && go1.20 + +// Copyright (C) 2024 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocompat + +import ( + "fmt" +) + +// WrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except +// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap) +// is only guaranteed to give you baseErr. +func WrapBaseError(baseErr, extraErr error) error { + return fmt.Errorf("%w: %w", extraErr, baseErr) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go new file mode 100644 index 0000000000..3061016a6a --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux && !go1.20 + +// Copyright (C) 2024 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gocompat + +import ( + "fmt" +) + +type wrappedError struct { + inner error + isError error +} + +func (err wrappedError) Is(target error) bool { + return err.isError == target +} + +func (err wrappedError) Unwrap() error { + return err.inner +} + +func (err wrappedError) Error() string { + return fmt.Sprintf("%v: %v", err.isError, err.inner) +} + +// WrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except +// that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap) +// is only guaranteed to give you baseErr. +func WrapBaseError(baseErr, extraErr error) error { + return wrappedError{ + inner: baseErr, + isError: extraErr, + } +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go new file mode 100644 index 0000000000..d4a938186e --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux && go1.21 + +// Copyright (C) 2024-2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocompat + +import ( + "cmp" + "slices" + "sync" +) + +// SlicesDeleteFunc is equivalent to Go 1.21's slices.DeleteFunc. +func SlicesDeleteFunc[S ~[]E, E any](slice S, delFn func(E) bool) S { + return slices.DeleteFunc(slice, delFn) +} + +// SlicesContains is equivalent to Go 1.21's slices.Contains. +func SlicesContains[S ~[]E, E comparable](slice S, val E) bool { + return slices.Contains(slice, val) +} + +// SlicesClone is equivalent to Go 1.21's slices.Clone. +func SlicesClone[S ~[]E, E any](slice S) S { + return slices.Clone(slice) +} + +// SyncOnceValue is equivalent to Go 1.21's sync.OnceValue. 
+func SyncOnceValue[T any](f func() T) func() T { + return sync.OnceValue(f) +} + +// SyncOnceValues is equivalent to Go 1.21's sync.OnceValues. +func SyncOnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { + return sync.OnceValues(f) +} + +// CmpOrdered is equivalent to Go 1.21's cmp.Ordered generic type definition. +type CmpOrdered = cmp.Ordered + +// CmpCompare is equivalent to Go 1.21's cmp.Compare. +func CmpCompare[T CmpOrdered](x, y T) int { + return cmp.Compare(x, y) +} + +// Max2 is equivalent to Go 1.21's max builtin (but only for two parameters). +func Max2[T CmpOrdered](x, y T) T { + return max(x, y) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go new file mode 100644 index 0000000000..0ea6218aa6 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux && !go1.21 + +// Copyright (C) 2021, 2022 The Go Authors. All rights reserved. +// Copyright (C) 2024-2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.BSD file. + +package gocompat + +import ( + "sync" +) + +// These are very minimal implementations of functions that appear in Go 1.21's +// stdlib, included so that we can build on older Go versions. Most are +// borrowed directly from the stdlib, and a few are modified to be "obviously +// correct" without needing to copy too many other helpers. + +// clearSlice is equivalent to Go 1.21's builtin clear. +// Copied from the Go 1.24 stdlib implementation. +func clearSlice[S ~[]E, E any](slice S) { + var zero E + for i := range slice { + slice[i] = zero + } +} + +// slicesIndexFunc is equivalent to Go 1.21's slices.IndexFunc. 
+// Copied from the Go 1.24 stdlib implementation. +func slicesIndexFunc[S ~[]E, E any](s S, f func(E) bool) int { + for i := range s { + if f(s[i]) { + return i + } + } + return -1 +} + +// SlicesDeleteFunc is equivalent to Go 1.21's slices.DeleteFunc. +// Copied from the Go 1.24 stdlib implementation. +func SlicesDeleteFunc[S ~[]E, E any](s S, del func(E) bool) S { + i := slicesIndexFunc(s, del) + if i == -1 { + return s + } + // Don't start copying elements until we find one to delete. + for j := i + 1; j < len(s); j++ { + if v := s[j]; !del(v) { + s[i] = v + i++ + } + } + clearSlice(s[i:]) // zero/nil out the obsolete elements, for GC + return s[:i] +} + +// SlicesContains is equivalent to Go 1.21's slices.Contains. +// Similar to the stdlib slices.Contains, except that we don't have +// slices.Index so we need to use slices.IndexFunc for this non-Func helper. +func SlicesContains[S ~[]E, E comparable](s S, v E) bool { + return slicesIndexFunc(s, func(e E) bool { return e == v }) >= 0 +} + +// SlicesClone is equivalent to Go 1.21's slices.Clone. +// Copied from the Go 1.24 stdlib implementation. +func SlicesClone[S ~[]E, E any](s S) S { + // Preserve nil in case it matters. + if s == nil { + return nil + } + return append(S([]E{}), s...) +} + +// SyncOnceValue is equivalent to Go 1.21's sync.OnceValue. +// Copied from the Go 1.25 stdlib implementation. +func SyncOnceValue[T any](f func() T) func() T { + // Use a struct so that there's a single heap allocation. + d := struct { + f func() T + once sync.Once + valid bool + p any + result T + }{ + f: f, + } + return func() T { + d.once.Do(func() { + defer func() { + d.f = nil + d.p = recover() + if !d.valid { + panic(d.p) + } + }() + d.result = d.f() + d.valid = true + }) + if !d.valid { + panic(d.p) + } + return d.result + } +} + +// SyncOnceValues is equivalent to Go 1.21's sync.OnceValues. +// Copied from the Go 1.25 stdlib implementation. 
+func SyncOnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { + // Use a struct so that there's a single heap allocation. + d := struct { + f func() (T1, T2) + once sync.Once + valid bool + p any + r1 T1 + r2 T2 + }{ + f: f, + } + return func() (T1, T2) { + d.once.Do(func() { + defer func() { + d.f = nil + d.p = recover() + if !d.valid { + panic(d.p) + } + }() + d.r1, d.r2 = d.f() + d.valid = true + }) + if !d.valid { + panic(d.p) + } + return d.r1, d.r2 + } +} + +// CmpOrdered is equivalent to Go 1.21's cmp.Ordered generic type definition. +// Copied from the Go 1.25 stdlib implementation. +type CmpOrdered interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 | + ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | + ~float32 | ~float64 | + ~string +} + +// isNaN reports whether x is a NaN without requiring the math package. +// This will always return false if T is not floating-point. +// Copied from the Go 1.25 stdlib implementation. +func isNaN[T CmpOrdered](x T) bool { + return x != x +} + +// CmpCompare is equivalent to Go 1.21's cmp.Compare. +// Copied from the Go 1.25 stdlib implementation. +func CmpCompare[T CmpOrdered](x, y T) int { + xNaN := isNaN(x) + yNaN := isNaN(y) + if xNaN { + if yNaN { + return 0 + } + return -1 + } + if yNaN { + return +1 + } + if x < y { + return -1 + } + if x > y { + return +1 + } + return 0 +} + +// Max2 is equivalent to Go 1.21's max builtin for two parameters. 
+func Max2[T CmpOrdered](x, y T) T { + m := x + if y > m { + m = y + } + return m +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/doc.go new file mode 100644 index 0000000000..2ddb71e844 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/doc.go @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package gopathrs is a less complete pure Go implementation of some of the +// APIs provided by [libpathrs]. +// +// [libpathrs]: https://github.com/cyphar/libpathrs +package gopathrs diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/lookup_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/lookup_linux.go new file mode 100644 index 0000000000..56480f0cee --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/lookup_linux.go @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +package gopathrs + +import ( + "errors" + "fmt" + "os" + "path" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/internal/consts" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" +) + +type symlinkStackEntry struct { + // (dir, remainingPath) is what we would've returned if the link didn't + // exist. This matches what openat2(RESOLVE_IN_ROOT) would return in + // this case. + dir *os.File + remainingPath string + // linkUnwalked is the remaining path components from the original + // Readlink which we have yet to walk. When this slice is empty, we + // drop the link from the stack. + linkUnwalked []string +} + +func (se symlinkStackEntry) String() string { + return fmt.Sprintf("<%s>/%s [->%s]", se.dir.Name(), se.remainingPath, strings.Join(se.linkUnwalked, "/")) +} + +func (se symlinkStackEntry) Close() { + _ = se.dir.Close() +} + +type symlinkStack []*symlinkStackEntry + +func (s *symlinkStack) IsEmpty() bool { + return s == nil || len(*s) == 0 +} + +func (s *symlinkStack) Close() { + if s != nil { + for _, link := range *s { + link.Close() + } + // TODO: Switch to clear once we switch to Go 1.21. + *s = nil + } +} + +var ( + errEmptyStack = errors.New("[internal] stack is empty") + errBrokenSymlinkStack = errors.New("[internal error] broken symlink stack") +) + +func (s *symlinkStack) popPart(part string) error { + if s == nil || s.IsEmpty() { + // If there is nothing in the symlink stack, then the part was from the + // real path provided by the user, and this is a no-op. + return errEmptyStack + } + if part == "." { + // "." components are no-ops -- we drop them when doing SwapLink. 
+ return nil + } + + tailEntry := (*s)[len(*s)-1] + + // Double-check that we are popping the component we expect. + if len(tailEntry.linkUnwalked) == 0 { + return fmt.Errorf("%w: trying to pop component %q of empty stack entry %s", errBrokenSymlinkStack, part, tailEntry) + } + headPart := tailEntry.linkUnwalked[0] + if headPart != part { + return fmt.Errorf("%w: trying to pop component %q but the last stack entry is %s (%q)", errBrokenSymlinkStack, part, tailEntry, headPart) + } + + // Drop the component, but keep the entry around in case we are dealing + // with a "tail-chained" symlink. + tailEntry.linkUnwalked = tailEntry.linkUnwalked[1:] + return nil +} + +func (s *symlinkStack) PopPart(part string) error { + if err := s.popPart(part); err != nil { + if errors.Is(err, errEmptyStack) { + // Skip empty stacks. + err = nil + } + return err + } + + // Clean up any of the trailing stack entries that are empty. + for lastGood := len(*s) - 1; lastGood >= 0; lastGood-- { + entry := (*s)[lastGood] + if len(entry.linkUnwalked) > 0 { + break + } + entry.Close() + (*s) = (*s)[:lastGood] + } + return nil +} + +func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) error { + if s == nil { + return nil + } + // Split the link target and clean up any "" parts. + linkTargetParts := gocompat.SlicesDeleteFunc( + strings.Split(linkTarget, "/"), + func(part string) bool { return part == "" || part == "." }) + + // Copy the directory so the caller doesn't close our copy. + dirCopy, err := fd.Dup(dir) + if err != nil { + return err + } + + // Add to the stack. 
+ *s = append(*s, &symlinkStackEntry{ + dir: dirCopy, + remainingPath: remainingPath, + linkUnwalked: linkTargetParts, + }) + return nil +} + +func (s *symlinkStack) SwapLink(linkPart string, dir *os.File, remainingPath, linkTarget string) error { + // If we are currently inside a symlink resolution, remove the symlink + // component from the last symlink entry, but don't remove the entry even + // if it's empty. If we are a "tail-chained" symlink (a trailing symlink we + // hit during a symlink resolution) we need to keep the old symlink until + // we finish the resolution. + if err := s.popPart(linkPart); err != nil { + if !errors.Is(err, errEmptyStack) { + return err + } + // Push the component regardless of whether the stack was empty. + } + return s.push(dir, remainingPath, linkTarget) +} + +func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) { + if s == nil || s.IsEmpty() { + return nil, "", false + } + tailEntry := (*s)[0] + *s = (*s)[1:] + return tailEntry.dir, tailEntry.remainingPath, true +} + +// PartialLookupInRoot tries to lookup as much of the request path as possible +// within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing +// component of the requested path, returning a file handle to the final +// existing component and a string containing the remaining path components. +func PartialLookupInRoot(root fd.Fd, unsafePath string) (*os.File, string, error) { + return lookupInRoot(root, unsafePath, true) +} + +func completeLookupInRoot(root fd.Fd, unsafePath string) (*os.File, error) { + handle, remainingPath, err := lookupInRoot(root, unsafePath, false) + if remainingPath != "" && err == nil { + // should never happen + err = fmt.Errorf("[bug] non-empty remaining path when doing a non-partial lookup: %q", remainingPath) + } + // lookupInRoot(partial=false) will always close the handle if an error is + // returned, so no need to double-check here. 
+ return handle, err +} + +func lookupInRoot(root fd.Fd, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) { + unsafePath = filepath.ToSlash(unsafePath) // noop + + // This is very similar to SecureJoin, except that we operate on the + // components using file descriptors. We then return the last component we + // managed to open, along with the remaining path components not opened. + + // Try to use openat2 if possible. + if linux.HasOpenat2() { + return lookupOpenat2(root, unsafePath, partial) + } + + // Get the "actual" root path from /proc/self/fd. This is necessary if the + // root is some magic-link like /proc/$pid/root, in which case we want to + // make sure when we do procfs.CheckProcSelfFdPath that we are using the + // correct root path. + logicalRootPath, err := procfs.ProcSelfFdReadlink(root) + if err != nil { + return nil, "", fmt.Errorf("get real root path: %w", err) + } + + currentDir, err := fd.Dup(root) + if err != nil { + return nil, "", fmt.Errorf("clone root fd: %w", err) + } + defer func() { + // If a handle is not returned, close the internal handle. + if Handle == nil { + _ = currentDir.Close() + } + }() + + // symlinkStack is used to emulate how openat2(RESOLVE_IN_ROOT) treats + // dangling symlinks. If we hit a non-existent path while resolving a + // symlink, we need to return the (dir, remainingPath) that we had when we + // hit the symlink (treating the symlink as though it were a regular file). + // The set of (dir, remainingPath) sets is stored within the symlinkStack + // and we add and remove parts when we hit symlink and non-symlink + // components respectively. We need a stack because of recursive symlinks + // (symlinks that contain symlink components in their target). + // + // Note that the stack is ONLY used for book-keeping. All of the actual + // path walking logic is still based on currentPath/remainingPath and + // currentDir (as in SecureJoin).
+ var symStack *symlinkStack + if partial { + symStack = new(symlinkStack) + defer symStack.Close() + } + + var ( + linksWalked int + currentPath string + remainingPath = unsafePath + ) + for remainingPath != "" { + // Save the current remaining path so if the part is not real we can + // return the path including the component. + oldRemainingPath := remainingPath + + // Get the next path component. + var part string + if i := strings.IndexByte(remainingPath, '/'); i == -1 { + part, remainingPath = remainingPath, "" + } else { + part, remainingPath = remainingPath[:i], remainingPath[i+1:] + } + // If we hit an empty component, we need to treat it as though it is + // "." so that trailing "/" and "//" components on a non-directory + // correctly return the right error code. + if part == "" { + part = "." + } + + // Apply the component lexically to the path we are building. + // currentPath does not contain any symlinks, and we are lexically + // dealing with a single component, so it's okay to do a filepath.Clean + // here. + nextPath := path.Join("/", currentPath, part) + // If we logically hit the root, just clone the root rather than + // opening the part and doing all of the other checks. + if nextPath == "/" { + if err := symStack.PopPart(part); err != nil { + return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err) + } + // Jump to root. + rootClone, err := fd.Dup(root) + if err != nil { + return nil, "", fmt.Errorf("clone root fd: %w", err) + } + _ = currentDir.Close() + currentDir = rootClone + currentPath = nextPath + continue + } + + // Try to open the next component. 
+ nextDir, err := fd.Openat(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + switch err { + case nil: + st, err := nextDir.Stat() + if err != nil { + _ = nextDir.Close() + return nil, "", fmt.Errorf("stat component %q: %w", part, err) + } + + switch st.Mode() & os.ModeType { //nolint:exhaustive // just a glorified if statement + case os.ModeSymlink: + // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See + // Linux commit 65cfc6722361 ("readlinkat(), fchownat() and + // fstatat() with empty relative pathnames"). + linkDest, err := fd.Readlinkat(nextDir, "") + // We don't need the handle anymore. + _ = nextDir.Close() + if err != nil { + return nil, "", err + } + + linksWalked++ + if linksWalked > consts.MaxSymlinkLimit { + return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP} + } + + // Swap out the symlink's component for the link entry itself. + if err := symStack.SwapLink(part, currentDir, oldRemainingPath, linkDest); err != nil { + return nil, "", fmt.Errorf("walking into symlink %q failed: push symlink: %w", part, err) + } + + // Update our logical remaining path. + remainingPath = linkDest + "/" + remainingPath + // Absolute symlinks reset any work we've already done. + if path.IsAbs(linkDest) { + // Jump to root. + rootClone, err := fd.Dup(root) + if err != nil { + return nil, "", fmt.Errorf("clone root fd: %w", err) + } + _ = currentDir.Close() + currentDir = rootClone + currentPath = "/" + } + + default: + // If we are dealing with a directory, simply walk into it. + _ = currentDir.Close() + currentDir = nextDir + currentPath = nextPath + + // The part was real, so drop it from the symlink stack. + if err := symStack.PopPart(part); err != nil { + return nil, "", fmt.Errorf("walking into directory %q failed: %w", part, err) + } + + // If we are operating on a .., make sure we haven't escaped. + // We only have to check for ".." 
here because walking down + // into a regular component cannot cause you to + // escape. This mirrors the logic in RESOLVE_IN_ROOT, except we + // have to check every ".." rather than only checking after a + // rename or mount on the system. + if part == ".." { + // Make sure the root hasn't moved. + if err := procfs.CheckProcSelfFdPath(logicalRootPath, root); err != nil { + return nil, "", fmt.Errorf("root path moved during lookup: %w", err) + } + // Make sure the path is what we expect. + fullPath := logicalRootPath + nextPath + if err := procfs.CheckProcSelfFdPath(fullPath, currentDir); err != nil { + return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err) + } + } + } + + default: + if !partial { + return nil, "", err + } + // If there are any remaining components in the symlink stack, we + // are still within a symlink resolution and thus we hit a dangling + // symlink. So pretend that the first symlink in the stack we hit + // was an ENOENT (to match openat2). + if oldDir, remainingPath, ok := symStack.PopTopSymlink(); ok { + _ = currentDir.Close() + return oldDir, remainingPath, err + } + // We have hit a final component that doesn't exist, so we have our + // partial open result. Note that we have to use the OLD remaining + // path, since the lookup failed. + return currentDir, oldRemainingPath, err + } + } + + // If the unsafePath had a trailing slash, we need to make sure we try to + // do a relative "." open so that we will correctly return an error when + // the final component is a non-directory (to match openat2). In the + // context of openat2, a trailing slash and a trailing "/." are completely + // equivalent.
+ if strings.HasSuffix(unsafePath, "/") { + nextDir, err := fd.Openat(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + if !partial { + _ = currentDir.Close() + currentDir = nil + } + return currentDir, "", err + } + _ = currentDir.Close() + currentDir = nextDir + } + + // All of the components existed! + return currentDir, "", nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/mkdir_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/mkdir_linux.go new file mode 100644 index 0000000000..21a5593f44 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/mkdir_linux.go @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package gopathrs + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" +) + +// ErrInvalidMode is returned from [MkdirAll] when the requested mode is +// invalid. +var ErrInvalidMode = errors.New("invalid permission mode") + +// modePermExt is like os.ModePerm except that it also includes the set[ug]id +// and sticky bits. 
+const modePermExt = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky + +//nolint:cyclop // this function needs to handle a lot of cases +func toUnixMode(mode os.FileMode) (uint32, error) { + sysMode := uint32(mode.Perm()) + if mode&os.ModeSetuid != 0 { + sysMode |= unix.S_ISUID + } + if mode&os.ModeSetgid != 0 { + sysMode |= unix.S_ISGID + } + if mode&os.ModeSticky != 0 { + sysMode |= unix.S_ISVTX + } + // We don't allow file type bits. + if mode&os.ModeType != 0 { + return 0, fmt.Errorf("%w %+.3o (%s): type bits not permitted", ErrInvalidMode, mode, mode) + } + // We don't allow other unknown modes. + if mode&^modePermExt != 0 || sysMode&unix.S_IFMT != 0 { + return 0, fmt.Errorf("%w %+.3o (%s): unknown mode bits", ErrInvalidMode, mode, mode) + } + return sysMode, nil +} + +// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use +// in two respects: +// +// - The caller provides the root directory as an *[os.File] (preferably O_PATH) +// handle. This means that the caller can be sure which root directory is +// being used. Note that this can be emulated by using /proc/self/fd/... as +// the root path with [os.MkdirAll]. +// +// - Once all of the directories have been created, an *[os.File] O_PATH handle +// to the directory at unsafePath is returned to the caller. This is done in +// an effectively-race-free way (an attacker would only be able to swap the +// final directory component), which is not possible to emulate with +// [MkdirAll]. +// +// In addition, the returned handle is obtained far more efficiently than doing +// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after +// doing [MkdirAll]. If you intend to open the directory after creating it, you +// should use MkdirAllHandle. 
+// +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin +func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.File, Err error) { + unixMode, err := toUnixMode(mode) + if err != nil { + return nil, err + } + // On Linux, mkdirat(2) (and os.Mkdir) silently ignore the suid and sgid + // bits. We could also silently ignore them but since we have very few + // users it seems more prudent to return an error so users notice that + // these bits will not be set. + if unixMode&^0o1777 != 0 { + return nil, fmt.Errorf("%w for mkdir %+.3o: suid and sgid are ignored by mkdir", ErrInvalidMode, mode) + } + + // Try to open as much of the path as possible. + currentDir, remainingPath, err := PartialLookupInRoot(root, unsafePath) + defer func() { + if Err != nil { + _ = currentDir.Close() + } + }() + if err != nil && !errors.Is(err, unix.ENOENT) { + return nil, fmt.Errorf("find existing subpath of %q: %w", unsafePath, err) + } + + // If there is an attacker deleting directories as we walk into them, + // detect this proactively. Note this is guaranteed to detect if the + // attacker deleted any part of the tree up to currentDir. + // + // Once we walk into a dead directory, partialLookupInRoot would not be + // able to walk further down the tree (directories must be empty before + // they are deleted), and if the attacker has removed the entire tree we + // can be sure that anything that was originally inside a dead directory + // must also be deleted and thus is a dead directory in its own right. + // + // This is mostly a quality-of-life check, because mkdir will simply fail + // later if the attacker deletes the tree after this check. + if err := fd.IsDeadInode(currentDir); err != nil { + return nil, fmt.Errorf("finding existing subpath of %q: %w", unsafePath, err) + } + + // Re-open the path to match the O_DIRECTORY reopen loop later (so that we + // always return a non-O_PATH handle). 
We also check that we actually got a + // directory. + if reopenDir, err := procfs.ReopenFd(currentDir, unix.O_DIRECTORY|unix.O_CLOEXEC); errors.Is(err, unix.ENOTDIR) { + return nil, fmt.Errorf("cannot create subdirectories in %q: %w", currentDir.Name(), unix.ENOTDIR) + } else if err != nil { + return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err) + } else { //nolint:revive // indent-error-flow lint doesn't make sense here + _ = currentDir.Close() + currentDir = reopenDir + } + + remainingParts := strings.Split(remainingPath, string(filepath.Separator)) + if gocompat.SlicesContains(remainingParts, "..") { + // The path contained ".." components after the end of the "real" + // components. We could try to safely resolve ".." here but that would + // add a bunch of extra logic for something that it's not clear even + // needs to be supported. So just return an error. + // + // If we do filepath.Clean(remainingPath) then we end up with the + // problem that ".." can erase a trailing dangling symlink and produce + // a path that doesn't quite match what the user asked for. + return nil, fmt.Errorf("%w: yet-to-be-created path %q contains '..' components", unix.ENOENT, remainingPath) + } + + // Create the remaining components. + for _, part := range remainingParts { + switch part { + case "", ".": + // Skip over no-op paths. + continue + } + + // NOTE: mkdir(2) will not follow trailing symlinks, so we can safely + // create the final component without worrying about symlink-exchange + // attacks. + // + // If we get -EEXIST, it's possible that another program created the + // directory at the same time as us. In that case, just continue on as + // if we created it (if the created inode is not a directory, the + // following open call will fail). 
+ if err := unix.Mkdirat(int(currentDir.Fd()), part, unixMode); err != nil && !errors.Is(err, unix.EEXIST) { + err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err} + // Make the error a bit nicer if the directory is dead. + if deadErr := fd.IsDeadInode(currentDir); deadErr != nil { + // TODO: Once we bump the minimum Go version to 1.20, we can use + // multiple %w verbs for this wrapping. For now we need to use a + // compatibility shim for older Go versions. + // err = fmt.Errorf("%w (%w)", err, deadErr) + err = gocompat.WrapBaseError(err, deadErr) + } + return nil, err + } + + // Get a handle to the next component. O_DIRECTORY means we don't need + // to use O_PATH. + var nextDir *os.File + if linux.HasOpenat2() { + nextDir, err = openat2(currentDir, part, &unix.OpenHow{ + Flags: unix.O_NOFOLLOW | unix.O_DIRECTORY | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_NO_XDEV, + }) + } else { + nextDir, err = fd.Openat(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + } + if err != nil { + return nil, err + } + _ = currentDir.Close() + currentDir = nextDir + + // It's possible that the directory we just opened was swapped by an + // attacker. Unfortunately there isn't much we can do to protect + // against this, and MkdirAll's behaviour is that we will reuse + // existing directories anyway so the need to protect against this is + // incredibly limited (and arguably doesn't even deserve mention here). + // + // Ideally we might want to check that the owner and mode match what we + // would've created -- unfortunately, it is non-trivial to verify that + // the owner and mode of the created directory match. While plain Unix + // DAC rules seem simple enough to emulate, there are a bunch of other + // factors that can change the mode or owner of created directories + // (default POSIX ACLs, mount options like uid=1,gid=2,umask=0 on + // filesystems like vfat, etc etc). 
We used to try to verify this but + // it just led to a series of spurious errors. + // + // We could also check that the directory is non-empty, but + // unfortunately some pseudofilesystems (like cgroupfs) create + // non-empty directories, which would result in different spurious + // errors. + } + return currentDir, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/open_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/open_linux.go new file mode 100644 index 0000000000..cd9632a958 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/open_linux.go @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package gopathrs + +import ( + "os" +) + +// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided +// using an *[os.File] handle, to ensure that the correct root directory is used.
+func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { + handle, err := completeLookupInRoot(root, unsafePath) + if err != nil { + return nil, &os.PathError{Op: "securejoin.OpenInRoot", Path: unsafePath, Err: err} + } + return handle, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/openat2_linux.go new file mode 100644 index 0000000000..b80ecd0895 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs/openat2_linux.go @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package gopathrs + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" +) + +func openat2(dir fd.Fd, path string, how *unix.OpenHow) (*os.File, error) { + file, err := fd.Openat2(dir, path, how) + if err != nil { + return nil, err + } + // If we are using RESOLVE_IN_ROOT, the name we generated may be wrong. + if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT { + if actualPath, err := procfs.ProcSelfFdReadlink(file); err == nil { + // TODO: Ideally we would not need to dup the fd, but you cannot + // easily just swap an *os.File with one from the same fd + // (the GC will close the old one, and you cannot clear the + // finaliser easily because it is associated with an internal + // field of *os.File not *os.File itself). 
+ newFile, err := fd.DupWithName(file, actualPath) + if err != nil { + return nil, err + } + file = newFile + } + } + return file, nil +} + +func lookupOpenat2(root fd.Fd, unsafePath string, partial bool) (*os.File, string, error) { + if !partial { + file, err := openat2(root, unsafePath, &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, + }) + return file, "", err + } + return partialLookupOpenat2(root, unsafePath) +} + +// partialLookupOpenat2 is an alternative implementation of +// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a +// handle to the deepest existing child of the requested path within the root. +func partialLookupOpenat2(root fd.Fd, unsafePath string) (*os.File, string, error) { + // TODO: Implement this as a git-bisect-like binary search. + + unsafePath = filepath.ToSlash(unsafePath) // noop + endIdx := len(unsafePath) + var lastError error + for endIdx > 0 { + subpath := unsafePath[:endIdx] + + handle, err := openat2(root, subpath, &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, + }) + if err == nil { + // Jump over the slash if we have a non-"" remainingPath. + if endIdx < len(unsafePath) { + endIdx++ + } + // We found a subpath! + return handle, unsafePath[endIdx:], lastError + } + if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) { + // That path doesn't exist, let's try the next directory up. + endIdx = strings.LastIndexByte(subpath, '/') + lastError = err + continue + } + return nil, "", fmt.Errorf("open subpath: %w", err) + } + // If we couldn't open anything, the whole subpath is missing. Return a + // copy of the root fd so that the caller doesn't close this one by + // accident. 
+ rootClone, err := fd.Dup(root) + if err != nil { + return nil, "", err + } + return rootClone, unsafePath, lastError +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go new file mode 100644 index 0000000000..cb6de41861 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: BSD-3-Clause + +// Copyright (C) 2022 The Go Authors. All rights reserved. +// Copyright (C) 2025 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE.BSD file. + +// The parsing logic is very loosely based on the Go stdlib's +// src/internal/syscall/unix/kernel_version_linux.go but with an API that looks +// a bit like runc's libcontainer/system/kernelversion. +// +// TODO(cyphar): This API has been copied around to a lot of different projects +// (Docker, containerd, runc, and now filepath-securejoin) -- maybe we should +// put it in a separate project? + +// Package kernelversion provides a simple mechanism for checking whether the +// running kernel is at least as new as some baseline kernel version. This is +// often useful when checking for features that would be too complicated to +// test support for (or in cases where we know that some kernel features in +// backport-heavy kernels are broken and need to be avoided). +package kernelversion + +import ( + "bytes" + "errors" + "fmt" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" +) + +// KernelVersion is a numeric representation of the key numerical elements of a +// kernel version (for instance, "4.1.2-default-1" would be represented as +// KernelVersion{4, 1, 2}). 
+type KernelVersion []uint64 + +func (kver KernelVersion) String() string { + var str strings.Builder + for idx, elem := range kver { + if idx != 0 { + _, _ = str.WriteRune('.') + } + _, _ = str.WriteString(strconv.FormatUint(elem, 10)) + } + return str.String() +} + +var errInvalidKernelVersion = errors.New("invalid kernel version") + +// parseKernelVersion parses a string and creates a KernelVersion based on it. +func parseKernelVersion(kverStr string) (KernelVersion, error) { + kver := make(KernelVersion, 1, 3) + for idx, ch := range kverStr { + if '0' <= ch && ch <= '9' { + v := &kver[len(kver)-1] + *v = (*v * 10) + uint64(ch-'0') + } else { + if idx == 0 || kverStr[idx-1] < '0' || '9' < kverStr[idx-1] { + // "." must be preceded by a digit while in version section + return nil, fmt.Errorf("%w %q: kernel version has dot(s) followed by non-digit in version section", errInvalidKernelVersion, kverStr) + } + if ch != '.' { + break + } + kver = append(kver, 0) + } + } + if len(kver) < 2 { + return nil, fmt.Errorf("%w %q: kernel versions must contain at least two components", errInvalidKernelVersion, kverStr) + } + return kver, nil +} + +// getKernelVersion gets the current kernel version. +var getKernelVersion = gocompat.SyncOnceValues(func() (KernelVersion, error) { + var uts unix.Utsname + if err := unix.Uname(&uts); err != nil { + return nil, err + } + // Remove the \x00 from the release. + release := uts.Release[:] + return parseKernelVersion(string(release[:bytes.IndexByte(release, 0)])) +}) + +// GreaterEqualThan returns true if the host kernel version is greater than +// or equal to the provided [KernelVersion]. When doing this comparison, any +// non-numerical suffixes of the host kernel version are ignored. +// +// If the number of components provided is not equal to the number of numerical +// components of the host kernel version, any missing components are treated as +// 0.
This means that GreaterEqualThan(KernelVersion{4}) will be treated the +// same as GreaterEqualThan(KernelVersion{4, 0, 0, ..., 0, 0}), and that if the +// host kernel version is "4" then GreaterEqualThan(KernelVersion{4, 1}) will +// return false (because the host version will be treated as "4.0"). +func GreaterEqualThan(wantKver KernelVersion) (bool, error) { + hostKver, err := getKernelVersion() + if err != nil { + return false, err + } + + // Pad out the kernel version lengths to match one another. + cmpLen := gocompat.Max2(len(hostKver), len(wantKver)) + hostKver = append(hostKver, make(KernelVersion, cmpLen-len(hostKver))...) + wantKver = append(wantKver, make(KernelVersion, cmpLen-len(wantKver))...) + + for i := 0; i < cmpLen; i++ { + switch gocompat.CmpCompare(hostKver[i], wantKver[i]) { + case -1: + // host < want + return false, nil + case +1: + // host > want + return true, nil + case 0: + continue + } + } + // equal version values + return true, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go new file mode 100644 index 0000000000..4635714f62 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: MPL-2.0 + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package linux returns information about what features are supported on the +// running kernel. 
+package linux diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go new file mode 100644 index 0000000000..b29905bff6 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package linux + +import ( + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion" +) + +// HasNewMountAPI returns whether the new fsopen(2) mount API is supported on +// the running kernel. +var HasNewMountAPI = gocompat.SyncOnceValue(func() bool { + // All of the pieces of the new mount API we use (fsopen, fsconfig, + // fsmount, open_tree) were added together in Linux 5.2[1,2], so we can + // just check for one of the syscalls and the others should also be + // available. + // + // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE. + // This is equivalent to openat(2), but tells us if open_tree is + // available (and thus all of the other basic new mount API syscalls). + // open_tree(2) is most light-weight syscall to test here. + // + // [1]: merge commit 400913252d09 + // [2]: + fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC) + if err != nil { + return false + } + _ = unix.Close(fd) + + // RHEL 8 has a backport of fsopen(2) that appears to have some very + // difficult to debug performance pathology. As such, it seems prudent to + // simply reject pre-5.2 kernels. 
+ isNotBackport, _ := kernelversion.GreaterEqualThan(kernelversion.KernelVersion{5, 2}) + return isNotBackport +}) diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go new file mode 100644 index 0000000000..399609dc36 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package linux + +import ( + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" +) + +// HasOpenat2 returns whether openat2(2) is supported on the running kernel. +var HasOpenat2 = gocompat.SyncOnceValue(func() bool { + fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT, + }) + if err != nil { + return false + } + _ = unix.Close(fd) + return true +}) diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go new file mode 100644 index 0000000000..21e0a62e8e --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package procfs provides a safe API for operating on /proc on Linux. Note +// that this is the *internal* procfs API, mainly needed due to Go's +// restrictions on cyclic dependencies and its incredibly minimal visibility +// system without making a separate internal/ package. +package procfs + +import ( + "errors" + "fmt" + "io" + "os" + "runtime" + "strconv" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" +) + +// The kernel guarantees that the root inode of a procfs mount has an +// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO. +const ( + procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC + procRootIno = 1 // PROC_ROOT_INO +) + +// verifyProcHandle checks that the handle is from a procfs filesystem. +// Contrast this to [verifyProcRoot], which also verifies that the handle is +// the root of a procfs mount. +func verifyProcHandle(procHandle fd.Fd) error { + if statfs, err := fd.Fstatfs(procHandle); err != nil { + return err + } else if statfs.Type != procSuperMagic { + return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type) + } + return nil +} + +// verifyProcRoot verifies that the handle is the root of a procfs filesystem. +// Contrast this to [verifyProcHandle], which only verifies if the handle is +// some file on procfs (regardless of what file it is).
+func verifyProcRoot(procRoot fd.Fd) error { + if err := verifyProcHandle(procRoot); err != nil { + return err + } + if stat, err := fd.Fstat(procRoot); err != nil { + return err + } else if stat.Ino != procRootIno { + return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino) + } + return nil +} + +type procfsFeatures struct { + // hasSubsetPid was added in Linux 5.8, along with hidepid=ptraceable (and + // string-based hidepid= values). Before this patchset, it was not really + // safe to try to modify procfs superblock flags because the superblock was + // shared -- so if this feature is not available, **you should not set any + // superblock flags**. + // + // 6814ef2d992a ("proc: add option to mount only a pids subset") + // fa10fed30f25 ("proc: allow to mount many instances of proc in one pid namespace") + // 24a71ce5c47f ("proc: instantiate only pids that we can ptrace on 'hidepid=4' mount option") + // 1c6c4d112e81 ("proc: use human-readable values for hidepid") + // 9ff7258575d5 ("Merge branch 'proc-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") + hasSubsetPid bool +} + +var getProcfsFeatures = gocompat.SyncOnceValue(func() procfsFeatures { + if !linux.HasNewMountAPI() { + return procfsFeatures{} + } + procfsCtx, err := fd.Fsopen("proc", unix.FSOPEN_CLOEXEC) + if err != nil { + return procfsFeatures{} + } + defer procfsCtx.Close() //nolint:errcheck // close failures aren't critical here + + return procfsFeatures{ + hasSubsetPid: unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") == nil, + } +}) + +func newPrivateProcMount(subset bool) (_ *Handle, Err error) { + procfsCtx, err := fd.Fsopen("proc", unix.FSOPEN_CLOEXEC) + if err != nil { + return nil, err + } + defer procfsCtx.Close() //nolint:errcheck // close failures aren't critical here + + if subset && getProcfsFeatures().hasSubsetPid { + // Try to configure hidepid=ptraceable,subset=pid if possible, but + // ignore errors. 
+ _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable") + _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") + } + + // Get an actual handle. + if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil { + return nil, os.NewSyscallError("fsconfig create procfs", err) + } + // TODO: Output any information from the fscontext log to debug logs. + procRoot, err := fd.Fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID) + if err != nil { + return nil, err + } + defer func() { + if Err != nil { + _ = procRoot.Close() + } + }() + return newHandle(procRoot) +} + +func clonePrivateProcMount() (_ *Handle, Err error) { + // Try to make a clone without using AT_RECURSIVE if we can. If this works, + // we can be sure there are no over-mounts and so if the root is valid then + // we're golden. Otherwise, we have to deal with over-mounts. + procRoot, err := fd.OpenTree(nil, "/proc", unix.OPEN_TREE_CLONE) + if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procRoot) { + procRoot, err = fd.OpenTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE) + } + if err != nil { + return nil, fmt.Errorf("creating a detached procfs clone: %w", err) + } + defer func() { + if Err != nil { + _ = procRoot.Close() + } + }() + return newHandle(procRoot) +} + +func privateProcRoot(subset bool) (*Handle, error) { + if !linux.HasNewMountAPI() || hookForceGetProcRootUnsafe() { + return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP) + } + // Try to create a new procfs mount from scratch if we can. This ensures we + // can get a procfs mount even if /proc is fake (for whatever reason). + procRoot, err := newPrivateProcMount(subset) + if err != nil || hookForcePrivateProcRootOpenTree(procRoot) { + // Try to clone /proc then... 
+ procRoot, err = clonePrivateProcMount() + } + return procRoot, err +} + +func unsafeHostProcRoot() (_ *Handle, Err error) { + procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + defer func() { + if Err != nil { + _ = procRoot.Close() + } + }() + return newHandle(procRoot) +} + +// Handle is a wrapper around an *os.File handle to "/proc", which can be used +// to do further procfs-related operations in a safe way. +type Handle struct { + Inner fd.Fd + // Does this handle have subset=pid set? + isSubset bool +} + +func newHandle(procRoot fd.Fd) (*Handle, error) { + if err := verifyProcRoot(procRoot); err != nil { + // This is only used in methods that + _ = procRoot.Close() + return nil, err + } + proc := &Handle{Inner: procRoot} + // With subset=pid we can be sure that /proc/uptime will not exist. + if err := fd.Faccessat(proc.Inner, "uptime", unix.F_OK, unix.AT_SYMLINK_NOFOLLOW); err != nil { + proc.isSubset = errors.Is(err, os.ErrNotExist) + } + return proc, nil +} + +// Close closes the underlying file for the Handle. +func (proc *Handle) Close() error { return proc.Inner.Close() } + +var getCachedProcRoot = gocompat.SyncOnceValue(func() *Handle { + procRoot, err := getProcRoot(true) + if err != nil { + return nil // just don't cache if we see an error + } + if !procRoot.isSubset { + return nil // we only cache verified subset=pid handles + } + + // Disarm (*Handle).Close() to stop someone from accidentally closing + // the global handle. + procRoot.Inner = fd.NopCloser(procRoot.Inner) + return procRoot +}) + +// OpenProcRoot tries to open a "safer" handle to "/proc". +func OpenProcRoot() (*Handle, error) { + if proc := getCachedProcRoot(); proc != nil { + return proc, nil + } + return getProcRoot(true) +} + +// OpenUnsafeProcRoot opens a handle to "/proc" without any overmounts or +// masked paths (but also without "subset=pid"). 
+func OpenUnsafeProcRoot() (*Handle, error) { return getProcRoot(false) } + +func getProcRoot(subset bool) (*Handle, error) { + proc, err := privateProcRoot(subset) + if err != nil { + // Fall back to using a /proc handle if making a private mount failed. + // If we have openat2, at least we can avoid some kinds of over-mount + // attacks, but without openat2 there's not much we can do. + proc, err = unsafeHostProcRoot() + } + return proc, err +} + +var hasProcThreadSelf = gocompat.SyncOnceValue(func() bool { + return unix.Access("/proc/thread-self/", unix.F_OK) == nil +}) + +var errUnsafeProcfs = errors.New("unsafe procfs detected") + +// lookup is a very minimal wrapper around [procfsLookupInRoot] which is +// intended to be called from the external API. +func (proc *Handle) lookup(subpath string) (*os.File, error) { + handle, err := procfsLookupInRoot(proc.Inner, subpath) + if err != nil { + return nil, err + } + return handle, nil +} + +// procfsBase is an enum indicating the prefix of a subpath in operations +// involving [Handle]s. +type procfsBase string + +const ( + // ProcRoot refers to the root of the procfs (i.e., "/proc/"). + ProcRoot procfsBase = "/proc" + // ProcSelf refers to the current process' subdirectory (i.e., + // "/proc/self/"). + ProcSelf procfsBase = "/proc/self" + // ProcThreadSelf refers to the current thread's subdirectory (i.e., + // "/proc/thread-self/"). In multi-threaded programs (i.e., all Go + // programs) where one thread has a different CLONE_FS, it is possible for + // "/proc/self" to point the wrong thread and so "/proc/thread-self" may be + // necessary. Note that on pre-3.17 kernels, "/proc/thread-self" doesn't + // exist and so a fallback will be used in that case. + ProcThreadSelf procfsBase = "/proc/thread-self" + // TODO: Switch to an interface setup so we can have a more type-safe + // version of ProcPid and remove the need to worry about invalid string + // values. 
+) + +// prefix returns a prefix that can be used with the given [Handle]. +func (base procfsBase) prefix(proc *Handle) (string, error) { + switch base { + case ProcRoot: + return ".", nil + case ProcSelf: + return "self", nil + case ProcThreadSelf: + threadSelf := "thread-self" + if !hasProcThreadSelf() || hookForceProcSelfTask() { + // Pre-3.17 kernels don't have /proc/thread-self, so do it + // manually. + threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + if err := fd.Faccessat(proc.Inner, threadSelf, unix.F_OK, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() { + // In this case, we are running in a pid namespace that doesn't + // match the /proc mount we have. This can happen inside runc. + // + // Unfortunately, there is no nice way to get the correct TID + // to use here because of the age of the kernel, so we have to + // just use /proc/self and hope that it works. + threadSelf = "self" + } + } + return threadSelf, nil + } + return "", fmt.Errorf("invalid procfs base %q", base) +} + +// ProcThreadSelfCloser is a callback that needs to be called when you are done +// operating on an [os.File] fetched using [ProcThreadSelf]. +// +// [os.File]: https://pkg.go.dev/os#File +type ProcThreadSelfCloser func() + +// open is the core lookup operation for [Handle]. It returns a handle to +// "/proc/<base>/<subpath>". If the returned [ProcThreadSelfCloser] is non-nil, +// you should call it after you are done interacting with the returned handle. +// +// In general you should prefer to use the other helpers, as they remove +// the need to interact with [procfsBase] and do not return a nil +// [ProcThreadSelfCloser] for [procfsBase] values other than [ProcThreadSelf] +// where it is necessary.
+func (proc *Handle) open(base procfsBase, subpath string) (_ *os.File, closer ProcThreadSelfCloser, Err error) { + prefix, err := base.prefix(proc) + if err != nil { + return nil, nil, err + } + subpath = prefix + "/" + subpath + + switch base { + case ProcRoot: + file, err := proc.lookup(subpath) + if errors.Is(err, os.ErrNotExist) { + // The Handle handle in use might be a subset=pid one, which will + // result in spurious errors. In this case, just open a temporary + // unmasked procfs handle for this operation. + proc, err2 := OpenUnsafeProcRoot() // !subset=pid + if err2 != nil { + return nil, nil, err + } + defer proc.Close() //nolint:errcheck // close failures aren't critical here + + file, err = proc.lookup(subpath) + } + return file, nil, err + + case ProcSelf: + file, err := proc.lookup(subpath) + return file, nil, err + + case ProcThreadSelf: + // We need to lock our thread until the caller is done with the handle + // because between getting the handle and using it we could get + // interrupted by the Go runtime and hit the case where the underlying + // thread is swapped out and the original thread is killed, resulting + // in pull-your-hair-out-hard-to-debug issues in the caller. + runtime.LockOSThread() + defer func() { + if Err != nil { + runtime.UnlockOSThread() + closer = nil + } + }() + + file, err := proc.lookup(subpath) + return file, runtime.UnlockOSThread, err + } + // should never be reached + return nil, nil, fmt.Errorf("[internal error] invalid procfs base %q", base) +} + +// OpenThreadSelf returns a handle to "/proc/thread-self/" (or an +// equivalent handle on older kernels where "/proc/thread-self" doesn't exist). +// Once finished with the handle, you must call the returned closer function +// (runtime.UnlockOSThread). You must not pass the returned *os.File to other +// Go threads or use the handle after calling the closer. 
+func (proc *Handle) OpenThreadSelf(subpath string) (_ *os.File, _ ProcThreadSelfCloser, Err error) { + return proc.open(ProcThreadSelf, subpath) +} + +// OpenSelf returns a handle to /proc/self/. +func (proc *Handle) OpenSelf(subpath string) (*os.File, error) { + file, closer, err := proc.open(ProcSelf, subpath) + assert.Assert(closer == nil, "closer for ProcSelf must be nil") + return file, err +} + +// OpenRoot returns a handle to /proc/. +func (proc *Handle) OpenRoot(subpath string) (*os.File, error) { + file, closer, err := proc.open(ProcRoot, subpath) + assert.Assert(closer == nil, "closer for ProcRoot must be nil") + return file, err +} + +// OpenPid returns a handle to /proc/$pid/ (pid can be a pid or tid). +// This is mainly intended for usage when operating on other processes. +func (proc *Handle) OpenPid(pid int, subpath string) (*os.File, error) { + return proc.OpenRoot(strconv.Itoa(pid) + "/" + subpath) +} + +// checkSubpathOvermount checks if the dirfd and path combination is on the +// same mount as the given root. +func checkSubpathOvermount(root, dir fd.Fd, path string) error { + // Get the mntID of our procfs handle. + expectedMountID, err := fd.GetMountID(root, "") + if err != nil { + return fmt.Errorf("get root mount id: %w", err) + } + // Get the mntID of the target magic-link. + gotMountID, err := fd.GetMountID(dir, path) + if err != nil { + return fmt.Errorf("get subpath mount id: %w", err) + } + // As long as the directory mount is alive, even with wrapping mount IDs, + // we would expect to see a different mount ID here. (Of course, if we're + // using unsafeHostProcRoot() then an attacker could change this after we + // did this check.)
+ if expectedMountID != gotMountID { + return fmt.Errorf("%w: subpath %s/%s has an overmount obscuring the real path (mount ids do not match %d != %d)", + errUnsafeProcfs, dir.Name(), path, expectedMountID, gotMountID) + } + return nil +} + +// Readlink performs a readlink operation on "/proc//" in a way +// that should be free from race attacks. This is most commonly used to get the +// real path of a file by looking at "/proc/self/fd/$n", with the same safety +// protections as [Open] (as well as some additional checks against +// overmounts). +func (proc *Handle) Readlink(base procfsBase, subpath string) (string, error) { + link, closer, err := proc.open(base, subpath) + if closer != nil { + defer closer() + } + if err != nil { + return "", fmt.Errorf("get safe %s/%s handle: %w", base, subpath, err) + } + defer link.Close() //nolint:errcheck // close failures aren't critical here + + // Try to detect if there is a mount on top of the magic-link. This should + // be safe in general (a mount on top of the path afterwards would not + // affect the handle itself) and will definitely be safe if we are using + // privateProcRoot() (at least since Linux 5.12[1], when anonymous mount + // namespaces were completely isolated from external mounts including mount + // propagation events). + // + // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts + // onto targets that reside on shared mounts"). + if err := checkSubpathOvermount(proc.Inner, link, ""); err != nil { + return "", fmt.Errorf("check safety of %s/%s magiclink: %w", base, subpath, err) + } + + // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit + // 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty + // relative pathnames"). + return fd.Readlinkat(link, "") +} + +// ProcSelfFdReadlink gets the real path of the given file by looking at +// readlink(/proc/thread-self/fd/$n). +// +// This is just a wrapper around [Handle.Readlink]. 
+func ProcSelfFdReadlink(fd fd.Fd) (string, error) { + procRoot, err := OpenProcRoot() // subset=pid + if err != nil { + return "", err + } + defer procRoot.Close() //nolint:errcheck // close failures aren't critical here + + fdPath := "fd/" + strconv.Itoa(int(fd.Fd())) + return procRoot.Readlink(ProcThreadSelf, fdPath) +} + +// CheckProcSelfFdPath returns whether the given file handle matches the +// expected path. (This is inherently racy.) +func CheckProcSelfFdPath(path string, file fd.Fd) error { + if err := fd.IsDeadInode(file); err != nil { + return err + } + actualPath, err := ProcSelfFdReadlink(file) + if err != nil { + return fmt.Errorf("get path of handle: %w", err) + } + if actualPath != path { + return fmt.Errorf("%w: handle path %q doesn't match expected path %q", internal.ErrPossibleBreakout, actualPath, path) + } + return nil +} + +// ReopenFd takes an existing file descriptor and "re-opens" it through +// /proc/thread-self/fd/. This allows for O_PATH file descriptors to be +// upgraded to regular file descriptors, as well as changing the open mode of a +// regular file descriptor. Some filesystems have unique handling of open(2) +// which make this incredibly useful (such as /dev/ptmx). +func ReopenFd(handle fd.Fd, flags int) (*os.File, error) { + procRoot, err := OpenProcRoot() // subset=pid + if err != nil { + return nil, err + } + defer procRoot.Close() //nolint:errcheck // close failures aren't critical here + + // We can't operate on /proc/thread-self/fd/$n directly when doing a + // re-open, so we need to open /proc/thread-self/fd and then open a single + // final component. + procFdDir, closer, err := procRoot.OpenThreadSelf("fd/") + if err != nil { + return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err) + } + defer procFdDir.Close() //nolint:errcheck // close failures aren't critical here + defer closer() + + // Try to detect if there is a mount on top of the magic-link we are about + // to open. 
If we are using unsafeHostProcRoot(), this could change after + // we check it (and there's nothing we can do about that) but for + // privateProcRoot() this should be guaranteed to be safe (at least since + // Linux 5.12[1], when anonymous mount namespaces were completely isolated + // from external mounts including mount propagation events). + // + // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts + // onto targets that reside on shared mounts"). + fdStr := strconv.Itoa(int(handle.Fd())) + if err := checkSubpathOvermount(procRoot.Inner, procFdDir, fdStr); err != nil { + return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err) + } + + flags |= unix.O_CLOEXEC + // Rather than just wrapping fd.Openat, open-code it so we can copy + // handle.Name(). + reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0) + if err != nil { + return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err) + } + return os.NewFile(uintptr(reopenFd), handle.Name()), nil +} + +// Test hooks used in the procfs tests to verify that the fallback logic works. +// See testing_mocks_linux_test.go and procfs_linux_test.go for more details. 
+var ( + hookForcePrivateProcRootOpenTree = hookDummyFile + hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile + hookForceGetProcRootUnsafe = hookDummy + + hookForceProcSelfTask = hookDummy + hookForceProcSelf = hookDummy +) + +func hookDummy() bool { return false } +func hookDummyFile(_ io.Closer) bool { return false } diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go new file mode 100644 index 0000000000..1ad1f18eee --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// This code is adapted to be a minimal version of the libpathrs proc resolver +// . +// As we only need O_PATH|O_NOFOLLOW support, this is not too much to port. + +package procfs + +import ( + "fmt" + "os" + "path" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/internal/consts" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" +) + +// procfsLookupInRoot is a stripped down version of completeLookupInRoot, +// entirely designed to support the very small set of features necessary to +// make procfs handling work. Unlike completeLookupInRoot, we always have +// O_PATH|O_NOFOLLOW behaviour for trailing symlinks. 
+// +// The main restrictions are: +// +// - ".." is not supported (as it requires either os.Root-style replays, +// which is more bug-prone; or procfs verification, which is not possible +// due to re-entrancy issues). +// - Absolute symlinks for the same reason (and all absolute symlinks in +// procfs are magic-links, which we want to skip anyway). +// - If statx is supported (checkSymlinkOvermount), any mount-point crossings +// (which is the main attack of concern against /proc). +// - Partial lookups are not supported, so the symlink stack is not needed. +// - Trailing slash special handling is not necessary in most cases (if we +// operating on procfs, it's usually with programmer-controlled strings +// that will then be re-opened), so we skip it since whatever re-opens it +// can deal with it. It's a creature comfort anyway. +// +// If the system supports openat2(), this is implemented using equivalent flags +// (RESOLVE_BENEATH | RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS). +func procfsLookupInRoot(procRoot fd.Fd, unsafePath string) (Handle *os.File, _ error) { + unsafePath = filepath.ToSlash(unsafePath) // noop + + // Make sure that an empty unsafe path still returns something sane, even + // with openat2 (which doesn't have AT_EMPTY_PATH semantics yet). + if unsafePath == "" { + unsafePath = "." + } + + // This is already checked by getProcRoot, but make sure here since the + // core security of this lookup is based on this assumption. + if err := verifyProcRoot(procRoot); err != nil { + return nil, err + } + + if linux.HasOpenat2() { + // We prefer being able to use RESOLVE_NO_XDEV if we can, to be + // absolutely sure we are operating on a clean /proc handle that + // doesn't have any cheeky overmounts that could trick us (including + // symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't + // strictly needed, but just use it since we have it. 
+ // + // NOTE: /proc/self is technically a magic-link (the contents of the + // symlink are generated dynamically), but it doesn't use + // nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it. + // + // TODO: It would be nice to have RESOLVE_NO_DOTDOT, purely for + // self-consistency with the backup O_PATH resolver. + handle, err := fd.Openat2(procRoot, unsafePath, &unix.OpenHow{ + Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS, + }) + if err != nil { + // TODO: Once we bump the minimum Go version to 1.20, we can use + // multiple %w verbs for this wrapping. For now we need to use a + // compatibility shim for older Go versions. + // err = fmt.Errorf("%w: %w", errUnsafeProcfs, err) + return nil, gocompat.WrapBaseError(err, errUnsafeProcfs) + } + return handle, nil + } + + // To mirror openat2(RESOLVE_BENEATH), we need to return an error if the + // path is absolute. + if path.IsAbs(unsafePath) { + return nil, fmt.Errorf("%w: cannot resolve absolute paths in procfs resolver", internal.ErrPossibleBreakout) + } + + currentDir, err := fd.Dup(procRoot) + if err != nil { + return nil, fmt.Errorf("clone root fd: %w", err) + } + defer func() { + // If a handle is not returned, close the internal handle. + if Handle == nil { + _ = currentDir.Close() + } + }() + + var ( + linksWalked int + currentPath string + remainingPath = unsafePath + ) + for remainingPath != "" { + // Get the next path component. + var part string + if i := strings.IndexByte(remainingPath, '/'); i == -1 { + part, remainingPath = remainingPath, "" + } else { + part, remainingPath = remainingPath[:i], remainingPath[i+1:] + } + if part == "" { + // no-op component, but treat it the same as "." + part = "." + } + if part == ".." { + // not permitted + return nil, fmt.Errorf("%w: cannot walk into '..' 
in procfs resolver", internal.ErrPossibleBreakout) + } + + // Apply the component lexically to the path we are building. + // currentPath does not contain any symlinks, and we are lexically + // dealing with a single component, so it's okay to do a filepath.Clean + // here. (Not to mention that ".." isn't allowed.) + nextPath := path.Join("/", currentPath, part) + // If we logically hit the root, just clone the root rather than + // opening the part and doing all of the other checks. + if nextPath == "/" { + // Jump to root. + rootClone, err := fd.Dup(procRoot) + if err != nil { + return nil, fmt.Errorf("clone root fd: %w", err) + } + _ = currentDir.Close() + currentDir = rootClone + currentPath = nextPath + continue + } + + // Try to open the next component. + nextDir, err := fd.Openat(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + + // Make sure we are still on procfs and haven't crossed mounts. + if err := verifyProcHandle(nextDir); err != nil { + _ = nextDir.Close() + return nil, fmt.Errorf("check %q component is on procfs: %w", part, err) + } + if err := checkSubpathOvermount(procRoot, nextDir, ""); err != nil { + _ = nextDir.Close() + return nil, fmt.Errorf("check %q component is not overmounted: %w", part, err) + } + + // We are emulating O_PATH|O_NOFOLLOW, so we only need to traverse into + // trailing symlinks if we are not the final component. Otherwise we + // can just return the currentDir. + if remainingPath != "" { + st, err := nextDir.Stat() + if err != nil { + _ = nextDir.Close() + return nil, fmt.Errorf("stat component %q: %w", part, err) + } + + if st.Mode()&os.ModeType == os.ModeSymlink { + // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See + // Linux commit 65cfc6722361 ("readlinkat(), fchownat() and + // fstatat() with empty relative pathnames"). + linkDest, err := fd.Readlinkat(nextDir, "") + // We don't need the handle anymore. 
+ _ = nextDir.Close() + if err != nil { + return nil, err + } + + linksWalked++ + if linksWalked > consts.MaxSymlinkLimit { + return nil, &os.PathError{Op: "securejoin.procfsLookupInRoot", Path: "/proc/" + unsafePath, Err: unix.ELOOP} + } + + // Update our logical remaining path. + remainingPath = linkDest + "/" + remainingPath + // Absolute symlinks are probably magiclinks, we reject them. + if path.IsAbs(linkDest) { + return nil, fmt.Errorf("%w: cannot jump to / in procfs resolver -- possible magiclink", internal.ErrPossibleBreakout) + } + continue + } + } + + // Walk into the next component. + _ = currentDir.Close() + currentDir = nextDir + currentPath = nextPath + } + + // One final sanity-check. + if err := verifyProcHandle(currentDir); err != nil { + return nil, fmt.Errorf("check final handle is on procfs: %w", err) + } + if err := checkSubpathOvermount(procRoot, currentDir, ""); err != nil { + return nil, fmt.Errorf("check final handle is not overmounted: %w", err) + } + return currentDir, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir.go new file mode 100644 index 0000000000..b43169564a --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir.go @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +package pathrs + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// MkdirAll is a race-safe alternative to the [os.MkdirAll] function, +// where the new directory is guaranteed to be within the root directory (if an +// attacker can move directories from inside the root to outside the root, the +// created directory tree might be outside of the root but the key constraint +// is that at no point will we walk outside of the directory tree we are +// creating). +// +// Effectively, MkdirAll(root, unsafePath, mode) is equivalent to +// +// path, _ := securejoin.SecureJoin(root, unsafePath) +// err := os.MkdirAll(path, mode) +// +// But is much safer. The above implementation is unsafe because if an attacker +// can modify the filesystem tree between [SecureJoin] and [os.MkdirAll], it is +// possible for MkdirAll to resolve unsafe symlink components and create +// directories outside of the root. +// +// If you plan to open the directory after you have created it or want to use +// an open directory handle as the root, you should use [MkdirAllHandle] instead. +// This function is a wrapper around [MkdirAllHandle]. 
+// +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin +func MkdirAll(root, unsafePath string, mode os.FileMode) error { + rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return err + } + defer rootDir.Close() //nolint:errcheck // close failures aren't critical here + + f, err := MkdirAllHandle(rootDir, unsafePath, mode) + if err != nil { + return err + } + _ = f.Close() + return nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_libpathrs.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_libpathrs.go new file mode 100644 index 0000000000..f864dbc8f3 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_libpathrs.go @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build libpathrs + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package pathrs + +import ( + "os" + + "cyphar.com/go-pathrs" +) + +// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use +// in two respects: +// +// - The caller provides the root directory as an *[os.File] (preferably O_PATH) +// handle. This means that the caller can be sure which root directory is +// being used. Note that this can be emulated by using /proc/self/fd/... as +// the root path with [os.MkdirAll]. +// +// - Once all of the directories have been created, an *[os.File] O_PATH handle +// to the directory at unsafePath is returned to the caller. This is done in +// an effectively-race-free way (an attacker would only be able to swap the +// final directory component), which is not possible to emulate with +// [MkdirAll]. 
+// +// In addition, the returned handle is obtained far more efficiently than doing +// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after +// doing [MkdirAll]. If you intend to open the directory after creating it, you +// should use MkdirAllHandle. +// +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin +func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (*os.File, error) { + rootRef, err := pathrs.RootFromFile(root) + if err != nil { + return nil, err + } + defer rootRef.Close() //nolint:errcheck // close failures aren't critical here + + handle, err := rootRef.MkdirAll(unsafePath, mode) + if err != nil { + return nil, err + } + return handle.IntoFile(), nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_purego.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_purego.go new file mode 100644 index 0000000000..0369dfe7e6 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_purego.go @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux && !libpathrs + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package pathrs + +import ( + "os" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs" +) + +// MkdirAllHandle is equivalent to [MkdirAll], except that it is safer to use +// in two respects: +// +// - The caller provides the root directory as an *[os.File] (preferably O_PATH) +// handle. This means that the caller can be sure which root directory is +// being used. Note that this can be emulated by using /proc/self/fd/... as +// the root path with [os.MkdirAll]. 
+// +// - Once all of the directories have been created, an *[os.File] O_PATH handle +// to the directory at unsafePath is returned to the caller. This is done in +// an effectively-race-free way (an attacker would only be able to swap the +// final directory component), which is not possible to emulate with +// [MkdirAll]. +// +// In addition, the returned handle is obtained far more efficiently than doing +// a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after +// doing [MkdirAll]. If you intend to open the directory after creating it, you +// should use MkdirAllHandle. +// +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin +func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (*os.File, error) { + return gopathrs.MkdirAllHandle(root, unsafePath, mode) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open.go new file mode 100644 index 0000000000..41b628907e --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open.go @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package pathrs + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// OpenInRoot safely opens the provided unsafePath within the root. +// Effectively, OpenInRoot(root, unsafePath) is equivalent to +// +// path, _ := securejoin.SecureJoin(root, unsafePath) +// handle, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC) +// +// But is much safer. 
The above implementation is unsafe because if an attacker +// can modify the filesystem tree between [SecureJoin] and [os.OpenFile], it is +// possible for the returned file to be outside of the root. +// +// Note that the returned handle is an O_PATH handle, meaning that only a very +// limited set of operations will work on the handle. This is done to avoid +// accidentally opening an untrusted file that could cause issues (such as a +// disconnected TTY that could cause a DoS, or some other issue). In order to +// use the returned handle, you can "upgrade" it to a proper handle using +// [Reopen]. +// +// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin +func OpenInRoot(root, unsafePath string) (*os.File, error) { + rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + defer rootDir.Close() //nolint:errcheck // close failures aren't critical here + return OpenatInRoot(rootDir, unsafePath) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_libpathrs.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_libpathrs.go new file mode 100644 index 0000000000..53352000e6 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_libpathrs.go @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build libpathrs + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +package pathrs + +import ( + "os" + + "cyphar.com/go-pathrs" +) + +// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided +// using an *[os.File] handle, to ensure that the correct root directory is used. 
+func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { + rootRef, err := pathrs.RootFromFile(root) + if err != nil { + return nil, err + } + defer rootRef.Close() //nolint:errcheck // close failures aren't critical here + + handle, err := rootRef.Resolve(unsafePath) + if err != nil { + return nil, err + } + return handle.IntoFile(), nil +} + +// Reopen takes an *[os.File] handle and re-opens it through /proc/self/fd. +// Reopen(file, flags) is effectively equivalent to +// +// fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd()) +// os.OpenFile(fdPath, flags|unix.O_CLOEXEC) +// +// But with some extra hardenings to ensure that we are not tricked by a +// maliciously-configured /proc mount. While this attack scenario is not +// common, in container runtimes it is possible for higher-level runtimes to be +// tricked into configuring an unsafe /proc that can be used to attack file +// operations. See [CVE-2019-19921] for more details. +// +// [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw +func Reopen(file *os.File, flags int) (*os.File, error) { + handle, err := pathrs.HandleFromFile(file) + if err != nil { + return nil, err + } + defer handle.Close() //nolint:errcheck // close failures aren't critical here + + return handle.OpenFile(flags) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_purego.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_purego.go new file mode 100644 index 0000000000..6d1be12ce5 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_purego.go @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux && !libpathrs + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +package pathrs + +import ( + "os" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs" + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" +) + +// OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided +// using an *[os.File] handle, to ensure that the correct root directory is used. +func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { + return gopathrs.OpenatInRoot(root, unsafePath) +} + +// Reopen takes an *[os.File] handle and re-opens it through /proc/self/fd. +// Reopen(file, flags) is effectively equivalent to +// +// fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd()) +// os.OpenFile(fdPath, flags|unix.O_CLOEXEC) +// +// But with some extra hardenings to ensure that we are not tricked by a +// maliciously-configured /proc mount. While this attack scenario is not +// common, in container runtimes it is possible for higher-level runtimes to be +// tricked into configuring an unsafe /proc that can be used to attack file +// operations. See [CVE-2019-19921] for more details. +// +// [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw +func Reopen(handle *os.File, flags int) (*os.File, error) { + return procfs.ReopenFd(handle, flags) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_libpathrs.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_libpathrs.go new file mode 100644 index 0000000000..6c4df3763b --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_libpathrs.go @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build libpathrs + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// Package procfs provides a safe API for operating on /proc on Linux. +package procfs + +import ( + "os" + "strconv" + + "cyphar.com/go-pathrs/procfs" + "golang.org/x/sys/unix" +) + +// ProcThreadSelfCloser is a callback that needs to be called when you are done +// operating on an [os.File] fetched using [Handle.OpenThreadSelf]. +// +// [os.File]: https://pkg.go.dev/os#File +type ProcThreadSelfCloser = procfs.ThreadCloser + +// Handle is a wrapper around an *os.File handle to "/proc", which can be used +// to do further procfs-related operations in a safe way. +type Handle struct { + inner *procfs.Handle +} + +// Close close the resources associated with this [Handle]. Note that if this +// [Handle] was created with [OpenProcRoot], on some kernels the underlying +// procfs handle is cached and so this Close operation may be a no-op. However, +// you should always call Close on [Handle]s once you are done with them. +func (proc *Handle) Close() error { return proc.inner.Close() } + +// OpenProcRoot tries to open a "safer" handle to "/proc" (i.e., one with the +// "subset=pid" mount option applied, available from Linux 5.8). Unless you +// plan to do many [Handle.OpenRoot] operations, users should prefer to use +// this over [OpenUnsafeProcRoot] which is far more dangerous to keep open. +// +// If a safe handle cannot be opened, OpenProcRoot will fall back to opening a +// regular "/proc" handle. +// +// Note that using [Handle.OpenRoot] will still work with handles returned by +// this function. If a subpath cannot be operated on with a safe "/proc" +// handle, then [OpenUnsafeProcRoot] will be called internally and a temporary +// unsafe handle will be used. +func OpenProcRoot() (*Handle, error) { + proc, err := procfs.Open() + if err != nil { + return nil, err + } + return &Handle{inner: proc}, nil +} + +// OpenUnsafeProcRoot opens a handle to "/proc" without any overmounts or +// masked paths. 
You must be extremely careful to make sure this handle is +// never leaked to a container and that you program cannot be tricked into +// writing to arbitrary paths within it. +// +// This is not necessary if you just wish to use [Handle.OpenRoot], as handles +// returned by [OpenProcRoot] will fall back to using a *temporary* unsafe +// handle in that case. You should only really use this if you need to do many +// operations with [Handle.OpenRoot] and the performance overhead of making +// many procfs handles is an issue. If you do use OpenUnsafeProcRoot, you +// should make sure to close the handle as soon as possible to avoid +// known-fd-number attacks. +func OpenUnsafeProcRoot() (*Handle, error) { + proc, err := procfs.Open(procfs.UnmaskedProcRoot) + if err != nil { + return nil, err + } + return &Handle{inner: proc}, nil +} + +// OpenThreadSelf returns a handle to "/proc/thread-self/" (or an +// equivalent handle on older kernels where "/proc/thread-self" doesn't exist). +// Once finished with the handle, you must call the returned closer function +// ([runtime.UnlockOSThread]). You must not pass the returned *os.File to other +// Go threads or use the handle after calling the closer. +// +// [runtime.UnlockOSThread]: https://pkg.go.dev/runtime#UnlockOSThread +func (proc *Handle) OpenThreadSelf(subpath string) (*os.File, ProcThreadSelfCloser, error) { + return proc.inner.OpenThreadSelf(subpath, unix.O_PATH|unix.O_NOFOLLOW) +} + +// OpenSelf returns a handle to /proc/self/. +// +// Note that in Go programs with non-homogenous threads, this may result in +// spurious errors. If you are monkeying around with APIs that are +// thread-specific, you probably want to use [Handle.OpenThreadSelf] instead +// which will guarantee that the handle refers to the same thread as the caller +// is executing on. 
+func (proc *Handle) OpenSelf(subpath string) (*os.File, error) { + return proc.inner.OpenSelf(subpath, unix.O_PATH|unix.O_NOFOLLOW) +} + +// OpenRoot returns a handle to /proc/. +// +// You should only use this when you need to operate on global procfs files +// (such as sysctls in /proc/sys). Unlike [Handle.OpenThreadSelf], +// [Handle.OpenSelf], and [Handle.OpenPid], the procfs handle used internally +// for this operation will never use "subset=pid", which makes it a more juicy +// target for [CVE-2024-21626]-style attacks (and doing something like opening +// a directory with OpenRoot effectively leaks [OpenUnsafeProcRoot] as long as +// the file descriptor is open). +// +// [CVE-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv +func (proc *Handle) OpenRoot(subpath string) (*os.File, error) { + return proc.inner.OpenRoot(subpath, unix.O_PATH|unix.O_NOFOLLOW) +} + +// OpenPid returns a handle to /proc/$pid/ (pid can be a pid or tid). +// This is mainly intended for usage when operating on other processes. +// +// You should not use this for the current thread, as special handling is +// needed for /proc/thread-self (or /proc/self/task/) when dealing with +// goroutine scheduling -- use [Handle.OpenThreadSelf] instead. +// +// To refer to the current thread-group, you should use prefer +// [Handle.OpenSelf] to passing os.Getpid as the pid argument. +func (proc *Handle) OpenPid(pid int, subpath string) (*os.File, error) { + return proc.inner.OpenPid(pid, subpath, unix.O_PATH|unix.O_NOFOLLOW) +} + +// ProcSelfFdReadlink gets the real path of the given file by looking at +// /proc/self/fd/ with [readlink]. 
It is effectively just shorthand for +// something along the lines of: +// +// proc, err := procfs.OpenProcRoot() +// if err != nil { +// return err +// } +// link, err := proc.OpenThreadSelf(fmt.Sprintf("fd/%d", f.Fd())) +// if err != nil { +// return err +// } +// defer link.Close() +// var buf [4096]byte +// n, err := unix.Readlinkat(int(link.Fd()), "", buf[:]) +// if err != nil { +// return err +// } +// pathname := buf[:n] +// +// [readlink]: https://pkg.go.dev/golang.org/x/sys/unix#Readlinkat +func ProcSelfFdReadlink(f *os.File) (string, error) { + proc, err := procfs.Open() + if err != nil { + return "", err + } + defer proc.Close() //nolint:errcheck // close failures aren't critical here + + fdPath := "fd/" + strconv.Itoa(int(f.Fd())) + return proc.Readlink(procfs.ProcThreadSelf, fdPath) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_purego.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_purego.go new file mode 100644 index 0000000000..9383002f9a --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_purego.go @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: MPL-2.0 + +//go:build linux && !libpathrs + +// Copyright (C) 2024-2025 Aleksa Sarai +// Copyright (C) 2024-2025 SUSE LLC +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Package procfs provides a safe API for operating on /proc on Linux. +package procfs + +import ( + "os" + + "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" +) + +// This package mostly just wraps internal/procfs APIs. This is necessary +// because we are forced to export some things from internal/procfs in order to +// avoid some dependency cycle issues, but we don't want users to see or use +// them. 
+ +// ProcThreadSelfCloser is a callback that needs to be called when you are done +// operating on an [os.File] fetched using [Handle.OpenThreadSelf]. +// +// [os.File]: https://pkg.go.dev/os#File +type ProcThreadSelfCloser = procfs.ProcThreadSelfCloser + +// Handle is a wrapper around an *os.File handle to "/proc", which can be used +// to do further procfs-related operations in a safe way. +type Handle struct { + inner *procfs.Handle +} + +// Close close the resources associated with this [Handle]. Note that if this +// [Handle] was created with [OpenProcRoot], on some kernels the underlying +// procfs handle is cached and so this Close operation may be a no-op. However, +// you should always call Close on [Handle]s once you are done with them. +func (proc *Handle) Close() error { return proc.inner.Close() } + +// OpenProcRoot tries to open a "safer" handle to "/proc" (i.e., one with the +// "subset=pid" mount option applied, available from Linux 5.8). Unless you +// plan to do many [Handle.OpenRoot] operations, users should prefer to use +// this over [OpenUnsafeProcRoot] which is far more dangerous to keep open. +// +// If a safe handle cannot be opened, OpenProcRoot will fall back to opening a +// regular "/proc" handle. +// +// Note that using [Handle.OpenRoot] will still work with handles returned by +// this function. If a subpath cannot be operated on with a safe "/proc" +// handle, then [OpenUnsafeProcRoot] will be called internally and a temporary +// unsafe handle will be used. +func OpenProcRoot() (*Handle, error) { + proc, err := procfs.OpenProcRoot() + if err != nil { + return nil, err + } + return &Handle{inner: proc}, nil +} + +// OpenUnsafeProcRoot opens a handle to "/proc" without any overmounts or +// masked paths. You must be extremely careful to make sure this handle is +// never leaked to a container and that you program cannot be tricked into +// writing to arbitrary paths within it. 
+// +// This is not necessary if you just wish to use [Handle.OpenRoot], as handles +// returned by [OpenProcRoot] will fall back to using a *temporary* unsafe +// handle in that case. You should only really use this if you need to do many +// operations with [Handle.OpenRoot] and the performance overhead of making +// many procfs handles is an issue. If you do use OpenUnsafeProcRoot, you +// should make sure to close the handle as soon as possible to avoid +// known-fd-number attacks. +func OpenUnsafeProcRoot() (*Handle, error) { + proc, err := procfs.OpenUnsafeProcRoot() + if err != nil { + return nil, err + } + return &Handle{inner: proc}, nil +} + +// OpenThreadSelf returns a handle to "/proc/thread-self/" (or an +// equivalent handle on older kernels where "/proc/thread-self" doesn't exist). +// Once finished with the handle, you must call the returned closer function +// ([runtime.UnlockOSThread]). You must not pass the returned *os.File to other +// Go threads or use the handle after calling the closer. +// +// [runtime.UnlockOSThread]: https://pkg.go.dev/runtime#UnlockOSThread +func (proc *Handle) OpenThreadSelf(subpath string) (*os.File, ProcThreadSelfCloser, error) { + return proc.inner.OpenThreadSelf(subpath) +} + +// OpenSelf returns a handle to /proc/self/. +// +// Note that in Go programs with non-homogenous threads, this may result in +// spurious errors. If you are monkeying around with APIs that are +// thread-specific, you probably want to use [Handle.OpenThreadSelf] instead +// which will guarantee that the handle refers to the same thread as the caller +// is executing on. +func (proc *Handle) OpenSelf(subpath string) (*os.File, error) { + return proc.inner.OpenSelf(subpath) +} + +// OpenRoot returns a handle to /proc/. +// +// You should only use this when you need to operate on global procfs files +// (such as sysctls in /proc/sys). 
Unlike [Handle.OpenThreadSelf], +// [Handle.OpenSelf], and [Handle.OpenPid], the procfs handle used internally +// for this operation will never use "subset=pid", which makes it a more juicy +// target for [CVE-2024-21626]-style attacks (and doing something like opening +// a directory with OpenRoot effectively leaks [OpenUnsafeProcRoot] as long as +// the file descriptor is open). +// +// [CVE-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv +func (proc *Handle) OpenRoot(subpath string) (*os.File, error) { + return proc.inner.OpenRoot(subpath) +} + +// OpenPid returns a handle to /proc/$pid/ (pid can be a pid or tid). +// This is mainly intended for usage when operating on other processes. +// +// You should not use this for the current thread, as special handling is +// needed for /proc/thread-self (or /proc/self/task/) when dealing with +// goroutine scheduling -- use [Handle.OpenThreadSelf] instead. +// +// To refer to the current thread-group, you should prefer +// [Handle.OpenSelf] to passing os.Getpid as the pid argument. +func (proc *Handle) OpenPid(pid int, subpath string) (*os.File, error) { + return proc.inner.OpenPid(pid, subpath) +} + +// ProcSelfFdReadlink gets the real path of the given file by looking at +// /proc/self/fd/ with [readlink].
It is effectively just shorthand for +// something along the lines of: +// +// proc, err := procfs.OpenProcRoot() +// if err != nil { +// return err +// } +// link, err := proc.OpenThreadSelf(fmt.Sprintf("fd/%d", f.Fd())) +// if err != nil { +// return err +// } +// defer link.Close() +// var buf [4096]byte +// n, err := unix.Readlinkat(int(link.Fd()), "", buf[:]) +// if err != nil { +// return err +// } +// pathname := buf[:n] +// +// [readlink]: https://pkg.go.dev/golang.org/x/sys/unix#Readlinkat +func ProcSelfFdReadlink(f *os.File) (string, error) { + return procfs.ProcSelfFdReadlink(f) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go new file mode 100644 index 0000000000..4d89a481ca --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: BSD-3-Clause + +// Copyright (C) 2017-2024 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package securejoin + +import "os" + +// In future this should be moved into a separate package, because now there +// are several projects (umoci and go-mtree) that are using this sort of +// interface. + +// VFS is the minimal interface necessary to use [SecureJoinVFS]. A nil VFS is +// equivalent to using the standard [os].* family of functions. This is mainly +// used for the purposes of mock testing, but also can be used to otherwise use +// [SecureJoinVFS] with VFS-like system. +type VFS interface { + // Lstat returns an [os.FileInfo] describing the named file. If the + // file is a symbolic link, the returned [os.FileInfo] describes the + // symbolic link. Lstat makes no attempt to follow the link. + // The semantics are identical to [os.Lstat]. + Lstat(name string) (os.FileInfo, error) + + // Readlink returns the destination of the named symbolic link. + // The semantics are identical to [os.Readlink]. 
+ Readlink(name string) (string, error) +} + +// osVFS is the "nil" VFS, in that it just passes everything through to the os +// module. +type osVFS struct{} + +func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } + +func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) } diff --git a/vendor/github.com/moby/sys/capability/CHANGELOG.md b/vendor/github.com/moby/sys/capability/CHANGELOG.md new file mode 100644 index 0000000000..299b36d92a --- /dev/null +++ b/vendor/github.com/moby/sys/capability/CHANGELOG.md @@ -0,0 +1,124 @@ +# Changelog +This file documents all notable changes made to this project since the initial fork +from https://github.com/syndtr/gocapability/commit/42c35b4376354fd5. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.4.0] - 2024-11-11 + +### Added +* New separate API for ambient ([GetAmbient], [SetAmbient], [ResetAmbient]) + and bound ([GetBound], [DropBound]) capabilities, modelled after libcap. (#176) + +### Fixed +* [Apply] now returns an error if called for non-zero `pid`. Before this change, + it could silently change some capabilities of the current process, instead of + the one identified by the `pid`. (#168, #174) +* Fixed tests that change capabilities to be run in a separate process. (#173) +* Other improvements in tests. (#169, #170) + +### Changed +* Use raw syscalls (which are slightly faster). (#176) +* Most tests are now limited to testing the public API of the package. (#162) +* Simplify parsing /proc/*pid*/status, add a test case. (#162) +* Optimize the number of syscall to set ambient capabilities in Apply + by clearing them first; add a test case. (#163, #164) +* Better documentation for [Apply], [NewFile], [NewFile2], [NewPid], [NewPid2]. (#175) + +### Removed +* `.golangci.yml` and `.codespellrc` are no longer part of the package. 
(#158) + +## [0.3.0] - 2024-09-25 + +### Added +* Added [ListKnown] and [ListSupported] functions. (#153) +* [LastCap] is now available on non-Linux platforms (where it returns an error). (#152) + +### Changed +* [List] is now deprecated in favor of [ListKnown] and [ListSupported]. (#153) + +### Fixed +* Various documentation improvements. (#151) +* Fix "generated code" comment. (#153) + +## [0.2.0] - 2024-09-16 + +This is the first release after the move to a new home in +github.com/moby/sys/capability. + +### Fixed + * Fixed URLs in documentation to reflect the new home. + +## [0.1.1] - 2024-08-01 + +This is a maintenance release, fixing a few minor issues. + +### Fixed + * Fixed future kernel compatibility, for real this time. [#11] + * Fixed [LastCap] to be a function. [#12] + +## [0.1.0] - 2024-07-31 + +This is an initial release since the fork. + +### Breaking changes + + * The `CAP_LAST_CAP` variable is removed; users need to modify the code to + use [LastCap] to get the value. [#6] + * The code now requires Go >= 1.21. + +### Added + * `go.mod` and `go.sum` files. [#2] + * New [LastCap] function. [#6] + * Basic CI using GHA infra. [#8], [#9] + * README and CHANGELOG. [#10] + +### Fixed + * Fixed ambient capabilities error handling in [Apply]. [#3] + * Fixed future kernel compatibility. [#1] + * Fixed various linter warnings. [#4], [#7] + +### Changed + * Go build tags changed from old-style (`+build`) to new Go 1.17+ style (`go:build`). [#2] + +### Removed + * Removed support for capabilities v1 and v2. [#1] + * Removed init function so programs that use this package start faster. [#6] + * Removed `CAP_LAST_CAP` (use [LastCap] instead). 
[#6] + + +[Apply]: https://pkg.go.dev/github.com/moby/sys/capability#Capabilities.Apply +[DropBound]: https://pkg.go.dev/github.com/moby/sys/capability#DropBound +[GetAmbient]: https://pkg.go.dev/github.com/moby/sys/capability#GetAmbient +[GetBound]: https://pkg.go.dev/github.com/moby/sys/capability#GetBound +[LastCap]: https://pkg.go.dev/github.com/moby/sys/capability#LastCap +[ListKnown]: https://pkg.go.dev/github.com/moby/sys/capability#ListKnown +[ListSupported]: https://pkg.go.dev/github.com/moby/sys/capability#ListSupported +[List]: https://pkg.go.dev/github.com/moby/sys/capability#List +[NewFile2]: https://pkg.go.dev/github.com/moby/sys/capability#NewFile2 +[NewFile]: https://pkg.go.dev/github.com/moby/sys/capability#NewFile +[NewPid2]: https://pkg.go.dev/github.com/moby/sys/capability#NewPid2 +[NewPid]: https://pkg.go.dev/github.com/moby/sys/capability#NewPid +[ResetAmbient]: https://pkg.go.dev/github.com/moby/sys/capability#ResetAmbient +[SetAmbient]: https://pkg.go.dev/github.com/moby/sys/capability#SetAmbient + + +[0.4.0]: https://github.com/moby/sys/releases/tag/capability%2Fv0.4.0 +[0.3.0]: https://github.com/moby/sys/releases/tag/capability%2Fv0.3.0 +[0.2.0]: https://github.com/moby/sys/releases/tag/capability%2Fv0.2.0 +[0.1.1]: https://github.com/kolyshkin/capability/compare/v0.1.0...v0.1.1 +[0.1.0]: https://github.com/kolyshkin/capability/compare/42c35b4376354fd5...v0.1.0 + + +[#1]: https://github.com/kolyshkin/capability/pull/1 +[#2]: https://github.com/kolyshkin/capability/pull/2 +[#3]: https://github.com/kolyshkin/capability/pull/3 +[#4]: https://github.com/kolyshkin/capability/pull/4 +[#6]: https://github.com/kolyshkin/capability/pull/6 +[#7]: https://github.com/kolyshkin/capability/pull/7 +[#8]: https://github.com/kolyshkin/capability/pull/8 +[#9]: https://github.com/kolyshkin/capability/pull/9 +[#10]: https://github.com/kolyshkin/capability/pull/10 +[#11]: https://github.com/kolyshkin/capability/pull/11 +[#12]: 
https://github.com/kolyshkin/capability/pull/12 diff --git a/vendor/github.com/moby/sys/capability/LICENSE b/vendor/github.com/moby/sys/capability/LICENSE new file mode 100644 index 0000000000..08adcd6ecf --- /dev/null +++ b/vendor/github.com/moby/sys/capability/LICENSE @@ -0,0 +1,25 @@ +Copyright 2023 The Capability Authors. +Copyright 2013 Suryandaru Triandana +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/moby/sys/capability/README.md b/vendor/github.com/moby/sys/capability/README.md new file mode 100644 index 0000000000..84b74871aa --- /dev/null +++ b/vendor/github.com/moby/sys/capability/README.md @@ -0,0 +1,13 @@ +This is a fork of (apparently no longer maintained) +https://github.com/syndtr/gocapability package. 
It provides basic primitives to +work with [Linux capabilities][capabilities(7)]. + +For changes, see [CHANGELOG.md](./CHANGELOG.md). + +[![Go Reference](https://pkg.go.dev/badge/github.com/moby/sys/capability/capability.svg)](https://pkg.go.dev/github.com/moby/sys/capability) + +## Alternatives + + * https://pkg.go.dev/kernel.org/pub/linux/libs/security/libcap/cap + +[capabilities(7)]: https://man7.org/linux/man-pages/man7/capabilities.7.html diff --git a/vendor/github.com/moby/sys/capability/capability.go b/vendor/github.com/moby/sys/capability/capability.go new file mode 100644 index 0000000000..11e47bed73 --- /dev/null +++ b/vendor/github.com/moby/sys/capability/capability.go @@ -0,0 +1,176 @@ +// Copyright 2023 The Capability Authors. +// Copyright 2013 Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package capability provides utilities for manipulating POSIX capabilities. +package capability + +type Capabilities interface { + // Get checks whether a capability is present in the given + // capabilities set. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Get(which CapType, what Cap) bool + + // Empty checks whether all capability bits of the given capabilities + // set are zero. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Empty(which CapType) bool + + // Full checks whether all capability bits of the given capabilities + // set are one. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Full(which CapType) bool + + // Set sets capabilities of the given capabilities sets. The + // 'which' value should be one or combination (OR'ed) of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Set(which CapType, caps ...Cap) + + // Unset unsets capabilities of the given capabilities sets.
The + // 'which' value should be one or combination (OR'ed) of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + Unset(which CapType, caps ...Cap) + + // Fill sets all bits of the given capabilities kind to one. The + // 'kind' value should be one or combination (OR'ed) of CAPS, + // BOUNDS or AMBS. + Fill(kind CapType) + + // Clear sets all bits of the given capabilities kind to zero. The + // 'kind' value should be one or combination (OR'ed) of CAPS, + // BOUNDS or AMBS. + Clear(kind CapType) + + // StringCap returns the current state of the given capabilities + // set as a string. The 'which' value should be one of EFFECTIVE, + // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. + StringCap(which CapType) string + + // String returns the current capabilities state as a string. + String() string + + // Load loads the actual capabilities value. This will overwrite all + // outstanding changes. + Load() error + + // Apply applies the capabilities settings, so all changes made by + // [Set], [Unset], [Fill], or [Clear] will take effect. + Apply(kind CapType) error +} + +// NewPid initializes a new [Capabilities] object for given pid when +// it is nonzero, or for the current process if pid is 0. +// +// Deprecated: replace with [NewPid2] followed by optional [Capabilities.Load] +// (only if needed). For example, replace: +// +// c, err := NewPid(0) +// if err != nil { +// return err +// } +// +// with: +// +// c, err := NewPid2(0) +// if err != nil { +// return err +// } +// err = c.Load() +// if err != nil { +// return err +// } +func NewPid(pid int) (Capabilities, error) { + c, err := newPid(pid) + if err != nil { + return c, err + } + err = c.Load() + return c, err +} + +// NewPid2 initializes a new [Capabilities] object for given pid when +// it is nonzero, or for the current process if pid is 0. This +// does not load the process's current capabilities; if needed, +// call [Capabilities.Load].
+func NewPid2(pid int) (Capabilities, error) { + return newPid(pid) +} + +// NewFile initializes a new Capabilities object for given file path. +// +// Deprecated: replace with [NewFile2] followed by optional [Capabilities.Load] +// (only if needed). For example, replace: +// +// c, err := NewFile(path) +// if err != nil { +// return err +// } +// +// with: +// +// c, err := NewFile2(path) +// if err != nil { +// return err +// } +// err = c.Load() +// if err != nil { +// return err +// } +func NewFile(path string) (Capabilities, error) { + c, err := newFile(path) + if err != nil { + return c, err + } + err = c.Load() + return c, err +} + +// NewFile2 creates a new initialized [Capabilities] object for given +// file path. This does not load the file's current capabilities; +// if needed, call [Capabilities.Load]. +func NewFile2(path string) (Capabilities, error) { + return newFile(path) +} + +// LastCap returns the highest valid capability of the running kernel, +// or an error if it cannot be obtained. +// +// See also: [ListSupported]. +func LastCap() (Cap, error) { + return lastCap() +} + +// GetAmbient determines if a specific ambient capability is raised in the +// calling thread. +func GetAmbient(c Cap) (bool, error) { + return getAmbient(c) +} + +// SetAmbient raises or lowers specified ambient capabilities for the calling +// thread. To complete successfully, the prevailing effective capability set +// must have a raised CAP_SETPCAP. Further, to raise a specific ambient +// capability the inheritable and permitted sets of the calling thread must +// already contain the specified capability. +func SetAmbient(raise bool, caps ...Cap) error { + return setAmbient(raise, caps...) +} + +// ResetAmbient resets all of the ambient capabilities for the calling thread +// to their lowered value. +func ResetAmbient() error { + return resetAmbient() +} + +// GetBound determines if a specific bounding capability is raised in the +// calling thread.
+func GetBound(c Cap) (bool, error) { + return getBound(c) +} + +// DropBound lowers the specified bounding set capability. +func DropBound(caps ...Cap) error { + return dropBound(caps...) +} diff --git a/vendor/github.com/moby/sys/capability/capability_linux.go b/vendor/github.com/moby/sys/capability/capability_linux.go new file mode 100644 index 0000000000..234b1efb29 --- /dev/null +++ b/vendor/github.com/moby/sys/capability/capability_linux.go @@ -0,0 +1,591 @@ +// Copyright 2023 The Capability Authors. +// Copyright 2013 Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package capability + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "strconv" + "strings" + "sync" + "syscall" +) + +const ( + linuxCapVer1 = 0x19980330 // No longer supported. + linuxCapVer2 = 0x20071026 // No longer supported. + linuxCapVer3 = 0x20080522 +) + +var lastCap = sync.OnceValues(func() (Cap, error) { + f, err := os.Open("/proc/sys/kernel/cap_last_cap") + if err != nil { + return 0, err + } + + buf := make([]byte, 11) + l, err := f.Read(buf) + f.Close() + if err != nil { + return 0, err + } + buf = buf[:l] + + last, err := strconv.Atoi(strings.TrimSpace(string(buf))) + if err != nil { + return 0, err + } + return Cap(last), nil +}) + +func capUpperMask() uint32 { + last, err := lastCap() + if err != nil || last < 32 { + return 0 + } + return (uint32(1) << (uint(last) - 31)) - 1 +} + +func mkStringCap(c Capabilities, which CapType) (ret string) { + last, err := lastCap() + if err != nil { + return "" + } + for i, first := Cap(0), true; i <= last; i++ { + if !c.Get(which, i) { + continue + } + if first { + first = false + } else { + ret += ", " + } + ret += i.String() + } + return +} + +func mkString(c Capabilities, max CapType) (ret string) { + ret = "{" + for i := CapType(1); i <= max; i <<= 1 { + ret += " " + i.String() + "=\"" + if c.Empty(i) { + ret += "empty" + } 
else if c.Full(i) { + ret += "full" + } else { + ret += c.StringCap(i) + } + ret += "\"" + } + ret += " }" + return +} + +var capVersion = sync.OnceValues(func() (uint32, error) { + var hdr capHeader + err := capget(&hdr, nil) + return hdr.version, err +}) + +func newPid(pid int) (c Capabilities, retErr error) { + ver, err := capVersion() + if err != nil { + retErr = fmt.Errorf("unable to get capability version from the kernel: %w", err) + return + } + switch ver { + case linuxCapVer1, linuxCapVer2: + retErr = errors.New("old/unsupported capability version (kernel older than 2.6.26?)") + default: + // Either linuxCapVer3, or an unknown/future version (such as v4). + // In the latter case, we fall back to v3 as the latest version known + // to this package, as kernel should be backward-compatible to v3. + p := new(capsV3) + p.hdr.version = linuxCapVer3 + p.hdr.pid = int32(pid) + c = p + } + return +} + +func ignoreEINVAL(err error) error { + if errors.Is(err, syscall.EINVAL) { + err = nil + } + return err +} + +type capsV3 struct { + hdr capHeader + data [2]capData + bounds [2]uint32 + ambient [2]uint32 +} + +func (c *capsV3) Get(which CapType, what Cap) bool { + var i uint + if what > 31 { + i = uint(what) >> 5 + what %= 32 + } + + switch which { + case EFFECTIVE: + return (1< 31 { + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + c.data[i].effective |= 1 << uint(what) + } + if which&PERMITTED != 0 { + c.data[i].permitted |= 1 << uint(what) + } + if which&INHERITABLE != 0 { + c.data[i].inheritable |= 1 << uint(what) + } + if which&BOUNDING != 0 { + c.bounds[i] |= 1 << uint(what) + } + if which&AMBIENT != 0 { + c.ambient[i] |= 1 << uint(what) + } + } +} + +func (c *capsV3) Unset(which CapType, caps ...Cap) { + for _, what := range caps { + var i uint + if what > 31 { + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + c.data[i].effective &= ^(1 << uint(what)) + } + if which&PERMITTED != 0 { + c.data[i].permitted &= ^(1 << 
uint(what)) + } + if which&INHERITABLE != 0 { + c.data[i].inheritable &= ^(1 << uint(what)) + } + if which&BOUNDING != 0 { + c.bounds[i] &= ^(1 << uint(what)) + } + if which&AMBIENT != 0 { + c.ambient[i] &= ^(1 << uint(what)) + } + } +} + +func (c *capsV3) Fill(kind CapType) { + if kind&CAPS == CAPS { + c.data[0].effective = 0xffffffff + c.data[0].permitted = 0xffffffff + c.data[0].inheritable = 0 + c.data[1].effective = 0xffffffff + c.data[1].permitted = 0xffffffff + c.data[1].inheritable = 0 + } + + if kind&BOUNDS == BOUNDS { + c.bounds[0] = 0xffffffff + c.bounds[1] = 0xffffffff + } + if kind&AMBS == AMBS { + c.ambient[0] = 0xffffffff + c.ambient[1] = 0xffffffff + } +} + +func (c *capsV3) Clear(kind CapType) { + if kind&CAPS == CAPS { + c.data[0].effective = 0 + c.data[0].permitted = 0 + c.data[0].inheritable = 0 + c.data[1].effective = 0 + c.data[1].permitted = 0 + c.data[1].inheritable = 0 + } + + if kind&BOUNDS == BOUNDS { + c.bounds[0] = 0 + c.bounds[1] = 0 + } + if kind&AMBS == AMBS { + c.ambient[0] = 0 + c.ambient[1] = 0 + } +} + +func (c *capsV3) StringCap(which CapType) (ret string) { + return mkStringCap(c, which) +} + +func (c *capsV3) String() (ret string) { + return mkString(c, BOUNDING) +} + +func (c *capsV3) Load() (err error) { + err = capget(&c.hdr, &c.data[0]) + if err != nil { + return + } + + path := "/proc/self/status" + if c.hdr.pid != 0 { + path = fmt.Sprintf("/proc/%d/status", c.hdr.pid) + } + + f, err := os.Open(path) + if err != nil { + return + } + b := bufio.NewReader(f) + for { + line, e := b.ReadString('\n') + if e != nil { + if e != io.EOF { + err = e + } + break + } + if val, ok := strings.CutPrefix(line, "CapBnd:\t"); ok { + _, err = fmt.Sscanf(val, "%08x%08x", &c.bounds[1], &c.bounds[0]) + if err != nil { + break + } + continue + } + if val, ok := strings.CutPrefix(line, "CapAmb:\t"); ok { + _, err = fmt.Sscanf(val, "%08x%08x", &c.ambient[1], &c.ambient[0]) + if err != nil { + break + } + continue + } + } + f.Close() + + return +} 
+ +func (c *capsV3) Apply(kind CapType) error { + if c.hdr.pid != 0 { + return errors.New("unable to modify capabilities of another process") + } + last, err := LastCap() + if err != nil { + return err + } + if kind&BOUNDS == BOUNDS { + var data [2]capData + err = capget(&c.hdr, &data[0]) + if err != nil { + return err + } + if (1< 0, nil +} + +func setAmbient(raise bool, caps ...Cap) error { + op := pr_CAP_AMBIENT_RAISE + if !raise { + op = pr_CAP_AMBIENT_LOWER + } + for _, val := range caps { + err := prctl(pr_CAP_AMBIENT, op, uintptr(val)) + if err != nil { + return err + } + } + return nil +} + +func resetAmbient() error { + return prctl(pr_CAP_AMBIENT, pr_CAP_AMBIENT_CLEAR_ALL, 0) +} + +func getBound(c Cap) (bool, error) { + res, err := prctlRetInt(syscall.PR_CAPBSET_READ, uintptr(c), 0) + if err != nil { + return false, err + } + return res > 0, nil +} + +func dropBound(caps ...Cap) error { + for _, val := range caps { + err := prctl(syscall.PR_CAPBSET_DROP, uintptr(val), 0) + if err != nil { + return err + } + } + return nil +} + +func newFile(path string) (c Capabilities, err error) { + c = &capsFile{path: path} + return +} + +type capsFile struct { + path string + data vfscapData +} + +func (c *capsFile) Get(which CapType, what Cap) bool { + var i uint + if what > 31 { + if c.data.version == 1 { + return false + } + i = uint(what) >> 5 + what %= 32 + } + + switch which { + case EFFECTIVE: + return (1< 31 { + if c.data.version == 1 { + continue + } + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + c.data.effective[i] |= 1 << uint(what) + } + if which&PERMITTED != 0 { + c.data.data[i].permitted |= 1 << uint(what) + } + if which&INHERITABLE != 0 { + c.data.data[i].inheritable |= 1 << uint(what) + } + } +} + +func (c *capsFile) Unset(which CapType, caps ...Cap) { + for _, what := range caps { + var i uint + if what > 31 { + if c.data.version == 1 { + continue + } + i = uint(what) >> 5 + what %= 32 + } + + if which&EFFECTIVE != 0 { + 
c.data.effective[i] &= ^(1 << uint(what)) + } + if which&PERMITTED != 0 { + c.data.data[i].permitted &= ^(1 << uint(what)) + } + if which&INHERITABLE != 0 { + c.data.data[i].inheritable &= ^(1 << uint(what)) + } + } +} + +func (c *capsFile) Fill(kind CapType) { + if kind&CAPS == CAPS { + c.data.effective[0] = 0xffffffff + c.data.data[0].permitted = 0xffffffff + c.data.data[0].inheritable = 0 + if c.data.version == 2 { + c.data.effective[1] = 0xffffffff + c.data.data[1].permitted = 0xffffffff + c.data.data[1].inheritable = 0 + } + } +} + +func (c *capsFile) Clear(kind CapType) { + if kind&CAPS == CAPS { + c.data.effective[0] = 0 + c.data.data[0].permitted = 0 + c.data.data[0].inheritable = 0 + if c.data.version == 2 { + c.data.effective[1] = 0 + c.data.data[1].permitted = 0 + c.data.data[1].inheritable = 0 + } + } +} + +func (c *capsFile) StringCap(which CapType) (ret string) { + return mkStringCap(c, which) +} + +func (c *capsFile) String() (ret string) { + return mkString(c, INHERITABLE) +} + +func (c *capsFile) Load() (err error) { + return getVfsCap(c.path, &c.data) +} + +func (c *capsFile) Apply(kind CapType) (err error) { + if kind&CAPS == CAPS { + return setVfsCap(c.path, &c.data) + } + return +} diff --git a/vendor/github.com/moby/sys/capability/capability_noop.go b/vendor/github.com/moby/sys/capability/capability_noop.go new file mode 100644 index 0000000000..b766e444f3 --- /dev/null +++ b/vendor/github.com/moby/sys/capability/capability_noop.go @@ -0,0 +1,46 @@ +// Copyright 2023 The Capability Authors. +// Copyright 2013 Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build !linux + +package capability + +import "errors" + +var errNotSup = errors.New("not supported") + +func newPid(_ int) (Capabilities, error) { + return nil, errNotSup +} + +func newFile(_ string) (Capabilities, error) { + return nil, errNotSup +} + +func lastCap() (Cap, error) { + return -1, errNotSup +} + +func getAmbient(_ Cap) (bool, error) { + return false, errNotSup +} + +func setAmbient(_ bool, _ ...Cap) error { + return errNotSup +} + +func resetAmbient() error { + return errNotSup +} + +func getBound(_ Cap) (bool, error) { + return false, errNotSup +} + +func dropBound(_ ...Cap) error { + return errNotSup +} diff --git a/vendor/github.com/moby/sys/capability/enum.go b/vendor/github.com/moby/sys/capability/enum.go new file mode 100644 index 0000000000..f88593310e --- /dev/null +++ b/vendor/github.com/moby/sys/capability/enum.go @@ -0,0 +1,330 @@ +// Copyright 2024 The Capability Authors. +// Copyright 2013 Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package capability + +import "slices" + +type CapType uint + +func (c CapType) String() string { + switch c { + case EFFECTIVE: + return "effective" + case PERMITTED: + return "permitted" + case INHERITABLE: + return "inheritable" + case BOUNDING: + return "bounding" + case CAPS: + return "caps" + case AMBIENT: + return "ambient" + } + return "unknown" +} + +const ( + EFFECTIVE CapType = 1 << iota + PERMITTED + INHERITABLE + BOUNDING + AMBIENT + + CAPS = EFFECTIVE | PERMITTED | INHERITABLE + BOUNDS = BOUNDING + AMBS = AMBIENT +) + +//go:generate go run enumgen/gen.go +type Cap int + +// POSIX-draft defined capabilities and Linux extensions. 
+// +// Defined in https://github.com/torvalds/linux/blob/master/include/uapi/linux/capability.h +const ( + // In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this + // overrides the restriction of changing file ownership and group + // ownership. + CAP_CHOWN = Cap(0) + + // Override all DAC access, including ACL execute access if + // [_POSIX_ACL] is defined. Excluding DAC access covered by + // CAP_LINUX_IMMUTABLE. + CAP_DAC_OVERRIDE = Cap(1) + + // Overrides all DAC restrictions regarding read and search on files + // and directories, including ACL restrictions if [_POSIX_ACL] is + // defined. Excluding DAC access covered by CAP_LINUX_IMMUTABLE. + CAP_DAC_READ_SEARCH = Cap(2) + + // Overrides all restrictions about allowed operations on files, where + // file owner ID must be equal to the user ID, except where CAP_FSETID + // is applicable. It doesn't override MAC and DAC restrictions. + CAP_FOWNER = Cap(3) + + // Overrides the following restrictions that the effective user ID + // shall match the file owner ID when setting the S_ISUID and S_ISGID + // bits on that file; that the effective group ID (or one of the + // supplementary group IDs) shall match the file owner ID when setting + // the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are + // cleared on successful return from chown(2) (not implemented). + CAP_FSETID = Cap(4) + + // Overrides the restriction that the real or effective user ID of a + // process sending a signal must match the real or effective user ID + // of the process receiving the signal. + CAP_KILL = Cap(5) + + // Allows setgid(2) manipulation + // Allows setgroups(2) + // Allows forged gids on socket credentials passing. + CAP_SETGID = Cap(6) + + // Allows set*uid(2) manipulation (including fsuid). + // Allows forged pids on socket credentials passing. 
+ CAP_SETUID = Cap(7) + + // Linux-specific capabilities + + // Without VFS support for capabilities: + // Transfer any capability in your permitted set to any pid, + // remove any capability in your permitted set from any pid + // With VFS support for capabilities (neither of above, but) + // Add any capability from current's capability bounding set + // to the current process' inheritable set + // Allow taking bits out of capability bounding set + // Allow modification of the securebits for a process + CAP_SETPCAP = Cap(8) + + // Allow modification of S_IMMUTABLE and S_APPEND file attributes + CAP_LINUX_IMMUTABLE = Cap(9) + + // Allows binding to TCP/UDP sockets below 1024 + // Allows binding to ATM VCIs below 32 + CAP_NET_BIND_SERVICE = Cap(10) + + // Allow broadcasting, listen to multicast + CAP_NET_BROADCAST = Cap(11) + + // Allow interface configuration + // Allow administration of IP firewall, masquerading and accounting + // Allow setting debug option on sockets + // Allow modification of routing tables + // Allow setting arbitrary process / process group ownership on + // sockets + // Allow binding to any address for transparent proxying (also via NET_RAW) + // Allow setting TOS (type of service) + // Allow setting promiscuous mode + // Allow clearing driver statistics + // Allow multicasting + // Allow read/write of device-specific registers + // Allow activation of ATM control sockets + CAP_NET_ADMIN = Cap(12) + + // Allow use of RAW sockets + // Allow use of PACKET sockets + // Allow binding to any address for transparent proxying (also via NET_ADMIN) + CAP_NET_RAW = Cap(13) + + // Allow locking of shared memory segments + // Allow mlock and mlockall (which doesn't really have anything to do + // with IPC) + CAP_IPC_LOCK = Cap(14) + + // Override IPC ownership checks + CAP_IPC_OWNER = Cap(15) + + // Insert and remove kernel modules - modify kernel without limit + CAP_SYS_MODULE = Cap(16) + + // Allow ioperm/iopl access + // Allow sending USB messages to 
any device via /proc/bus/usb + CAP_SYS_RAWIO = Cap(17) + + // Allow use of chroot() + CAP_SYS_CHROOT = Cap(18) + + // Allow ptrace() of any process + CAP_SYS_PTRACE = Cap(19) + + // Allow configuration of process accounting + CAP_SYS_PACCT = Cap(20) + + // Allow configuration of the secure attention key + // Allow administration of the random device + // Allow examination and configuration of disk quotas + // Allow setting the domainname + // Allow setting the hostname + // Allow calling bdflush() + // Allow mount() and umount(), setting up new smb connection + // Allow some autofs root ioctls + // Allow nfsservctl + // Allow VM86_REQUEST_IRQ + // Allow to read/write pci config on alpha + // Allow irix_prctl on mips (setstacksize) + // Allow flushing all cache on m68k (sys_cacheflush) + // Allow removing semaphores + // Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores + // and shared memory + // Allow locking/unlocking of shared memory segment + // Allow turning swap on/off + // Allow forged pids on socket credentials passing + // Allow setting readahead and flushing buffers on block devices + // Allow setting geometry in floppy driver + // Allow turning DMA on/off in xd driver + // Allow administration of md devices (mostly the above, but some + // extra ioctls) + // Allow tuning the ide driver + // Allow access to the nvram device + // Allow administration of apm_bios, serial and bttv (TV) device + // Allow manufacturer commands in isdn CAPI support driver + // Allow reading non-standardized portions of pci configuration space + // Allow DDI debug ioctl on sbpcd driver + // Allow setting up serial ports + // Allow sending raw qic-117 commands + // Allow enabling/disabling tagged queuing on SCSI controllers and sending + // arbitrary SCSI commands + // Allow setting encryption key on loopback filesystem + // Allow setting zone reclaim policy + // Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility + CAP_SYS_ADMIN = Cap(21) 
+ + // Allow use of reboot() + CAP_SYS_BOOT = Cap(22) + + // Allow raising priority and setting priority on other (different + // UID) processes + // Allow use of FIFO and round-robin (realtime) scheduling on own + // processes and setting the scheduling algorithm used by another + // process. + // Allow setting cpu affinity on other processes + CAP_SYS_NICE = Cap(23) + + // Override resource limits. Set resource limits. + // Override quota limits. + // Override reserved space on ext2 filesystem + // Modify data journaling mode on ext3 filesystem (uses journaling + // resources) + // NOTE: ext2 honors fsuid when checking for resource overrides, so + // you can override using fsuid too + // Override size restrictions on IPC message queues + // Allow more than 64hz interrupts from the real-time clock + // Override max number of consoles on console allocation + // Override max number of keymaps + // Control memory reclaim behavior + CAP_SYS_RESOURCE = Cap(24) + + // Allow manipulation of system clock + // Allow irix_stime on mips + // Allow setting the real-time clock + CAP_SYS_TIME = Cap(25) + + // Allow configuration of tty devices + // Allow vhangup() of tty + CAP_SYS_TTY_CONFIG = Cap(26) + + // Allow the privileged aspects of mknod() + CAP_MKNOD = Cap(27) + + // Allow taking of leases on files + CAP_LEASE = Cap(28) + + CAP_AUDIT_WRITE = Cap(29) + CAP_AUDIT_CONTROL = Cap(30) + CAP_SETFCAP = Cap(31) + + // Override MAC access. + // The base kernel enforces no MAC policy. + // An LSM may enforce a MAC policy, and if it does and it chooses + // to implement capability based overrides of that policy, this is + // the capability it should use to do so. + CAP_MAC_OVERRIDE = Cap(32) + + // Allow MAC configuration or state changes. + // The base kernel requires no MAC configuration. 
+ // An LSM may enforce a MAC policy, and if it does and it chooses + // to implement capability based checks on modifications to that + // policy or the data required to maintain it, this is the + // capability it should use to do so. + CAP_MAC_ADMIN = Cap(33) + + // Allow configuring the kernel's syslog (printk behaviour) + CAP_SYSLOG = Cap(34) + + // Allow triggering something that will wake the system + CAP_WAKE_ALARM = Cap(35) + + // Allow preventing system suspends + CAP_BLOCK_SUSPEND = Cap(36) + + // Allow reading the audit log via multicast netlink socket + CAP_AUDIT_READ = Cap(37) + + // Allow system performance and observability privileged operations + // using perf_events, i915_perf and other kernel subsystems + CAP_PERFMON = Cap(38) + + // CAP_BPF allows the following BPF operations: + // - Creating all types of BPF maps + // - Advanced verifier features + // - Indirect variable access + // - Bounded loops + // - BPF to BPF function calls + // - Scalar precision tracking + // - Larger complexity limits + // - Dead code elimination + // - And potentially other features + // - Loading BPF Type Format (BTF) data + // - Retrieve xlated and JITed code of BPF programs + // - Use bpf_spin_lock() helper + // + // CAP_PERFMON relaxes the verifier checks further: + // - BPF progs can use of pointer-to-integer conversions + // - speculation attack hardening measures are bypassed + // - bpf_probe_read to read arbitrary kernel memory is allowed + // - bpf_trace_printk to print kernel memory is allowed + // + // CAP_SYS_ADMIN is required to use bpf_probe_write_user. + // + // CAP_SYS_ADMIN is required to iterate system wide loaded + // programs, maps, links, BTFs and convert their IDs to file descriptors. + // + // CAP_PERFMON and CAP_BPF are required to load tracing programs. + // CAP_NET_ADMIN and CAP_BPF are required to load networking programs. + CAP_BPF = Cap(39) + + // Allow checkpoint/restore related operations. 
+ // Introduced in kernel 5.9 + CAP_CHECKPOINT_RESTORE = Cap(40) +) + +// List returns the list of all capabilities known to the package. +// +// Deprecated: use [ListKnown] or [ListSupported] instead. +func List() []Cap { + return ListKnown() +} + +// ListKnown returns the list of all capabilities known to the package. +func ListKnown() []Cap { + return list() +} + +// ListSupported returns the list of all capabilities known to the package, +// except those that are not supported by the currently running Linux kernel. +func ListSupported() ([]Cap, error) { + last, err := LastCap() + if err != nil { + return nil, err + } + return slices.DeleteFunc(list(), func(c Cap) bool { + // Remove caps not supported by the kernel. + return c > last + }), nil +} diff --git a/vendor/github.com/moby/sys/capability/enum_gen.go b/vendor/github.com/moby/sys/capability/enum_gen.go new file mode 100644 index 0000000000..f72cd43a6e --- /dev/null +++ b/vendor/github.com/moby/sys/capability/enum_gen.go @@ -0,0 +1,137 @@ +// Code generated by go generate; DO NOT EDIT. 
// String returns the lower-case name of c, which is the constant's name
// without its "CAP_" prefix (for example "chown" for CAP_CHOWN). Values
// that match none of the cases below yield "unknown".
func (c Cap) String() string {
	switch c {
	case CAP_CHOWN:
		return "chown"
	case CAP_DAC_OVERRIDE:
		return "dac_override"
	case CAP_DAC_READ_SEARCH:
		return "dac_read_search"
	case CAP_FOWNER:
		return "fowner"
	case CAP_FSETID:
		return "fsetid"
	case CAP_KILL:
		return "kill"
	case CAP_SETGID:
		return "setgid"
	case CAP_SETUID:
		return "setuid"
	case CAP_SETPCAP:
		return "setpcap"
	case CAP_LINUX_IMMUTABLE:
		return "linux_immutable"
	case CAP_NET_BIND_SERVICE:
		return "net_bind_service"
	case CAP_NET_BROADCAST:
		return "net_broadcast"
	case CAP_NET_ADMIN:
		return "net_admin"
	case CAP_NET_RAW:
		return "net_raw"
	case CAP_IPC_LOCK:
		return "ipc_lock"
	case CAP_IPC_OWNER:
		return "ipc_owner"
	case CAP_SYS_MODULE:
		return "sys_module"
	case CAP_SYS_RAWIO:
		return "sys_rawio"
	case CAP_SYS_CHROOT:
		return "sys_chroot"
	case CAP_SYS_PTRACE:
		return "sys_ptrace"
	case CAP_SYS_PACCT:
		return "sys_pacct"
	case CAP_SYS_ADMIN:
		return "sys_admin"
	case CAP_SYS_BOOT:
		return "sys_boot"
	case CAP_SYS_NICE:
		return "sys_nice"
	case CAP_SYS_RESOURCE:
		return "sys_resource"
	case CAP_SYS_TIME:
		return "sys_time"
	case CAP_SYS_TTY_CONFIG:
		return "sys_tty_config"
	case CAP_MKNOD:
		return "mknod"
	case CAP_LEASE:
		return "lease"
	case CAP_AUDIT_WRITE:
		return "audit_write"
	case CAP_AUDIT_CONTROL:
		return "audit_control"
	case CAP_SETFCAP:
		return "setfcap"
	case CAP_MAC_OVERRIDE:
		return "mac_override"
	case CAP_MAC_ADMIN:
		return "mac_admin"
	case CAP_SYSLOG:
		return "syslog"
	case CAP_WAKE_ALARM:
		return "wake_alarm"
	case CAP_BLOCK_SUSPEND:
		return "block_suspend"
	case CAP_AUDIT_READ:
		return "audit_read"
	case CAP_PERFMON:
		return "perfmon"
	case CAP_BPF:
		return "bpf"
	case CAP_CHECKPOINT_RESTORE:
		return "checkpoint_restore"
	}
	return "unknown"
}

// list returns a freshly allocated slice holding every Cap constant handled
// by String, from CAP_CHOWN through CAP_CHECKPOINT_RESTORE. Because a new
// slice is built on each call, callers may modify the result freely.
func list() []Cap {
	return []Cap{
		CAP_CHOWN,
		CAP_DAC_OVERRIDE,
		CAP_DAC_READ_SEARCH,
		CAP_FOWNER,
		CAP_FSETID,
		CAP_KILL,
		CAP_SETGID,
		CAP_SETUID,
		CAP_SETPCAP,
		CAP_LINUX_IMMUTABLE,
		CAP_NET_BIND_SERVICE,
		CAP_NET_BROADCAST,
		CAP_NET_ADMIN,
		CAP_NET_RAW,
		CAP_IPC_LOCK,
		CAP_IPC_OWNER,
		CAP_SYS_MODULE,
		CAP_SYS_RAWIO,
		CAP_SYS_CHROOT,
		CAP_SYS_PTRACE,
		CAP_SYS_PACCT,
		CAP_SYS_ADMIN,
		CAP_SYS_BOOT,
		CAP_SYS_NICE,
		CAP_SYS_RESOURCE,
		CAP_SYS_TIME,
		CAP_SYS_TTY_CONFIG,
		CAP_MKNOD,
		CAP_LEASE,
		CAP_AUDIT_WRITE,
		CAP_AUDIT_CONTROL,
		CAP_SETFCAP,
		CAP_MAC_OVERRIDE,
		CAP_MAC_ADMIN,
		CAP_SYSLOG,
		CAP_WAKE_ALARM,
		CAP_BLOCK_SUSPEND,
		CAP_AUDIT_READ,
		CAP_PERFMON,
		CAP_BPF,
		CAP_CHECKPOINT_RESTORE,
	}
}
const (
	// vfsXattrName is the extended attribute that getVfsCap reads and
	// setVfsCap writes.
	vfsXattrName = "security.capability"

	// The top byte of vfscapData.magic carries the format version.
	vfsCapVerMask = 0xff000000
	vfsCapVer1    = 0x01000000
	vfsCapVer2    = 0x02000000

	// The remaining bits of magic are flags; bit 0 is the "effective" flag.
	vfsCapFlagMask      = ^vfsCapVerMask
	vfsCapFlageffective = 0x000001

	// Attribute sizes in bytes: a 32-bit magic word followed by one (V1)
	// or two (V2) permitted/inheritable pairs of 32-bit words.
	vfscapDataSizeV1 = 4 * (1 + 2*1)
	vfscapDataSizeV2 = 4 * (1 + 2*2)
)

// vfscapData is the buffer handed to the xattr syscalls. Only the leading
// magic and data fields (vfscapDataSizeV2 bytes at most) are transferred;
// effective and version are bookkeeping filled in by getVfsCap and read by
// setVfsCap.
type vfscapData struct {
	magic uint32
	data  [2]struct {
		permitted   uint32
		inheritable uint32
	}
	effective [2]uint32
	version   int8
}

// _vfsXattrName is vfsXattrName as a NUL-terminated byte string, prepared
// once for use as a syscall argument.
var _vfsXattrName *byte

func init() {
	// BytePtrFromString only fails when the string contains a NUL byte,
	// which the constant does not, so the error is deliberately ignored.
	_vfsXattrName, _ = syscall.BytePtrFromString(vfsXattrName)
}

// getVfsCap reads the security.capability extended attribute of path into
// dest.
//
// If the attribute is absent (ENODATA) it reports success with
// dest.version set to 2 and leaves the rest of dest untouched. Any other
// syscall failure is returned as the errno reported by the kernel.
// On success dest.version is set from the magic word, dest.effective is
// derived from the effective flag, and EINVAL is returned when the version
// is unknown or the attribute size does not match the version.
func getVfsCap(path string, dest *vfscapData) (err error) {
	var _p0 *byte
	_p0, err = syscall.BytePtrFromString(path)
	if err != nil {
		return err
	}
	r0, _, e1 := syscall.RawSyscall6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(dest)), vfscapDataSizeV2, 0, 0)
	if e1 != 0 {
		if e1 == syscall.ENODATA {
			dest.version = 2
			return nil
		}
		// Return the real errno right away. Falling through to the
		// version switch below would inspect a buffer the kernel never
		// filled and replace the error (e.g. ENOENT) with EINVAL.
		return e1
	}
	switch dest.magic & vfsCapVerMask {
	case vfsCapVer1:
		dest.version = 1
		if r0 != vfscapDataSizeV1 {
			return syscall.EINVAL
		}
		// V1 carries a single pair; make sure the second one is empty.
		dest.data[1].permitted = 0
		dest.data[1].inheritable = 0
	case vfsCapVer2:
		dest.version = 2
		if r0 != vfscapDataSizeV2 {
			return syscall.EINVAL
		}
	default:
		return syscall.EINVAL
	}
	if dest.magic&vfsCapFlageffective != 0 {
		dest.effective[0] = dest.data[0].permitted | dest.data[0].inheritable
		dest.effective[1] = dest.data[1].permitted | dest.data[1].inheritable
	} else {
		dest.effective[0] = 0
		dest.effective[1] = 0
	}
	return nil
}

// setVfsCap writes data to the security.capability extended attribute of
// path.
//
// data.version selects the on-disk format (1 or 2); any other value yields
// EINVAL without touching the file. data.magic is overwritten with the
// matching version word, and for version 2 the effective flag is set when
// either data.effective word is non-zero.
func setVfsCap(path string, data *vfscapData) (err error) {
	var _p0 *byte
	_p0, err = syscall.BytePtrFromString(path)
	if err != nil {
		return err
	}
	var size uintptr
	switch data.version {
	case 1:
		data.magic = vfsCapVer1
		size = vfscapDataSizeV1
	case 2:
		data.magic = vfsCapVer2
		if data.effective[0] != 0 || data.effective[1] != 0 {
			data.magic |= vfsCapFlageffective
		}
		size = vfscapDataSizeV2
	default:
		return syscall.EINVAL
	}
	_, _, e1 := syscall.RawSyscall6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(data)), size, 0, 0)
	if e1 != 0 {
		return e1
	}
	return nil
}
2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2014 Docker, Inc. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/mrunalp/fileutils/MAINTAINERS b/vendor/github.com/mrunalp/fileutils/MAINTAINERS new file mode 100644 index 0000000000..4a2cafa5c4 --- /dev/null +++ b/vendor/github.com/mrunalp/fileutils/MAINTAINERS @@ -0,0 +1 @@ +Mrunal Patel (@mrunalp) diff --git a/vendor/github.com/mrunalp/fileutils/README.md b/vendor/github.com/mrunalp/fileutils/README.md new file mode 100644 index 0000000000..6cb4140eae --- /dev/null +++ b/vendor/github.com/mrunalp/fileutils/README.md @@ -0,0 +1,5 @@ +# fileutils + +Collection of utilities for file manipulation in golang + +The library is based on docker pkg/archive pkg/idtools but does copies instead of handling archive formats. 
// CopyFile copies the file at source to dest, without following a symlink
// at source.
//
// Symlinks are recreated with the same target, block and character devices
// are recreated with mknod using the source's device number, and regular
// files have their contents copied. Other file types are not created. In
// every case dest is then chowned (without following symlinks) to the
// source's owner and, unless source is a symlink, chmodded to the source's
// mode, so for a type that was not created those calls act on whatever
// already exists at dest or fail.
func CopyFile(source string, dest string) error {
	si, err := os.Lstat(source)
	if err != nil {
		return err
	}

	st, ok := si.Sys().(*syscall.Stat_t)
	if !ok {
		return fmt.Errorf("could not convert to syscall.Stat_t")
	}

	uid := int(st.Uid)
	gid := int(st.Gid)
	mode := si.Mode()
	isSymlink := mode&os.ModeSymlink != 0

	switch {
	case isSymlink:
		target, err := os.Readlink(source)
		if err != nil {
			return err
		}
		if err := os.Symlink(target, dest); err != nil {
			return err
		}

	// Test the ModeDevice bit rather than comparing the whole type mask:
	// character devices carry ModeDevice|ModeCharDevice, so an equality
	// check against ModeDevice would only ever match block devices.
	case mode&os.ModeDevice != 0:
		devMajor := int64(major(uint64(st.Rdev)))
		devMinor := int64(minor(uint64(st.Rdev)))
		nodeMode := uint32(mode & os.ModePerm)
		if mode&os.ModeCharDevice != 0 {
			nodeMode |= syscall.S_IFCHR
		} else {
			nodeMode |= syscall.S_IFBLK
		}
		if err := syscall.Mknod(dest, nodeMode, int(mkdev(devMajor, devMinor))); err != nil {
			return err
		}

	case mode.IsRegular():
		if err := copyInternal(source, dest); err != nil {
			return err
		}
	}

	// Chown the file (Lchown so that a symlink itself is re-owned).
	if err := os.Lchown(dest, uid, gid); err != nil {
		return err
	}

	// Chmod the file; skipped for symlinks because Chmod would follow the
	// link and change its target instead.
	if !isSymlink {
		if err := os.Chmod(dest, mode); err != nil {
			return err
		}
	}

	return nil
}

// copyInternal copies the contents of the regular file source into dest,
// creating or truncating dest. An error from closing dest is reported when
// the copy itself succeeded.
func copyInternal(source, dest string) (retErr error) {
	sf, err := os.Open(source)
	if err != nil {
		return err
	}
	defer sf.Close()

	df, err := os.Create(dest)
	if err != nil {
		return err
	}
	defer func() {
		err := df.Close()
		if retErr == nil {
			retErr = err
		}
	}()

	_, err = io.Copy(df, sf)
	return err
}

// CopyDirectory copies the files under the source directory
// to dest directory. The dest directory is created if it
// does not exist.
//
// Sub-directories are created with os.Mkdir, so the copy fails if one of
// them already exists under dest. Each created directory and copied file
// receives the owner and mode of its source.
func CopyDirectory(source string, dest string) error {
	fi, err := os.Stat(source)
	if err != nil {
		return err
	}

	// Get owner.
	st, ok := fi.Sys().(*syscall.Stat_t)
	if !ok {
		return fmt.Errorf("could not convert to syscall.Stat_t")
	}

	// We have to pick an owner here anyway.
	if err := MkdirAllNewAs(dest, fi.Mode(), int(st.Uid), int(st.Gid)); err != nil {
		return err
	}

	return filepath.Walk(source, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// Get the relative path. A failure here is reported instead of
		// silently skipping the entry, which would leave an incomplete
		// copy that looks successful.
		relPath, err := filepath.Rel(source, path)
		if err != nil {
			return err
		}
		destPath := filepath.Join(dest, relPath)

		if !info.IsDir() {
			return CopyFile(path, destPath)
		}

		// The source directory itself was handled by MkdirAllNewAs above.
		if path == source {
			return nil
		}

		// Get the owner.
		st, ok := info.Sys().(*syscall.Stat_t)
		if !ok {
			return fmt.Errorf("could not convert to syscall.Stat_t")
		}

		if err := os.Mkdir(destPath, info.Mode()); err != nil {
			return err
		}
		if err := os.Lchown(destPath, int(st.Uid), int(st.Gid)); err != nil {
			return err
		}
		// Chmod explicitly: the mode given to Mkdir is subject to umask.
		return os.Chmod(destPath, info.Mode())
	})
}

// major extracts the major number from device: the 12 bits starting at
// bit 8.
func major(device uint64) uint64 {
	return (device >> 8) & 0xfff
}

// minor extracts the minor number from device: the low 8 bits combined
// with the 12 bits starting at bit 20.
func minor(device uint64) uint64 {
	return (device & 0xff) | ((device >> 12) & 0xfff00)
}

// mkdev is the inverse of major and minor: it packs the two numbers into a
// single device number, truncated to 32 bits.
func mkdev(major int64, minor int64) uint32 {
	return uint32(((minor & 0xfff00) << 12) | ((major & 0xfff) << 8) | (minor & 0xff))
}

// MkdirAllNewAs creates a directory (include any along the path) and then modifies
// ownership ONLY of newly created directories to the requested uid/gid. If the
// directories along the path exist, no change of ownership will be performed.
//
// If path already exists and is a directory nothing is done; if it exists
// and is not a directory an ENOTDIR *os.PathError is returned. Every newly
// created directory is also chmodded to mode. Both absolute and relative
// paths are accepted.
func MkdirAllNewAs(path string, mode os.FileMode, ownerUID, ownerGID int) error {
	// paths collects the requested path plus every ancestor that does not
	// exist before MkdirAll runs, so that exactly those directories can be
	// chowned afterwards.
	var paths []string
	st, err := os.Stat(path)
	if err != nil && os.IsNotExist(err) {
		paths = []string{path}
	} else if err == nil {
		if !st.IsDir() {
			return &os.PathError{Op: "mkdir", Path: path, Err: syscall.ENOTDIR}
		}
		// nothing to do; directory path fully exists already
		return nil
	}

	// Walk up towards the root looking for directories which do not exist
	// and add them to paths for chown after creation. The walk stops at
	// "/" and also when filepath.Dir stops making progress, which is what
	// happens at "." for relative paths; checking only for "/" would spin
	// forever on a relative path.
	dirPath := path
	for {
		parent := filepath.Dir(dirPath)
		if parent == dirPath || parent == "/" {
			break
		}
		dirPath = parent
		if _, err := os.Stat(dirPath); err != nil && os.IsNotExist(err) {
			paths = append(paths, dirPath)
		}
	}

	if err := os.MkdirAll(path, mode); err != nil {
		return err
	}

	// Only the directories recorded above (those that did not exist before
	// MkdirAll) are chowned and chmodded.
	for _, pathComponent := range paths {
		if err := os.Chown(pathComponent, ownerUID, ownerGID); err != nil {
			return err
		}
		if err := os.Chmod(pathComponent, mode); err != nil {
			return err
		}
	}
	return nil
}
b/vendor/github.com/opencontainers/cgroups/.golangci-extra.yml new file mode 100644 index 0000000000..b98dba1ba1 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/.golangci-extra.yml @@ -0,0 +1,21 @@ +# This is golangci-lint config file which is used to check NEW code in +# github PRs only (see lint-extra in .github/workflows/validate.yml). +# +# For the default linter config, see .golangci.yml. This config should +# only enable additional linters and/or linter settings not enabled +# in the default config. +version: "2" + +linters: + default: none + enable: + - godot + - revive + - staticcheck + settings: + staticcheck: + checks: + - all + - -QF1008 # https://staticcheck.dev/docs/checks/#QF1008 Omit embedded fields from selector expression. + exclusions: + generated: strict diff --git a/vendor/github.com/opencontainers/cgroups/.golangci.yml b/vendor/github.com/opencontainers/cgroups/.golangci.yml new file mode 100644 index 0000000000..90799883ca --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/.golangci.yml @@ -0,0 +1,31 @@ +# For documentation, see https://golangci-lint.run/usage/configuration/ +version: "2" + +formatters: + enable: + - gofumpt + exclusions: + generated: strict + +linters: + enable: + - errorlint + - nolintlint + - unconvert + - unparam + settings: + govet: + enable: + - nilness + staticcheck: + checks: + - all + - -ST1000 # https://staticcheck.dev/docs/checks/#ST1000 Incorrect or missing package comment. + - -ST1003 # https://staticcheck.dev/docs/checks/#ST1003 Poorly chosen identifier. + - -ST1005 # https://staticcheck.dev/docs/checks/#ST1005 Incorrectly formatted error string. + - -QF1008 # https://staticcheck.dev/docs/checks/#QF1008 Omit embedded fields from selector expression. 
+ exclusions: + generated: strict + presets: + - comments + - std-error-handling diff --git a/vendor/github.com/opencontainers/cgroups/CODEOWNERS b/vendor/github.com/opencontainers/cgroups/CODEOWNERS new file mode 100644 index 0000000000..7201e35ac9 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/CODEOWNERS @@ -0,0 +1 @@ +* @maintainer1 @maintainer2 @maintainer3 diff --git a/vendor/github.com/opencontainers/cgroups/CONTRIBUTING.md b/vendor/github.com/opencontainers/cgroups/CONTRIBUTING.md new file mode 100644 index 0000000000..135abcf02f --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/CONTRIBUTING.md @@ -0,0 +1,150 @@ +# Contribution Guidelines + +Development happens on GitHub. +Issues are used for bugs and actionable items and longer discussions can happen on the [mailing list](#mailing-list). + +The content of this repository is licensed under the [Apache License, Version 2.0](LICENSE). + +## Code of Conduct + +Participation in the Open Container community is governed by [Open Container Code of Conduct][code-of-conduct]. + +## Meetings + +The contributors and maintainers of all OCI projects have monthly meetings at 2:00 PM (USA Pacific) on the first Wednesday of every month. +There is an [iCalendar][rfc5545] format for the meetings [here][meeting.ics]. +Everyone is welcome to participate via [UberConference web][UberConference] or audio-only: +1 415 968 0849 (no PIN needed). +An initial agenda will be posted to the [mailing list](#mailing-list) in the week before each meeting, and everyone is welcome to propose additional topics or suggest other agenda alterations there. +Minutes from past meetings are archived [here][minutes]. + +## Mailing list + +You can subscribe and browse the mailing list on [Google Groups][mailing-list]. + +## IRC + +OCI discussion happens on #opencontainers on [Freenode][] ([logs][irc-logs]). 
+ +## Git + +### Security issues + +If you are reporting a security issue, do not create an issue or file a pull +request on GitHub. Instead, disclose the issue responsibly by sending an email +to security@opencontainers.org (which is inhabited only by the maintainers of +the various OCI projects). + +### Pull requests are always welcome + +We are always thrilled to receive pull requests, and do our best to +process them as fast as possible. Not sure if that typo is worth a pull +request? Do it! We will appreciate it. + +If your pull request is not accepted on the first try, don't be +discouraged! If there's a problem with the implementation, hopefully you +received feedback on what to improve. + +We're trying very hard to keep the project lean and focused. We don't want it +to do everything for everybody. This means that we might decide against +incorporating a new feature. + +### Conventions + +Fork the repo and make changes on your fork in a feature branch. +For larger bugs and enhancements, consider filing a leader issue or mailing-list thread for discussion that is independent of the implementation. +Small changes or changes that have been discussed on the [project mailing list](#mailing-list) may be submitted without a leader issue. + +If the project has a test suite, submit unit tests for your changes. Take a +look at existing tests for inspiration. Run the full test suite on your branch +before submitting a pull request. + +Update the documentation when creating or modifying features. Test +your documentation changes for clarity, concision, and correctness, as +well as a clean documentation build. + +Pull request descriptions should be as clear as possible and include a +reference to all the issues that they address. + +Commit messages must start with a capitalized and short summary +written in the imperative, followed by an optional, more detailed +explanatory text which is separated from the summary by an empty line.
+ +Code review comments may be added to your pull request. Discuss, then make the +suggested modifications and push additional commits to your feature branch. Be +sure to post a comment after pushing. The new commits will show up in the pull +request automatically, but the reviewers will not be notified unless you +comment. + +Before the pull request is merged, make sure that you squash your commits into +logical units of work using `git rebase -i` and `git push -f`. After every +commit the test suite (if any) should be passing. Include documentation changes +in the same commit so that a revert would remove all traces of the feature or +fix. + +Commits that fix or close an issue should include a reference like `Closes #XXX` +or `Fixes #XXX`, which will automatically close the issue when merged. + +### Sign your work + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. The rules are pretty simple: if you +can certify the below (from [developercertificate.org][]): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +1 Letterman Drive +Suite D4700 +San Francisco, CA, 94129 + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. 
+ + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +then you just add a line to every git commit message: + + Signed-off-by: Joe Smith + +using your real name (sorry, no pseudonyms or anonymous contributions.) + +You can add the sign off when creating the git commit via `git commit -s`. 
+ +[code-of-conduct]: https://github.com/opencontainers/tob/blob/d2f9d68c1332870e40693fe077d311e0742bc73d/code-of-conduct.md +[developercertificate.org]: http://developercertificate.org/ +[Freenode]: https://freenode.net/ +[irc-logs]: http://ircbot.wl.linuxfoundation.org/eavesdrop/%23opencontainers/ +[mailing-list]: https://groups.google.com/a/opencontainers.org/forum/#!forum/dev +[meeting.ics]: https://github.com/opencontainers/runtime-spec/blob/master/meeting.ics +[minutes]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/ +[rfc5545]: https://tools.ietf.org/html/rfc5545 +[UberConference]: https://www.uberconference.com/opencontainers diff --git a/vendor/github.com/opencontainers/cgroups/GOVERNANCE.md b/vendor/github.com/opencontainers/cgroups/GOVERNANCE.md new file mode 100644 index 0000000000..3b7b32f788 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/GOVERNANCE.md @@ -0,0 +1,63 @@ +# Project governance + +The [OCI charter][charter] §5.b.viii tasks an OCI Project's maintainers (listed in the repository's MAINTAINERS file and sometimes referred to as "the TDC", [§5.e][charter]) with: + +> Creating, maintaining and enforcing governance guidelines for the TDC, approved by the maintainers, and which shall be posted visibly for the TDC. + +This section describes generic rules and procedures for fulfilling that mandate. + +## Proposing a motion + +A maintainer SHOULD propose a motion on the dev@opencontainers.org mailing list (except [security issues](#security-issues)) with another maintainer as a co-sponsor. + +## Voting + +Voting on a proposed motion SHOULD happen on the dev@opencontainers.org mailing list (except [security issues](#security-issues)) with maintainers posting LGTM or REJECT. +Maintainers MAY also explicitly not vote by posting ABSTAIN (which is useful to revert a previous vote). +Maintainers MAY post multiple times (e.g. as they revise their position based on feedback), but only their final post counts in the tally. 
+A proposed motion is adopted if two-thirds of votes cast, a quorum having voted, are in favor of the release. + +Voting SHOULD remain open for a week to collect feedback from the wider community and allow the maintainers to digest the proposed motion. +Under exceptional conditions (e.g. non-major security fix releases) proposals which reach quorum with unanimous support MAY be adopted earlier. + +A maintainer MAY choose to reply with REJECT. +A maintainer posting a REJECT MUST include a list of concerns or links to written documentation for those concerns (e.g. GitHub issues or mailing-list threads). +The maintainers SHOULD try to resolve the concerns and wait for the rejecting maintainer to change their opinion to LGTM. +However, a motion MAY be adopted with REJECTs, as outlined in the previous paragraphs. + +## Quorum + +A quorum is established when at least two-thirds of maintainers have voted. + +For projects that are not specifications, a [motion to release](#release-approval) MAY be adopted if the tally is at least three LGTMs and no REJECTs, even if three votes do not meet the usual two-thirds quorum. + +## Amendments + +The [project governance](#project-governance) rules and procedures MAY be amended or replaced using the procedures themselves. +The MAINTAINERS of this project governance document is the total set of MAINTAINERS from all Open Containers projects (go-digest, image-spec, image-tools, runC, runtime-spec, runtime-tools, and selinux). + +## Subject templates + +Maintainers are busy and get lots of email. +To make project proposals recognizable, proposed motions SHOULD use the following subject templates.
+ +### Proposing a motion + +> [{project} VOTE]: {motion description} (closes {end of voting window}) + +For example: + +> [runtime-spec VOTE]: Tag 0647920 as 1.0.0-rc (closes 2016-06-03 20:00 UTC) + +### Tallying results + +After voting closes, a maintainer SHOULD post a tally to the motion thread with a subject template like: + +> [{project} {status}]: {motion description} (+{LGTMs} -{REJECTs} #{ABSTAINs}) + +Where `{status}` is either `adopted` or `rejected`. +For example: + +> [runtime-spec adopted]: Tag 0647920 as 1.0.0-rc (+6 -0 #3) + +[charter]: https://www.opencontainers.org/about/governance diff --git a/vendor/github.com/opencontainers/cgroups/MAINTAINERS b/vendor/github.com/opencontainers/cgroups/MAINTAINERS new file mode 100644 index 0000000000..413edcb7d3 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/MAINTAINERS @@ -0,0 +1,8 @@ +Akihiro Suda (@AkihiroSuda) +Aleksa Sarai (@cyphar) +Kir Kolyshkin (@kolyshkin) +Mrunal Patel (@mrunalp) +Sebastiaan van Stijn (@thaJeztah) +Odin Ugedal (@odinuge) +Peter Hunt (@haircommander) +Davanum Srinivas (@dims) diff --git a/vendor/github.com/opencontainers/cgroups/MAINTAINERS_GUIDE.md b/vendor/github.com/opencontainers/cgroups/MAINTAINERS_GUIDE.md new file mode 100644 index 0000000000..8e96917473 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/MAINTAINERS_GUIDE.md @@ -0,0 +1,92 @@ +## Introduction + +Dear maintainer. Thank you for investing the time and energy to help +make this project as useful as possible. Maintaining a project is difficult, +sometimes unrewarding work. Sure, you will get to contribute cool +features to the project. But most of your time will be spent reviewing, +cleaning up, documenting, answering questions, justifying design +decisions - while everyone has all the fun! But remember - the quality +of the maintainers work is what distinguishes the good projects from the +great. 
So please be proud of your work, even the unglamorous parts, +and encourage a culture of appreciation and respect for *every* aspect +of improving the project - not just the hot new features. + +This document is a manual for maintainers old and new. It explains what +is expected of maintainers, how they should work, and what tools are +available to them. + +This is a living document - if you see something out of date or missing, +speak up! + +## What are a maintainer's responsibilities? + +It is every maintainer's responsibility to: + +* Expose a clear roadmap for improving their component. +* Deliver prompt feedback and decisions on pull requests. +* Be available to anyone with questions, bug reports, criticism etc. on their component. + This includes IRC and GitHub issues and pull requests. +* Make sure their component respects the philosophy, design and roadmap of the project. + +## How are decisions made? + +This project is an open-source project with an open design philosophy. This +means that the repository is the source of truth for EVERY aspect of the +project, including its philosophy, design, roadmap and APIs. *If it's +part of the project, it's in the repo. It's in the repo, it's part of +the project.* + +As a result, all decisions can be expressed as changes to the +repository. An implementation change is a change to the source code. An +API change is a change to the API specification. A philosophy change is +a change to the philosophy manifesto. And so on. + +All decisions affecting this project, big and small, follow the same procedure: + +1. Discuss a proposal on the [mailing list](CONTRIBUTING.md#mailing-list). + Anyone can do this. +2. Open a pull request. + Anyone can do this. +3. Discuss the pull request. + Anyone can do this. +4. Endorse (`LGTM`) or oppose (`Rejected`) the pull request. + The relevant maintainers do this (see below [Who decides what?](#who-decides-what)).
+ Changes that affect project management (changing policy, cutting releases, etc.) are [proposed and voted on the mailing list](GOVERNANCE.md). +5. Merge or close the pull request. + The relevant maintainers do this. + +### I'm a maintainer, should I make pull requests too? + +Yes. Nobody should ever push to master directly. All changes should be +made through a pull request. + +## Who decides what? + +All decisions are pull requests, and the relevant maintainers make +decisions by accepting or refusing the pull request. Review and acceptance +by anyone is denoted by adding a comment in the pull request: `LGTM`. +However, only currently listed `MAINTAINERS` are counted towards the required +two LGTMs. In addition, if a maintainer has created a pull request, they cannot +count toward the two LGTM rule (to ensure equal amounts of review for every pull +request, no matter who wrote it). + +Overall the maintainer system works because of mutual respect. +The maintainers trust one another to act in the best interests of the project. +Sometimes maintainers can disagree and this is part of a healthy project to represent the points of view of various people. +In the case where maintainers cannot find agreement on a specific change, maintainers should use the [governance procedure](GOVERNANCE.md) to attempt to reach a consensus. + +### How are maintainers added? + +The best maintainers have a vested interest in the project. Maintainers +are first and foremost contributors that have shown they are committed to +the long term success of the project. Contributors wanting to become +maintainers are expected to be deeply involved in contributing code, +pull request review, and triage of issues in the project for more than two months. + +Just contributing does not make you a maintainer, it is about building trust with the current maintainers of the project and being a person that they can depend on to act in the best interest of the project. 
+The final vote to add a new maintainer should be approved by the [governance procedure](GOVERNANCE.md). + +### How are maintainers removed? + +When a maintainer is unable to perform the [required duties](#what-are-a-maintainers-responsibilities) they can be removed by the [governance procedure](GOVERNANCE.md). +Issues related to a maintainer's performance should be discussed with them among the other maintainers so that they are not surprised by a pull request removing them. diff --git a/vendor/github.com/opencontainers/cgroups/README.md b/vendor/github.com/opencontainers/cgroups/README.md new file mode 100644 index 0000000000..a8187da1e8 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/README.md @@ -0,0 +1,11 @@ +# OCI Project Template + +Useful boilerplate and organizational information for all OCI projects. + +* README (this file) +* [The Apache License, Version 2.0](LICENSE) +* [A list of maintainers](MAINTAINERS) +* [Maintainer guidelines](MAINTAINERS_GUIDE.md) +* [Contributor guidelines](CONTRIBUTING.md) +* [Project governance](GOVERNANCE.md) +* [Release procedures](RELEASES.md) diff --git a/vendor/github.com/opencontainers/cgroups/RELEASES.md b/vendor/github.com/opencontainers/cgroups/RELEASES.md new file mode 100644 index 0000000000..e3802706f5 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/RELEASES.md @@ -0,0 +1,51 @@ +# Releases + +The release process hopes to encourage early, consistent consensus-building during project development. +The mechanisms used are regular community communication on the mailing list about progress, scheduled meetings for issue resolution and release triage, and regularly paced and communicated releases. +Releases are proposed and adopted or rejected using the usual [project governance](GOVERNANCE.md) rules and procedures. + +An anti-pattern that we want to avoid is heavy development or discussions "late cycle" around major releases. 
+We want to build a community that is involved and communicates consistently through all releases instead of relying on "silent periods" as a judge of stability. + +## Parallel releases + +A single project MAY consider several motions to release in parallel. +However each motion to release after the initial 0.1.0 MUST be based on a previous release that has already landed. + +For example, runtime-spec maintainers may propose a v1.0.0-rc2 on the 1st of the month and a v0.9.1 bugfix on the 2nd of the month. +They may not propose a v1.0.0-rc3 until the v1.0.0-rc2 is accepted (on the 7th if the vote initiated on the 1st passes). + +## Specifications + +The OCI maintains three categories of projects: specifications, applications, and conformance-testing tools. +However, specification releases have special restrictions in the [OCI charter][charter]: + +* They are the target of backwards compatibility (§7.g), and +* They are subject to the OFWa patent grant (§8.d and e). + +To avoid unfortunate side effects (onerous backwards compatibility requirements or Member resignations), the following additional procedures apply to specification releases: + +### Planning a release + +Every OCI specification project SHOULD hold meetings that involve maintainers reviewing pull requests, debating outstanding issues, and planning releases. +This meeting MUST be advertised on the project README and MAY happen on a phone call, video conference, or on IRC. +Maintainers MUST send updates to the dev@opencontainers.org with results of these meetings. + +Before the specification reaches v1.0.0, the meetings SHOULD be weekly. +Once a specification has reached v1.0.0, the maintainers may alter the cadence, but a meeting MUST be held within four weeks of the previous meeting. + +The release plans, corresponding milestones and estimated due dates MUST be published on GitHub (e.g. https://github.com/opencontainers/runtime-spec/milestones). 
+GitHub milestones and issues are only used for community organization and all releases MUST follow the [project governance](GOVERNANCE.md) rules and procedures. + +### Timelines + +Specifications have a variety of different timelines in their lifecycle. + +* Pre-v1.0.0 specifications SHOULD release on a monthly cadence to garner feedback. +* Major specification releases MUST release at least three release candidates spaced a minimum of one week apart. + This means a major release like a v1.0.0 or v2.0.0 release will take 1 month at minimum: one week for rc1, one week for rc2, one week for rc3, and one week for the major release itself. + Maintainers SHOULD strive to make zero breaking changes during this cycle of release candidates and SHOULD restart the three-candidate count when a breaking change is introduced. + For example if a breaking change is introduced in v1.0.0-rc2 then the series would end with v1.0.0-rc4 and v1.0.0. +* Minor and patch releases SHOULD be made on an as-needed basis. + +[charter]: https://www.opencontainers.org/about/governance diff --git a/vendor/github.com/opencontainers/cgroups/cgroups.go b/vendor/github.com/opencontainers/cgroups/cgroups.go new file mode 100644 index 0000000000..1f127550c0 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/cgroups.go @@ -0,0 +1,78 @@ +package cgroups + +import ( + "errors" +) + +var ( + // ErrDevicesUnsupported is an error returned when a cgroup manager + // is not configured to set device rules. + ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules") + + // ErrRootless is returned by [Manager.Apply] when there is an error + // creating cgroup directory, and cgroup.Rootless is set. In general, + // this error is to be ignored. + ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)") + + // DevicesSetV1 and DevicesSetV2 are functions to set devices for + // cgroup v1 and v2, respectively. 
Unless + // [github.com/opencontainers/cgroups/devices] + // package is imported, it is set to nil, so cgroup managers can't + // manage devices. + DevicesSetV1 func(path string, r *Resources) error + DevicesSetV2 func(path string, r *Resources) error +) + +type Manager interface { + // Apply creates a cgroup, if not yet created, and adds a process + // with the specified pid into that cgroup. A special value of -1 + // can be used to merely create a cgroup. + Apply(pid int) error + + // GetPids returns the PIDs of all processes inside the cgroup. + GetPids() ([]int, error) + + // GetAllPids returns the PIDs of all processes inside the cgroup + // and all its sub-cgroups. + GetAllPids() ([]int, error) + + // GetStats returns cgroups statistics. + GetStats() (*Stats, error) + + // Freeze sets the freezer cgroup to the specified state. + Freeze(state FreezerState) error + + // Destroy removes cgroup. + Destroy() error + + // Path returns a cgroup path to the specified controller/subsystem. + // For cgroupv2, the argument is unused and can be empty. + Path(string) string + + // Set sets cgroup resources parameters/limits. If the argument is nil, + // the resources specified during Manager creation (or the previous call + // to Set) are used. + Set(r *Resources) error + + // GetPaths returns cgroup path(s) to save in a state file in order to + // restore later. + // + // For cgroup v1, a key is cgroup subsystem name, and the value is the + // path to the cgroup for this subsystem. + // + // For cgroup v2 unified hierarchy, a key is "", and the value is the + // unified path. + GetPaths() map[string]string + + // GetCgroups returns the cgroup data as configured. + GetCgroups() (*Cgroup, error) + + // GetFreezerState retrieves the current FreezerState of the cgroup. + GetFreezerState() (FreezerState, error) + + // Exists returns whether the cgroup path exists or not. + Exists() bool + + // OOMKillCount reports OOM kill count for the cgroup.
+ OOMKillCount() (uint64, error) +} diff --git a/vendor/github.com/opencontainers/cgroups/config_blkio_device.go b/vendor/github.com/opencontainers/cgroups/config_blkio_device.go new file mode 100644 index 0000000000..9dc2a034c6 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/config_blkio_device.go @@ -0,0 +1,66 @@ +package cgroups + +import "fmt" + +// BlockIODevice holds major:minor format supported in blkio cgroup. +type BlockIODevice struct { + // Major is the device's major number + Major int64 `json:"major"` + // Minor is the device's minor number + Minor int64 `json:"minor"` +} + +// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair +type WeightDevice struct { + BlockIODevice + // Weight is the bandwidth rate for the device, range is from 10 to 1000 + Weight uint16 `json:"weight"` + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only + LeafWeight uint16 `json:"leafWeight"` +} + +// NewWeightDevice returns a configured WeightDevice pointer +func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice { + wd := &WeightDevice{} + wd.Major = major + wd.Minor = minor + wd.Weight = weight + wd.LeafWeight = leafWeight + return wd +} + +// WeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) WeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight) +} + +// LeafWeightString formats the struct to be writable to the cgroup specific file +func (wd *WeightDevice) LeafWeightString() string { + return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight) +} + +// ThrottleDevice struct holds a `major:minor rate_per_second` pair +type ThrottleDevice struct { + BlockIODevice + // Rate is the IO rate limit per cgroup per device + Rate uint64 `json:"rate"` +} + +// NewThrottleDevice returns a configured ThrottleDevice pointer +func 
NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice { + td := &ThrottleDevice{} + td.Major = major + td.Minor = minor + td.Rate = rate + return td +} + +// String formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) String() string { + return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate) +} + +// StringName formats the struct to be writable to the cgroup specific file +func (td *ThrottleDevice) StringName(name string) string { + return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate) +} diff --git a/vendor/github.com/opencontainers/cgroups/config_hugepages.go b/vendor/github.com/opencontainers/cgroups/config_hugepages.go new file mode 100644 index 0000000000..5357dd090f --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/config_hugepages.go @@ -0,0 +1,9 @@ +package cgroups + +type HugepageLimit struct { + // which type of hugepage to limit. + Pagesize string `json:"page_size"` + + // usage limit for hugepage. 
+ Limit uint64 `json:"limit"` +} diff --git a/vendor/github.com/opencontainers/cgroups/config_ifprio_map.go b/vendor/github.com/opencontainers/cgroups/config_ifprio_map.go new file mode 100644 index 0000000000..d771603a77 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/config_ifprio_map.go @@ -0,0 +1,14 @@ +package cgroups + +import ( + "fmt" +) + +type IfPrioMap struct { + Interface string `json:"interface"` + Priority int64 `json:"priority"` +} + +func (i *IfPrioMap) CgroupString() string { + return fmt.Sprintf("%s %d", i.Interface, i.Priority) +} diff --git a/vendor/github.com/opencontainers/cgroups/config_linux.go b/vendor/github.com/opencontainers/cgroups/config_linux.go new file mode 100644 index 0000000000..9bc58a3789 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/config_linux.go @@ -0,0 +1,169 @@ +package cgroups + +import ( + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + devices "github.com/opencontainers/cgroups/devices/config" +) + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +// Cgroup holds properties of a cgroup on Linux. +type Cgroup struct { + // Name specifies the name of the cgroup + Name string `json:"name,omitempty"` + + // Parent specifies the name of parent of cgroup or slice + Parent string `json:"parent,omitempty"` + + // Path specifies the path to cgroups that are created and/or joined by the container. + // The path is assumed to be relative to the host system cgroup mountpoint. + Path string `json:"path,omitempty"` + + // ScopePrefix describes prefix for the scope name. + ScopePrefix string `json:"scope_prefix,omitempty"` + + // Resources contains various cgroups settings to apply. + *Resources + + // Systemd tells if systemd should be used to manage cgroups. 
// Resources describes the resource limits and controller settings to apply
// to a cgroup. Fields tagged `json:"-"` are not serialized.
type Resources struct {
	// Devices is the set of access rules for devices in the container.
	Devices []*devices.Rule `json:"devices,omitempty"`

	// Memory limit (in bytes).
	Memory int64 `json:"memory,omitempty"`

	// Memory reservation or soft_limit (in bytes).
	MemoryReservation int64 `json:"memory_reservation,omitempty"`

	// Total memory usage (memory+swap); use -1 for unlimited swap.
	MemorySwap int64 `json:"memory_swap,omitempty"`

	// CPU shares (relative weight vs. other containers).
	CpuShares uint64 `json:"cpu_shares,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuShares should be CPUShares".

	// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
	CpuQuota int64 `json:"cpu_quota,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuQuota should be CPUQuota".

	// CPU hardcap burst limit (in usecs). Additional cpu time that may be
	// accumulated and used for bursts in a given period.
	CpuBurst *uint64 `json:"cpu_burst,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuBurst should be CPUBurst".

	// CPU period to be used for hardcapping (in usecs). 0 to use system default.
	CpuPeriod uint64 `json:"cpu_period,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuPeriod should be CPUPeriod".

	// How much CPU time may be used for realtime scheduling (in usecs).
	CpuRtRuntime int64 `json:"cpu_rt_quota,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuRtRuntime should be CPURtRuntime".

	// CPU period to be used for realtime scheduling (in usecs).
	CpuRtPeriod uint64 `json:"cpu_rt_period,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuRtPeriod should be CPURtPeriod".

	// Cpuset CPUs to use.
	CpusetCpus string `json:"cpuset_cpus,omitempty"`

	// Cpuset memory nodes to use.
	CpusetMems string `json:"cpuset_mems,omitempty"`

	// Cgroup's SCHED_IDLE value.
	CPUIdle *int64 `json:"cpu_idle,omitempty"`

	// Process limit; set to a value <= 0 to disable the limit.
	PidsLimit int64 `json:"pids_limit,omitempty"`

	// Specifies per cgroup weight, range is from 10 to 1000.
	BlkioWeight uint16 `json:"blkio_weight,omitempty"`

	// Tasks' weight in the given cgroup while competing with the cgroup's
	// child cgroups, range is from 10 to 1000, cfq scheduler only.
	BlkioLeafWeight uint16 `json:"blkio_leaf_weight,omitempty"`

	// Weight per cgroup per device, can override BlkioWeight.
	BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device,omitempty"`

	// IO read rate limit per cgroup per device, bytes per second.
	BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device,omitempty"`

	// IO write rate limit per cgroup per device, bytes per second.
	BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device,omitempty"`

	// IO read rate limit per cgroup per device, IO per second.
	BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device,omitempty"`

	// IO write rate limit per cgroup per device, IO per second.
	BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device,omitempty"`

	// Freeze value for the process.
	Freezer FreezerState `json:"freezer,omitempty"`

	// Hugetlb limit (in bytes).
	HugetlbLimit []*HugepageLimit `json:"hugetlb_limit,omitempty"`

	// Whether to disable OOM killer.
	OomKillDisable bool `json:"oom_kill_disable,omitempty"`

	// Tuning swappiness behaviour per cgroup.
	MemorySwappiness *uint64 `json:"memory_swappiness,omitempty"`

	// Set priority of network traffic for container.
	NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap,omitempty"`

	// Set class identifier for container's network packets.
	NetClsClassid uint32 `json:"net_cls_classid_u,omitempty"`

	// Rdma resource restriction configuration.
	Rdma map[string]LinuxRdma `json:"rdma,omitempty"`

	// Used on cgroups v2:

	// CpuWeight sets a proportional bandwidth limit.
	CpuWeight uint64 `json:"cpu_weight,omitempty"` //nolint:revive // Suppress "var-naming: struct field CpuWeight should be CPUWeight".

	// Unified is cgroupv2-only key-value map.
	Unified map[string]string `json:"unified,omitempty"`

	// SkipDevices allows skipping the configuration of device permissions.
	// Used by e.g. kubelet while creating a parent cgroup (kubepods)
	// common for many containers, and by runc update.
	//
	// NOTE it is impossible to start a container which has this flag set.
	SkipDevices bool `json:"-"`

	// SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup
	// freeze when setting resources. Only applicable to systemd legacy
	// (i.e. cgroup v1) manager (which uses freeze by default to avoid
	// spurious permission errors caused by systemd inability to update
	// device rules in a non-disruptive manner).
	//
	// If not set, a few methods (such as looking into cgroup's
	// devices.list and querying the systemd unit properties) are used
	// during Set() to figure out whether the freeze is required. Those
	// methods may be relatively slow, thus this flag.
	SkipFreezeOnSet bool `json:"-"`

	// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
	// if the new memory limits (Memory and MemorySwap) being set are lower
	// than the current memory usage, and reject if so.
	MemoryCheckBeforeUpdate bool `json:"memory_check_before_update,omitempty"`
}
+// It is supposed to be used for cgroup files only, and returns +// an error if the file is not a cgroup file. +// +// Arguments dir and file are joined together to form an absolute path +// to a file being opened. +func OpenFile(dir, file string, flags int) (*os.File, error) { + if dir == "" { + return nil, fmt.Errorf("no directory specified for %s", file) + } + return openFile(dir, file, flags) +} + +// ReadFile reads data from a cgroup file in dir. +// It is supposed to be used for cgroup files only. +func ReadFile(dir, file string) (string, error) { + fd, err := OpenFile(dir, file, unix.O_RDONLY) + if err != nil { + return "", err + } + defer fd.Close() + var buf bytes.Buffer + + _, err = buf.ReadFrom(fd) + return buf.String(), err +} + +// WriteFile writes data to a cgroup file in dir. +// It is supposed to be used for cgroup files only. +func WriteFile(dir, file, data string) error { + fd, err := OpenFile(dir, file, unix.O_WRONLY) + if err != nil { + return err + } + defer fd.Close() + if _, err := fd.WriteString(data); err != nil { + // Having data in the error message helps in debugging. + return fmt.Errorf("failed to write %q: %w", data, err) + } + return nil +} + +// WriteFileByLine is the same as WriteFile, except if data contains newlines, +// it is written line by line. 
+func WriteFileByLine(dir, file, data string) error { + i := strings.Index(data, "\n") + if i == -1 { + return WriteFile(dir, file, data) + } + + fd, err := OpenFile(dir, file, unix.O_WRONLY) + if err != nil { + return err + } + defer fd.Close() + start := 0 + for { + var line string + if i == -1 { + line = data[start:] + } else { + line = data[start : start+i+1] + } + _, err := fd.WriteString(line) + if err != nil { + return fmt.Errorf("failed to write %q: %w", line, err) + } + if i == -1 { + break + } + start += i + 1 + i = strings.Index(data[start:], "\n") + } + return nil +} + +const ( + cgroupfsDir = "/sys/fs/cgroup" + cgroupfsPrefix = cgroupfsDir + "/" +) + +var ( + // TestMode is set to true by unit tests that need "fake" cgroupfs. + TestMode bool + + cgroupRootHandle *os.File + prepOnce sync.Once + prepErr error + resolveFlags uint64 +) + +func prepareOpenat2() error { + prepOnce.Do(func() { + fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{ + Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC, + }) + if err != nil { + prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} + if err != unix.ENOSYS { + logrus.Warnf("falling back to securejoin: %s", prepErr) + } else { + logrus.Debug("openat2 not available, falling back to securejoin") + } + return + } + file := os.NewFile(uintptr(fd), cgroupfsDir) + + var st unix.Statfs_t + if err := unix.Fstatfs(int(file.Fd()), &st); err != nil { + prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err} + logrus.Warnf("falling back to securejoin: %s", prepErr) + return + } + + cgroupRootHandle = file + resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS + if st.Type == unix.CGROUP2_SUPER_MAGIC { + // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks + resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS + } + }) + + return prepErr +} + +func openFile(dir, file string, flags int) (*os.File, error) { + mode := os.FileMode(0) + if TestMode && 
flags&os.O_WRONLY != 0 { + // "emulate" cgroup fs for unit tests + flags |= os.O_TRUNC | os.O_CREATE + mode = 0o600 + } + // NOTE it is important to use filepath.Clean("/"+file) here + // (see https://github.com/opencontainers/runc/issues/4103)! + path := filepath.Join(dir, filepath.Clean("/"+file)) + + if prepareOpenat2() != nil { + return openFallback(path, flags, mode) + } + relPath, ok := strings.CutPrefix(path, cgroupfsPrefix) + if !ok { // Non-standard path, old system? + return openFallback(path, flags, mode) + } + + fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath, + &unix.OpenHow{ + Resolve: resolveFlags, + Flags: uint64(flags) | unix.O_CLOEXEC, + Mode: uint64(mode), + }) + if err != nil { + err = &os.PathError{Op: "openat2", Path: path, Err: err} + // Check if cgroupRootHandle is still opened to cgroupfsDir + // (happens when this package is incorrectly used + // across the chroot/pivot_root/mntns boundary, or + // when /sys/fs/cgroup is remounted). + // + // TODO: if such usage will ever be common, amend this + // to reopen cgroupRootHandle and retry openat2. + fdDest, fdErr := os.Readlink("/proc/thread-self/fd/" + strconv.Itoa(int(cgroupRootHandle.Fd()))) + if fdErr == nil && fdDest != cgroupfsDir { + // Wrap the error so it is clear that cgroupRootHandle + // is opened to an unexpected/wrong directory. + err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w", + cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err) + } + return nil, err + } + + return os.NewFile(uintptr(fd), path), nil +} + +var errNotCgroupfs = errors.New("not a cgroup file") + +// Can be changed by unit tests. +var openFallback = openAndCheck + +// openAndCheck is used when openat2(2) is not available. It checks the opened +// file is on cgroupfs, returning an error otherwise. 
+func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) { + fd, err := os.OpenFile(path, flags, mode) + if err != nil { + return nil, err + } + if TestMode { + return fd, nil + } + // Check this is a cgroupfs file. + var st unix.Statfs_t + if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil { + _ = fd.Close() + return nil, &os.PathError{Op: "statfs", Path: path, Err: err} + } + if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC { + _ = fd.Close() + return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs} + } + + return fd, nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/blkio.go b/vendor/github.com/opencontainers/cgroups/fs/blkio.go new file mode 100644 index 0000000000..f3c4c5cf81 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/blkio.go @@ -0,0 +1,310 @@ +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/cgroups" +) + +type BlkioGroup struct { + weightFilename string + weightDeviceFilename string +} + +func (s *BlkioGroup) Name() string { + return "blkio" +} + +func (s *BlkioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *BlkioGroup) Set(path string, r *cgroups.Resources) error { + s.detectWeightFilenames(path) + if r.BlkioWeight != 0 { + if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { + return err + } + } + + if r.BlkioLeafWeight != 0 { + if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil { + return err + } + } + for _, wd := range r.BlkioWeightDevice { + if wd.Weight != 0 { + if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil { + return err + } + } + if wd.LeafWeight != 0 { + if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { + return 
// splitBlkioStatLine reports whether r separates fields in a blkio stat
// line, i.e. whether it is a space or a colon.
func splitBlkioStatLine(r rune) bool {
	switch r {
	case ' ', ':':
		return true
	}
	return false
}
nil { + return nil, &parseError{Path: dir, File: file, Err: err} + } + minor := v + + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err = strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return nil, &parseError{Path: dir, File: file, Err: err} + } + blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) + } + if err := sc.Err(); err != nil { + return nil, &parseError{Path: dir, File: file, Err: err} + } + + return blkioStats, nil +} + +func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { + type blkioStatInfo struct { + filename string + blkioStatEntriesPtr *[]cgroups.BlkioStatEntry + } + bfqDebugStats := []blkioStatInfo{ + { + filename: "blkio.bfq.sectors_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, + }, + { + filename: "blkio.bfq.io_service_time_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive, + }, + { + filename: "blkio.bfq.io_wait_time_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive, + }, + { + filename: "blkio.bfq.io_merged_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive, + }, + { + filename: "blkio.bfq.io_queued_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive, + }, + { + filename: "blkio.bfq.time_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive, + }, + { + filename: "blkio.bfq.io_serviced_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, + }, + { + filename: "blkio.bfq.io_service_bytes_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, + }, + } + bfqStats := []blkioStatInfo{ + { + filename: "blkio.bfq.io_serviced_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, + }, + { + filename: "blkio.bfq.io_service_bytes_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, + }, + } + cfqStats := 
[]blkioStatInfo{ + { + filename: "blkio.sectors_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, + }, + { + filename: "blkio.io_service_time_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive, + }, + { + filename: "blkio.io_wait_time_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive, + }, + { + filename: "blkio.io_merged_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive, + }, + { + filename: "blkio.io_queued_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive, + }, + { + filename: "blkio.time_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive, + }, + { + filename: "blkio.io_serviced_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, + }, + { + filename: "blkio.io_service_bytes_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, + }, + } + throttleRecursiveStats := []blkioStatInfo{ + { + filename: "blkio.throttle.io_serviced_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, + }, + { + filename: "blkio.throttle.io_service_bytes_recursive", + blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, + }, + } + baseStats := []blkioStatInfo{ + { + filename: "blkio.throttle.io_serviced", + blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, + }, + { + filename: "blkio.throttle.io_service_bytes", + blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, + }, + } + orderedStats := [][]blkioStatInfo{ + bfqDebugStats, + bfqStats, + cfqStats, + throttleRecursiveStats, + baseStats, + } + + var blkioStats []cgroups.BlkioStatEntry + var err error + + for _, statGroup := range orderedStats { + for i, statInfo := range statGroup { + if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil { + // if error occurs on first file, move to next group + if i == 0 { + break + } + return err + } + *statInfo.blkioStatEntriesPtr = 
// SetRtSched writes the realtime scheduling settings from r to the cgroup at
// path: CpuRtPeriod goes to cpu.rt_period_us and CpuRtRuntime goes to
// cpu.rt_runtime_us. Settings whose value is zero are not written.
//
// The first write error is returned, with one exception: an EINVAL from the
// initial period write is tolerated when a runtime is also being set, in
// which case the period is written again after the runtime.
func (s *CpuGroup) SetRtSched(path string, r *cgroups.Resources) error {
	// period stays non-empty only while a retry of the period write is
	// still pending; it is cleared once the period was written successfully.
	var period string
	if r.CpuRtPeriod != 0 {
		period = strconv.FormatUint(r.CpuRtPeriod, 10)
		if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil {
			// The values of cpu.rt_period_us and cpu.rt_runtime_us
			// are inter-dependent and need to be set in a proper order.
			// If the kernel rejects the new period value with EINVAL
			// and the new runtime value is also being set, let's
			// ignore the error for now and retry later.
			if !errors.Is(err, unix.EINVAL) || r.CpuRtRuntime == 0 {
				return err
			}
		} else {
			period = ""
		}
	}
	if r.CpuRtRuntime != 0 {
		if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
			return err
		}
		// Retry the period write that was rejected before the runtime
		// was updated.
		if period != "" {
			if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil {
				return err
			}
		}
	}
	return nil
}
+ // older kernel), ignore it. + burst = "" + } else { + // Sometimes when the burst to be set is larger + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_burst exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. + if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } + } else { + burst = "" + } + } + if r.CpuQuota != 0 { + if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { + return err + } + if period != "" { + if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil { + return err + } + } + if burst != "" { + if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { + return err + } + } + } + + if r.CPUIdle != nil { + idle := strconv.FormatInt(*r.CPUIdle, 10) + if err := cgroups.WriteFile(path, "cpu.idle", idle); err != nil { + return err + } + } + + return s.SetRtSched(path, r) +} + +func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { + const file = "cpu.stat" + f, err := cgroups.OpenFile(path, file, os.O_RDONLY) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := fscommon.ParseKeyValue(sc.Text()) + if err != nil { + return &parseError{Path: path, File: file, Err: err} + } + switch t { + case "nr_periods": + stats.CpuStats.ThrottlingData.Periods = v + + case "nr_throttled": + stats.CpuStats.ThrottlingData.ThrottledPeriods = v + + case "throttled_time": + stats.CpuStats.ThrottlingData.ThrottledTime = v + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go b/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go new file mode 100644 index 0000000000..391a023c75 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go @@ -0,0 +1,158 @@ 
+package fs + +import ( + "bufio" + "os" + "strconv" + "strings" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +const ( + nsInSec = 1000000000 + + // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and + // on Linux it's a constant which is safe to be hard coded, + // so we can avoid using cgo here. For details, see: + // https://github.com/containerd/cgroups/pull/12 + clockTicks uint64 = 100 +) + +type CpuacctGroup struct{} + +func (s *CpuacctGroup) Name() string { + return "cpuacct" +} + +func (s *CpuacctGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *CpuacctGroup) Set(_ string, _ *cgroups.Resources) error { + return nil +} + +func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { + if !cgroups.PathExists(path) { + return nil + } + userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) + if err != nil { + return err + } + + totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage") + if err != nil { + return err + } + + percpuUsage, err := getPercpuUsage(path) + if err != nil { + return err + } + + percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path) + if err != nil { + return err + } + + stats.CpuStats.CpuUsage.TotalUsage = totalUsage + stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage + stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode + stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode + stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage + stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage + return nil +} + +// Returns user and kernel usage breakdown in nanoseconds. 
+func getCpuUsageBreakdown(path string) (uint64, uint64, error) { + var userModeUsage, kernelModeUsage uint64 + const ( + userField = "user" + systemField = "system" + file = "cpuacct.stat" + ) + + // Expected format: + // user + // system + data, err := cgroups.ReadFile(path, file) + if err != nil { + return 0, 0, err + } + + fields := strings.Fields(data) + if len(fields) < 4 || fields[0] != userField || fields[2] != systemField { + return 0, 0, malformedLine(path, file, data) + } + if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { + return 0, 0, &parseError{Path: path, File: file, Err: err} + } + if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { + return 0, 0, &parseError{Path: path, File: file, Err: err} + } + + return (userModeUsage * nsInSec) / clockTicks, (kernelModeUsage * nsInSec) / clockTicks, nil +} + +func getPercpuUsage(path string) ([]uint64, error) { + const file = "cpuacct.usage_percpu" + percpuUsage := []uint64{} + data, err := cgroups.ReadFile(path, file) + if err != nil { + return percpuUsage, err + } + for _, value := range strings.Fields(data) { + value, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return percpuUsage, &parseError{Path: path, File: file, Err: err} + } + percpuUsage = append(percpuUsage, value) + } + return percpuUsage, nil +} + +func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) { + usageKernelMode := []uint64{} + usageUserMode := []uint64{} + const file = "cpuacct.usage_all" + + fd, err := cgroups.OpenFile(path, file, os.O_RDONLY) + if os.IsNotExist(err) { + return usageKernelMode, usageUserMode, nil + } else if err != nil { + return nil, nil, err + } + defer fd.Close() + + scanner := bufio.NewScanner(fd) + scanner.Scan() // skipping header line + + for scanner.Scan() { + // Each line is: cpu user system + fields := strings.SplitN(scanner.Text(), " ", 3) + if len(fields) != 3 { + continue + } + + user, err := strconv.ParseUint(fields[1], 10, 64) 
+ if err != nil { + return nil, nil, &parseError{Path: path, File: file, Err: err} + } + usageUserMode = append(usageUserMode, user) + + kernel, err := strconv.ParseUint(fields[2], 10, 64) + if err != nil { + return nil, nil, &parseError{Path: path, File: file, Err: err} + } + usageKernelMode = append(usageKernelMode, kernel) + } + if err := scanner.Err(); err != nil { + return nil, nil, &parseError{Path: path, File: file, Err: err} + } + + return usageKernelMode, usageUserMode, nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpuset.go b/vendor/github.com/opencontainers/cgroups/fs/cpuset.go new file mode 100644 index 0000000000..ef6ff7da30 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/cpuset.go @@ -0,0 +1,276 @@ +package fs + +import ( + "errors" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +var ( + cpusetLock sync.Mutex + cpusetPrefix = "cpuset." + cpusetFastPath bool +) + +func cpusetFile(path string, name string) string { + cpusetLock.Lock() + defer cpusetLock.Unlock() + + // Only the v1 cpuset cgroup is allowed to mount with noprefix. + // See kernel source: https://github.com/torvalds/linux/blob/2e1b3cc9d7f790145a80cb705b168f05dab65df2/kernel/cgroup/cgroup-v1.c#L1070 + // Cpuset cannot be mounted with and without prefix simultaneously. + // Commonly used in Android environments. 
+ + if cpusetFastPath { + return cpusetPrefix + name + } + + err := unix.Access(filepath.Join(path, cpusetPrefix+name), unix.F_OK) + if err == nil { + // Use the fast path only if we can access one type of mount for cpuset already + cpusetFastPath = true + } else { + err = unix.Access(filepath.Join(path, name), unix.F_OK) + if err == nil { + cpusetPrefix = "" + cpusetFastPath = true + } + } + + return cpusetPrefix + name +} + +type CpusetGroup struct{} + +func (s *CpusetGroup) Name() string { + return "cpuset" +} + +func (s *CpusetGroup) Apply(path string, r *cgroups.Resources, pid int) error { + return s.ApplyDir(path, r, pid) +} + +func (s *CpusetGroup) Set(path string, r *cgroups.Resources) error { + if r.CpusetCpus != "" { + if err := cgroups.WriteFile(path, cpusetFile(path, "cpus"), r.CpusetCpus); err != nil { + return err + } + } + if r.CpusetMems != "" { + if err := cgroups.WriteFile(path, cpusetFile(path, "mems"), r.CpusetMems); err != nil { + return err + } + } + return nil +} + +func getCpusetStat(path string, file string) ([]uint16, error) { + var extracted []uint16 + fileContent, err := fscommon.GetCgroupParamString(path, file) + if err != nil { + return extracted, err + } + if len(fileContent) == 0 { + return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")} + } + + for _, s := range strings.Split(fileContent, ",") { + fromStr, toStr, ok := strings.Cut(s, "-") + if ok { + from, err := strconv.ParseUint(fromStr, 10, 16) + if err != nil { + return extracted, &parseError{Path: path, File: file, Err: err} + } + to, err := strconv.ParseUint(toStr, 10, 16) + if err != nil { + return extracted, &parseError{Path: path, File: file, Err: err} + } + if from > to { + return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, from > to")} + } + for i := from; i <= to; i++ { + extracted = append(extracted, uint16(i)) + } + } else { + value, err := strconv.ParseUint(s, 10, 16) + if err != nil { + return 
extracted, &parseError{Path: path, File: file, Err: err} + } + extracted = append(extracted, uint16(value)) + } + } + + return extracted, nil +} + +func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { + var err error + + stats.CPUSetStats.CPUs, err = getCpusetStat(path, cpusetFile(path, "cpus")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "cpu_exclusive")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.Mems, err = getCpusetStat(path, cpusetFile(path, "mems")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "mem_hardwall")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "mem_exclusive")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_migrate")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_spread_page")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_spread_slab")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_pressure")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "sched_load_balance")) + if err != nil && !errors.Is(err, os.ErrNotExist) { 
+ return err + } + + stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, cpusetFile(path, "sched_relax_domain_level")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + + return nil +} + +func (s *CpusetGroup) ApplyDir(dir string, r *cgroups.Resources, pid int) error { + // This might happen if we have no cpuset cgroup mounted. + // Just do nothing and don't fail. + if dir == "" { + return nil + } + // 'ensureParent' start with parent because we don't want to + // explicitly inherit from parent, it could conflict with + // 'cpuset.cpu_exclusive'. + if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil { + return err + } + if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) { + return err + } + // We didn't inherit cpuset configs from parent, but we have + // to ensure cpuset configs are set before moving task into the + // cgroup. + // The logic is, if user specified cpuset configs, use these + // specified configs, otherwise, inherit from parent. This makes + // cpuset configs work correctly with 'cpuset.cpu_exclusive', and + // keep backward compatibility. + if err := s.ensureCpusAndMems(dir, r); err != nil { + return err + } + // Since we are not using apply(), we need to place the pid + // into the procs file. + return cgroups.WriteCgroupProc(dir, pid) +} + +func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) { + if cpus, err = cgroups.ReadFile(parent, cpusetFile(parent, "cpus")); err != nil { + return + } + if mems, err = cgroups.ReadFile(parent, cpusetFile(parent, "mems")); err != nil { + return + } + return cpus, mems, nil +} + +// cpusetEnsureParent makes sure that the parent directories of current +// are created and populated with the proper cpus and mems files copied +// from their respective parent. It does that recursively, starting from +// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point). 
+func cpusetEnsureParent(current string) error { + var st unix.Statfs_t + + parent := filepath.Dir(current) + err := unix.Statfs(parent, &st) + if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC { + return nil + } + // Treat non-existing directory as cgroupfs as it will be created, + // and the root cpuset directory obviously exists. + if err != nil && err != unix.ENOENT { + return &os.PathError{Op: "statfs", Path: parent, Err: err} + } + + if err := cpusetEnsureParent(parent); err != nil { + return err + } + if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) { + return err + } + return cpusetCopyIfNeeded(current, parent) +} + +// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func cpusetCopyIfNeeded(current, parent string) error { + currentCpus, currentMems, err := getCpusetSubsystemSettings(current) + if err != nil { + return err + } + parentCpus, parentMems, err := getCpusetSubsystemSettings(parent) + if err != nil { + return err + } + + if isEmptyCpuset(currentCpus) { + if err := cgroups.WriteFile(current, cpusetFile(current, "cpus"), parentCpus); err != nil { + return err + } + } + if isEmptyCpuset(currentMems) { + if err := cgroups.WriteFile(current, cpusetFile(current, "mems"), parentMems); err != nil { + return err + } + } + return nil +} + +func isEmptyCpuset(str string) bool { + return str == "" || str == "\n" +} + +func (s *CpusetGroup) ensureCpusAndMems(path string, r *cgroups.Resources) error { + if err := s.Set(path, r); err != nil { + return err + } + return cpusetCopyIfNeeded(path, filepath.Dir(path)) +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/devices.go b/vendor/github.com/opencontainers/cgroups/fs/devices.go new file mode 100644 index 0000000000..26483ecb7d --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/devices.go @@ -0,0 +1,38 @@ +package fs + +import ( + "github.com/opencontainers/cgroups" +) + +type 
DevicesGroup struct{} + +func (s *DevicesGroup) Name() string { + return "devices" +} + +func (s *DevicesGroup) Apply(path string, r *cgroups.Resources, pid int) error { + if r.SkipDevices { + return nil + } + if path == "" { + // Return error here, since devices cgroup + // is a hard requirement for container's security. + return errSubsystemDoesNotExist + } + + return apply(path, pid) +} + +func (s *DevicesGroup) Set(path string, r *cgroups.Resources) error { + if cgroups.DevicesSetV1 == nil { + if len(r.Devices) == 0 { + return nil + } + return cgroups.ErrDevicesUnsupported + } + return cgroups.DevicesSetV1(path, r) +} + +func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/error.go b/vendor/github.com/opencontainers/cgroups/fs/error.go new file mode 100644 index 0000000000..f13033e3d8 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/error.go @@ -0,0 +1,15 @@ +package fs + +import ( + "fmt" + + "github.com/opencontainers/cgroups/fscommon" +) + +type parseError = fscommon.ParseError + +// malformedLine is used by all cgroupfs file parsers that expect a line +// in a particular format but get some garbage instead. 
+func malformedLine(path, file, line string) error { + return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)} +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/cgroups/fs/freezer.go new file mode 100644 index 0000000000..fe0f0dde48 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/freezer.go @@ -0,0 +1,157 @@ +package fs + +import ( + "errors" + "fmt" + "os" + "strings" + "time" + + "github.com/opencontainers/cgroups" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +type FreezerGroup struct{} + +func (s *FreezerGroup) Name() string { + return "freezer" +} + +func (s *FreezerGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *FreezerGroup) Set(path string, r *cgroups.Resources) (Err error) { + switch r.Freezer { + case cgroups.Frozen: + defer func() { + if Err != nil { + // Freezing failed, and it is bad and dangerous + // to leave the cgroup in FROZEN or FREEZING + // state, so (try to) thaw it back. + _ = cgroups.WriteFile(path, "freezer.state", string(cgroups.Thawed)) + } + }() + + // As per older kernel docs (freezer-subsystem.txt before + // kernel commit ef9fe980c6fcc1821), if FREEZING is seen, + // userspace should either retry or thaw. While current + // kernel cgroup v1 docs no longer mention a need to retry, + // even a recent kernel (v5.4, Ubuntu 20.04) can't reliably + // freeze a cgroup v1 while new processes keep appearing in it + // (either via fork/clone or by writing new PIDs to + // cgroup.procs). + // + // The numbers below are empirically chosen to have a decent + // chance to succeed in various scenarios ("runc pause/unpause + // with parallel runc exec" and "bare freeze/unfreeze on a very + // slow system"), tested on RHEL7 and Ubuntu 20.04 kernels. 
+ // + // Adding any amount of sleep in between retries did not + // increase the chances of successful freeze in "pause/unpause + // with parallel exec" reproducer. OTOH, adding an occasional + // sleep helped for the case where the system is extremely slow + // (CentOS 7 VM on GHA CI). + // + // Alas, this is still a game of chances, since the real fix + // belong to the kernel (cgroup v2 do not have this bug). + + for i := range 1000 { + if i%50 == 49 { + // Occasional thaw and sleep improves + // the chances to succeed in freezing + // in case new processes keep appearing + // in the cgroup. + _ = cgroups.WriteFile(path, "freezer.state", string(cgroups.Thawed)) + time.Sleep(10 * time.Millisecond) + } + + if err := cgroups.WriteFile(path, "freezer.state", string(cgroups.Frozen)); err != nil { + return err + } + + if i%25 == 24 { + // Occasional short sleep before reading + // the state back also improves the chances to + // succeed in freezing in case of a very slow + // system. + time.Sleep(10 * time.Microsecond) + } + state, err := cgroups.ReadFile(path, "freezer.state") + if err != nil { + return err + } + state = strings.TrimSpace(state) + switch state { + case "FREEZING": + continue + case string(cgroups.Frozen): + if i > 1 { + logrus.Debugf("frozen after %d retries", i) + } + return nil + default: + // should never happen + return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state)) + } + } + // Despite our best efforts, it got stuck in FREEZING. 
+ return errors.New("unable to freeze") + case cgroups.Thawed: + return cgroups.WriteFile(path, "freezer.state", string(cgroups.Thawed)) + case cgroups.Undefined: + return nil + default: + return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer)) + } +} + +func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} + +func (s *FreezerGroup) GetState(path string) (cgroups.FreezerState, error) { + for { + state, err := cgroups.ReadFile(path, "freezer.state") + if err != nil { + // If the kernel is too old, then we just treat the freezer as + // being in an "undefined" state. + if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) { + err = nil + } + return cgroups.Undefined, err + } + switch strings.TrimSpace(state) { + case "THAWED": + return cgroups.Thawed, nil + case "FROZEN": + // Find out whether the cgroup is frozen directly, + // or indirectly via an ancestor. + self, err := cgroups.ReadFile(path, "freezer.self_freezing") + if err != nil { + // If the kernel is too old, then we just treat + // it as being frozen. + if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) { + err = nil + } + return cgroups.Frozen, err + } + switch self { + case "0\n": + return cgroups.Thawed, nil + case "1\n": + return cgroups.Frozen, nil + default: + return cgroups.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self) + } + case "FREEZING": + // Make sure we get a stable freezer state, so retry if the cgroup + // is still undergoing freezing. This should be a temporary delay. 
+ time.Sleep(1 * time.Millisecond) + continue + default: + return cgroups.Undefined, fmt.Errorf("unknown freezer.state %q", state) + } + } +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/fs.go b/vendor/github.com/opencontainers/cgroups/fs/fs.go new file mode 100644 index 0000000000..23a8fb8742 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/fs.go @@ -0,0 +1,265 @@ +package fs + +import ( + "errors" + "fmt" + "os" + "sync" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +var subsystems = []subsystem{ + &CpusetGroup{}, + &DevicesGroup{}, + &MemoryGroup{}, + &CpuGroup{}, + &CpuacctGroup{}, + &PidsGroup{}, + &BlkioGroup{}, + &HugetlbGroup{}, + &NetClsGroup{}, + &NetPrioGroup{}, + &PerfEventGroup{}, + &FreezerGroup{}, + &RdmaGroup{}, + &NameGroup{GroupName: "name=systemd", Join: true}, + &NameGroup{GroupName: "misc", Join: true}, +} + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +func init() { + // If using cgroups-hybrid mode then add a "" controller indicating + // it should join the cgroups v2. + if cgroups.IsCgroup2HybridMode() { + subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true}) + } +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // GetStats fills in the stats for the subsystem. + GetStats(path string, stats *cgroups.Stats) error + // Apply creates and joins a cgroup, adding pid into it. Some + // subsystems use resources to pre-configure the cgroup parents + // before creating or joining it. + Apply(path string, r *cgroups.Resources, pid int) error + // Set sets the cgroup resources. 
+ Set(path string, r *cgroups.Resources) error +} + +type Manager struct { + mu sync.Mutex + cgroups *cgroups.Cgroup + paths map[string]string +} + +func NewManager(cg *cgroups.Cgroup, paths map[string]string) (*Manager, error) { + // Some v1 controllers (cpu, cpuset, and devices) expect + // cgroups.Resources to not be nil in Apply. + if cg.Resources == nil { + return nil, errors.New("cgroup v1 manager needs cgroups.Resources to be set during manager creation") + } + if cg.Resources.Unified != nil { + return nil, cgroups.ErrV1NoUnified + } + + if paths == nil { + var err error + paths, err = initPaths(cg) + if err != nil { + return nil, err + } + } + + return &Manager{ + cgroups: cg, + paths: paths, + }, nil +} + +// isIgnorableError returns whether err is a permission error (in the loose +// sense of the word). This includes EROFS (which for an unprivileged user is +// basically a permission error) and EACCES (for similar reasons) as well as +// the normal EPERM. +func isIgnorableError(rootless bool, err error) bool { + // We do not ignore errors if we are root. + if !rootless { + return false + } + // Is it an ordinary EPERM? + if errors.Is(err, os.ErrPermission) { + return true + } + // Handle some specific syscall errors. + var errno unix.Errno + if errors.As(err, &errno) { + return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES + } + return false +} + +func (m *Manager) Apply(pid int) (retErr error) { + m.mu.Lock() + defer m.mu.Unlock() + + c := m.cgroups + + for _, sys := range subsystems { + name := sys.Name() + p, ok := m.paths[name] + if !ok { + continue + } + + if err := sys.Apply(p, c.Resources, pid); err != nil { + // In the case of rootless (including euid=0 in userns), where an + // explicit cgroup path hasn't been set, we don't bail on error in + // case of permission problems here, but do delete the path from + // the m.paths map, since it is either non-existent and could not + // be created, or the pid could not be added to it. 
+ // + // Cases where limits for the subsystem have been set are handled + // later by Set, which fails with a friendly error (see + // if path == "" in Set). + if isIgnorableError(c.Rootless, err) && c.Path == "" { + retErr = cgroups.ErrRootless + delete(m.paths, name) + continue + } + return err + } + + } + return retErr +} + +func (m *Manager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + return cgroups.RemovePaths(m.paths) +} + +func (m *Manager) Path(subsys string) string { + m.mu.Lock() + defer m.mu.Unlock() + return m.paths[subsys] +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for _, sys := range subsystems { + path := m.paths[sys.Name()] + if path == "" { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + return stats, nil +} + +func (m *Manager) Set(r *cgroups.Resources) error { + if r == nil { + return nil + } + + if r.Unified != nil { + return cgroups.ErrV1NoUnified + } + + m.mu.Lock() + defer m.mu.Unlock() + for _, sys := range subsystems { + path := m.paths[sys.Name()] + if err := sys.Set(path, r); err != nil { + // When rootless is true, errors from the device subsystem + // are ignored, as it is really not expected to work. + if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) { + continue + } + // However, errors from other subsystems are not ignored. + // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if path == "" { + // We never created a path for this cgroup, so we cannot set + // limits for it (though we have already tried at this point). 
+ return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name()) + } + return err + } + } + + return nil +} + +// Freeze toggles the container's freezer cgroup depending on the state +// provided +func (m *Manager) Freeze(state cgroups.FreezerState) error { + path := m.Path("freezer") + if path == "" { + return errors.New("cannot toggle freezer: cgroups not configured for container") + } + + prevState := m.cgroups.Resources.Freezer + m.cgroups.Resources.Freezer = state + freezer := &FreezerGroup{} + if err := freezer.Set(path, m.cgroups.Resources); err != nil { + m.cgroups.Resources.Freezer = prevState + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + return cgroups.GetPids(m.Path("devices")) +} + +func (m *Manager) GetAllPids() ([]int, error) { + return cgroups.GetAllPids(m.Path("devices")) +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + defer m.mu.Unlock() + return m.paths +} + +func (m *Manager) GetCgroups() (*cgroups.Cgroup, error) { + return m.cgroups, nil +} + +func (m *Manager) GetFreezerState() (cgroups.FreezerState, error) { + dir := m.Path("freezer") + // If the container doesn't have the freezer cgroup, say it's undefined. + if dir == "" { + return cgroups.Undefined, nil + } + freezer := &FreezerGroup{} + return freezer.GetState(dir) +} + +func (m *Manager) Exists() bool { + return cgroups.PathExists(m.Path("devices")) +} + +func OOMKillCount(path string) (uint64, error) { + return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill") +} + +func (m *Manager) OOMKillCount() (uint64, error) { + c, err := OOMKillCount(m.Path("memory")) + // Ignore ENOENT when rootless as it couldn't create cgroup. 
+ if err != nil && m.cgroups.Rootless && os.IsNotExist(err) { + err = nil + } + + return c, err +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go b/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go new file mode 100644 index 0000000000..698fd691e1 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go @@ -0,0 +1,83 @@ +package fs + +import ( + "errors" + "os" + "strconv" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +type HugetlbGroup struct{} + +func (s *HugetlbGroup) Name() string { + return "hugetlb" +} + +func (s *HugetlbGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *HugetlbGroup) Set(path string, r *cgroups.Resources) error { + const suffix = ".limit_in_bytes" + skipRsvd := false + + for _, hugetlb := range r.HugetlbLimit { + prefix := "hugetlb." + hugetlb.Pagesize + val := strconv.FormatUint(hugetlb.Limit, 10) + if err := cgroups.WriteFile(path, prefix+suffix, val); err != nil { + return err + } + if skipRsvd { + continue + } + if err := cgroups.WriteFile(path, prefix+".rsvd"+suffix, val); err != nil { + if errors.Is(err, os.ErrNotExist) { + skipRsvd = true + continue + } + return err + } + } + + return nil +} + +func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + if !cgroups.PathExists(path) { + return nil + } + rsvd := ".rsvd" + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range cgroups.HugePageSizes() { + again: + prefix := "hugetlb." 
+ pageSize + rsvd + + value, err := fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes") + if err != nil { + if rsvd != "" && errors.Is(err, os.ErrNotExist) { + rsvd = "" + goto again + } + return err + } + hugetlbStats.Usage = value + + value, err = fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes") + if err != nil { + return err + } + hugetlbStats.MaxUsage = value + + value, err = fscommon.GetCgroupParamUint(path, prefix+".failcnt") + if err != nil { + return err + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = hugetlbStats + } + + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/memory.go b/vendor/github.com/opencontainers/cgroups/fs/memory.go new file mode 100644 index 0000000000..d92f2322be --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/memory.go @@ -0,0 +1,356 @@ +package fs + +import ( + "bufio" + "errors" + "fmt" + "math" + "os" + "path/filepath" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +const ( + cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" + cgroupMemoryLimit = "memory.limit_in_bytes" + cgroupMemoryUsage = "memory.usage_in_bytes" + cgroupMemoryMaxUsage = "memory.max_usage_in_bytes" +) + +type MemoryGroup struct{} + +func (s *MemoryGroup) Name() string { + return "memory" +} + +func (s *MemoryGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func setMemory(path string, val int64) error { + if val == 0 { + return nil + } + + err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10)) + if !errors.Is(err, unix.EBUSY) { + return err + } + + // EBUSY means the kernel can't set new limit as it's too low + // (lower than the current usage). Return more specific error. 
+ usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage) + if err != nil { + return err + } + max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage) + if err != nil { + return err + } + + return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max) +} + +func setSwap(path string, val int64) error { + if val == 0 { + return nil + } + + return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10)) +} + +func setMemoryAndSwap(path string, r *cgroups.Resources) error { + // If the memory update is set to -1 and the swap is not explicitly + // set, we should also set swap to -1, it means unlimited memory. + if r.Memory == -1 && r.MemorySwap == 0 { + // Only set swap if it's enabled in kernel + if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { + r.MemorySwap = -1 + } + } + + // When memory and swap memory are both set, we need to handle the cases + // for updating container. + if r.Memory != 0 && r.MemorySwap != 0 { + curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit) + if err != nil { + return err + } + + // When update memory limit, we should adapt the write sequence + // for memory and swap memory, so it won't fail because the new + // value and the old value don't fit kernel's validation. 
+ if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) { + if err := setSwap(path, r.MemorySwap); err != nil { + return err + } + if err := setMemory(path, r.Memory); err != nil { + return err + } + return nil + } + } + + if err := setMemory(path, r.Memory); err != nil { + return err + } + if err := setSwap(path, r.MemorySwap); err != nil { + return err + } + + return nil +} + +func (s *MemoryGroup) Set(path string, r *cgroups.Resources) error { + if err := setMemoryAndSwap(path, r); err != nil { + return err + } + + // ignore KernelMemory and KernelMemoryTCP + + if r.MemoryReservation != 0 { + if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil { + return err + } + } + + if r.OomKillDisable { + if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil { + return err + } + } + if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 { + return nil + } else if *r.MemorySwappiness <= 100 { + if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil { + return err + } + } else { + return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness) + } + + return nil +} + +func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { + const file = "memory.stat" + statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := fscommon.ParseKeyValue(sc.Text()) + if err != nil { + return &parseError{Path: path, File: file, Err: err} + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") + if err 
!= nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + stats.MemoryStats.SwapOnlyUsage = cgroups.MemoryData{ + Usage: swapUsage.Usage - memoryUsage.Usage, + Failcnt: swapUsage.Failcnt - memoryUsage.Failcnt, + } + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage + kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") + if err != nil { + return err + } + stats.MemoryStats.KernelTCPUsage = kernelTCPUsage + + value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy") + if err != nil { + return err + } + if value == 1 { + stats.MemoryStats.UseHierarchy = true + } + + pagesByNUMA, err := getPageUsageByNUMA(path) + if err != nil { + return err + } + stats.MemoryStats.PageUsageByNUMA = pagesByNUMA + + return nil +} + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = "memory." + name + } + var ( + usage = moduleName + ".usage_in_bytes" + maxUsage = moduleName + ".max_usage_in_bytes" + failcnt = moduleName + ".failcnt" + limit = moduleName + ".limit_in_bytes" + ) + + value, err := fscommon.GetCgroupParamUint(path, usage) + if err != nil { + if name != "" && os.IsNotExist(err) { + // Ignore ENOENT as swap and kmem controllers + // are optional in the kernel. + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, err + } + memoryData.Usage = value + value, err = fscommon.GetCgroupParamUint(path, maxUsage) + if err != nil { + return cgroups.MemoryData{}, err + } + memoryData.MaxUsage = value + value, err = fscommon.GetCgroupParamUint(path, failcnt) + if err != nil { + return cgroups.MemoryData{}, err + } + memoryData.Failcnt = value + value, err = fscommon.GetCgroupParamUint(path, limit) + if err != nil { + if name == "kmem" && os.IsNotExist(err) { + // Ignore ENOENT as kmem.limit_in_bytes has + // been removed in newer kernels. 
+ return memoryData, nil + } + + return cgroups.MemoryData{}, err + } + memoryData.Limit = value + + return memoryData, nil +} + +func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) { + const ( + maxColumns = math.MaxUint8 + 1 + file = "memory.numa_stat" + ) + stats := cgroups.PageUsageByNUMA{} + + fd, err := cgroups.OpenFile(path, file, os.O_RDONLY) + if os.IsNotExist(err) { + return stats, nil + } else if err != nil { + return stats, err + } + defer fd.Close() + + // File format is documented in linux/Documentation/cgroup-v1/memory.txt + // and it looks like this: + // + // total= N0= N1= ... + // file= N0= N1= ... + // anon= N0= N1= ... + // unevictable= N0= N1= ... + // hierarchical_= N0= N1= ... + + scanner := bufio.NewScanner(fd) + for scanner.Scan() { + var field *cgroups.PageStats + + line := scanner.Text() + columns := strings.SplitN(line, " ", maxColumns) + for i, column := range columns { + key, val, ok := strings.Cut(column, "=") + // Some custom kernels have non-standard fields, like + // numa_locality 0 0 0 0 0 0 0 0 0 0 + // numa_exectime 0 + if !ok { + if i == 0 { + // Ignore/skip those. + break + } else { + // The first column was already validated, + // so be strict to the rest. + return stats, malformedLine(path, file, line) + } + } + if i == 0 { // First column: key is name, val is total. + field = getNUMAField(&stats, key) + if field == nil { // unknown field (new kernel?) + break + } + field.Total, err = strconv.ParseUint(val, 0, 64) + if err != nil { + return stats, &parseError{Path: path, File: file, Err: err} + } + field.Nodes = map[uint8]uint64{} + } else { // Subsequent columns: key is N, val is usage. + if len(key) < 2 || key[0] != 'N' { + // This is definitely an error. 
+ return stats, malformedLine(path, file, line) + } + + n, err := strconv.ParseUint(key[1:], 10, 8) + if err != nil { + return stats, &parseError{Path: path, File: file, Err: err} + } + + usage, err := strconv.ParseUint(val, 10, 64) + if err != nil { + return stats, &parseError{Path: path, File: file, Err: err} + } + + field.Nodes[uint8(n)] = usage + } + + } + } + if err := scanner.Err(); err != nil { + return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err} + } + + return stats, nil +} + +func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats { + switch name { + case "total": + return &stats.Total + case "file": + return &stats.File + case "anon": + return &stats.Anon + case "unevictable": + return &stats.Unevictable + case "hierarchical_total": + return &stats.Hierarchical.Total + case "hierarchical_file": + return &stats.Hierarchical.File + case "hierarchical_anon": + return &stats.Hierarchical.Anon + case "hierarchical_unevictable": + return &stats.Hierarchical.Unevictable + } + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/name.go b/vendor/github.com/opencontainers/cgroups/fs/name.go new file mode 100644 index 0000000000..28643519b5 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/name.go @@ -0,0 +1,30 @@ +package fs + +import ( + "github.com/opencontainers/cgroups" +) + +type NameGroup struct { + GroupName string + Join bool +} + +func (s *NameGroup) Name() string { + return s.GroupName +} + +func (s *NameGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + if s.Join { + // Ignore errors if the named cgroup does not exist. 
+ _ = apply(path, pid) + } + return nil +} + +func (s *NameGroup) Set(_ string, _ *cgroups.Resources) error { + return nil +} + +func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/net_cls.go b/vendor/github.com/opencontainers/cgroups/fs/net_cls.go new file mode 100644 index 0000000000..2bd6c5ab21 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/net_cls.go @@ -0,0 +1,31 @@ +package fs + +import ( + "strconv" + + "github.com/opencontainers/cgroups" +) + +type NetClsGroup struct{} + +func (s *NetClsGroup) Name() string { + return "net_cls" +} + +func (s *NetClsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *NetClsGroup) Set(path string, r *cgroups.Resources) error { + if r.NetClsClassid != 0 { + if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil { + return err + } + } + + return nil +} + +func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/net_prio.go b/vendor/github.com/opencontainers/cgroups/fs/net_prio.go new file mode 100644 index 0000000000..b51682b6da --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/net_prio.go @@ -0,0 +1,29 @@ +package fs + +import ( + "github.com/opencontainers/cgroups" +) + +type NetPrioGroup struct{} + +func (s *NetPrioGroup) Name() string { + return "net_prio" +} + +func (s *NetPrioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *NetPrioGroup) Set(path string, r *cgroups.Resources) error { + for _, prioMap := range r.NetPrioIfpriomap { + if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { + 
return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/paths.go b/vendor/github.com/opencontainers/cgroups/fs/paths.go new file mode 100644 index 0000000000..edbe041ea8 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/paths.go @@ -0,0 +1,169 @@ +package fs + +import ( + "errors" + "os" + "path/filepath" + "sync" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/internal/path" +) + +// The absolute path to the root of the cgroup hierarchies. +var ( + cgroupRootLock sync.Mutex + cgroupRoot string +) + +const defaultCgroupRoot = "/sys/fs/cgroup" + +func initPaths(cg *cgroups.Cgroup) (map[string]string, error) { + root, err := rootPath() + if err != nil { + return nil, err + } + + inner, err := path.Inner(cg) + if err != nil { + return nil, err + } + + paths := make(map[string]string) + for _, sys := range subsystems { + name := sys.Name() + path, err := subsysPath(root, inner, name) + if err != nil { + // The non-presence of the devices subsystem + // is considered fatal for security reasons. + if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") { + continue + } + + return nil, err + } + paths[name] = path + } + + return paths, nil +} + +func tryDefaultCgroupRoot() string { + var st, pst unix.Stat_t + + // (1) it should be a directory... + err := unix.Lstat(defaultCgroupRoot, &st) + if err != nil || st.Mode&unix.S_IFDIR == 0 { + return "" + } + + // (2) ... and a mount point ... + err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst) + if err != nil { + return "" + } + + if st.Dev == pst.Dev { + // parent dir has the same dev -- not a mount point + return "" + } + + // (3) ... of 'tmpfs' fs type. + var fst unix.Statfs_t + err = unix.Statfs(defaultCgroupRoot, &fst) + if err != nil || fst.Type != unix.TMPFS_MAGIC { + return "" + } + + // (4) it should have at least 1 entry ... 
+ dir, err := os.Open(defaultCgroupRoot) + if err != nil { + return "" + } + defer dir.Close() + names, err := dir.Readdirnames(1) + if err != nil { + return "" + } + if len(names) < 1 { + return "" + } + // ... which is a cgroup mount point. + err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst) + if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC { + return "" + } + + return defaultCgroupRoot +} + +// rootPath finds and returns path to the root of the cgroup hierarchies. +func rootPath() (string, error) { + cgroupRootLock.Lock() + defer cgroupRootLock.Unlock() + + if cgroupRoot != "" { + return cgroupRoot, nil + } + + // fast path + cgroupRoot = tryDefaultCgroupRoot() + if cgroupRoot != "" { + return cgroupRoot, nil + } + + // slow path: parse mountinfo + mi, err := cgroups.GetCgroupMounts(false) + if err != nil { + return "", err + } + if len(mi) < 1 { + return "", errors.New("no cgroup mount found in mountinfo") + } + + // Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"), + // use its parent directory. + root := filepath.Dir(mi[0].Mountpoint) + + if _, err := os.Stat(root); err != nil { + return "", err + } + + cgroupRoot = root + return cgroupRoot, nil +} + +func subsysPath(root, inner, subsystem string) (string, error) { + // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. + if filepath.IsAbs(inner) { + mnt, err := cgroups.FindCgroupMountpoint(root, subsystem) + // If we didn't mount the subsystem, there is no point we make the path. + if err != nil { + return "", err + } + + // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. + return filepath.Join(root, filepath.Base(mnt), inner), nil + } + + // Use GetOwnCgroupPath for dind-like cases, when cgroupns is not + // available. This is ugly. 
+ parentPath, err := cgroups.GetOwnCgroupPath(subsystem) + if err != nil { + return "", err + } + + return filepath.Join(parentPath, inner), nil +} + +func apply(path string, pid int) error { + if path == "" { + return nil + } + if err := os.MkdirAll(path, 0o755); err != nil { + return err + } + return cgroups.WriteCgroupProc(path, pid) +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/perf_event.go b/vendor/github.com/opencontainers/cgroups/fs/perf_event.go new file mode 100644 index 0000000000..929c412a3a --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/perf_event.go @@ -0,0 +1,23 @@ +package fs + +import ( + "github.com/opencontainers/cgroups" +) + +type PerfEventGroup struct{} + +func (s *PerfEventGroup) Name() string { + return "perf_event" +} + +func (s *PerfEventGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *PerfEventGroup) Set(_ string, _ *cgroups.Resources) error { + return nil +} + +func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/pids.go b/vendor/github.com/opencontainers/cgroups/fs/pids.go new file mode 100644 index 0000000000..9319761e6a --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/pids.go @@ -0,0 +1,61 @@ +package fs + +import ( + "math" + "strconv" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +type PidsGroup struct{} + +func (s *PidsGroup) Name() string { + return "pids" +} + +func (s *PidsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *PidsGroup) Set(path string, r *cgroups.Resources) error { + if r.PidsLimit != 0 { + // "max" is the fallback value. 
+ limit := "max" + + if r.PidsLimit > 0 { + limit = strconv.FormatInt(r.PidsLimit, 10) + } + + if err := cgroups.WriteFile(path, "pids.max", limit); err != nil { + return err + } + } + + return nil +} + +func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { + if !cgroups.PathExists(path) { + return nil + } + current, err := fscommon.GetCgroupParamUint(path, "pids.current") + if err != nil { + return err + } + + max, err := fscommon.GetCgroupParamUint(path, "pids.max") + if err != nil { + return err + } + // If no limit is set, read from pids.max returns "max", which is + // converted to MaxUint64 by GetCgroupParamUint. Historically, we + // represent "no limit" for pids as 0, thus this conversion. + if max == math.MaxUint64 { + max = 0 + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs/rdma.go b/vendor/github.com/opencontainers/cgroups/fs/rdma.go new file mode 100644 index 0000000000..4b175365f2 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs/rdma.go @@ -0,0 +1,24 @@ +package fs + +import ( + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +type RdmaGroup struct{} + +func (s *RdmaGroup) Name() string { + return "rdma" +} + +func (s *RdmaGroup) Apply(path string, _ *cgroups.Resources, pid int) error { + return apply(path, pid) +} + +func (s *RdmaGroup) Set(path string, r *cgroups.Resources) error { + return fscommon.RdmaSet(path, r) +} + +func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error { + return fscommon.RdmaGetStats(path, stats) +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/cpu.go b/vendor/github.com/opencontainers/cgroups/fs2/cpu.go new file mode 100644 index 0000000000..8a22a32c45 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/cpu.go @@ -0,0 +1,123 @@ +package fs2 + +import ( + "bufio" + "errors" + "os" + "strconv" + + "golang.org/x/sys/unix" 
+ + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +func isCPUSet(r *cgroups.Resources) bool { + return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != nil +} + +func setCPU(dirPath string, r *cgroups.Resources) error { + if !isCPUSet(r) { + return nil + } + + if r.CPUIdle != nil { + if err := cgroups.WriteFile(dirPath, "cpu.idle", strconv.FormatInt(*r.CPUIdle, 10)); err != nil { + return err + } + } + + // NOTE: .CpuShares is not used here. Conversion is the caller's responsibility. + if r.CpuWeight != 0 { + if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil { + return err + } + } + + var burst string + if r.CpuBurst != nil { + burst = strconv.FormatUint(*r.CpuBurst, 10) + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + // Sometimes when the burst to be set is larger + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_burst exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. 
+ if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } else { + burst = "" + } + } + if r.CpuQuota != 0 || r.CpuPeriod != 0 { + str := "max" + if r.CpuQuota > 0 { + str = strconv.FormatInt(r.CpuQuota, 10) + } + period := r.CpuPeriod + if period == 0 { + // This default value is documented in + // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html + period = 100000 + } + str += " " + strconv.FormatUint(period, 10) + if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil { + return err + } + if burst != "" { + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + return err + } + } + } + + return nil +} + +func statCpu(dirPath string, stats *cgroups.Stats) error { + const file = "cpu.stat" + f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY) + if err != nil { + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := fscommon.ParseKeyValue(sc.Text()) + if err != nil { + return &parseError{Path: dirPath, File: file, Err: err} + } + switch t { + case "usage_usec": + stats.CpuStats.CpuUsage.TotalUsage = v * 1000 + + case "user_usec": + stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000 + + case "system_usec": + stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000 + + case "nr_periods": + stats.CpuStats.ThrottlingData.Periods = v + + case "nr_throttled": + stats.CpuStats.ThrottlingData.ThrottledPeriods = v + + case "throttled_usec": + stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000 + + case "nr_bursts": + stats.CpuStats.BurstData.BurstsPeriods = v + + case "burst_usec": + stats.CpuStats.BurstData.BurstTime = v * 1000 + } + } + if err := sc.Err(); err != nil { + return &parseError{Path: dirPath, File: file, Err: err} + } + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/cpuset.go b/vendor/github.com/opencontainers/cgroups/fs2/cpuset.go new file mode 100644 index 0000000000..9399919a06 --- /dev/null +++ 
b/vendor/github.com/opencontainers/cgroups/fs2/cpuset.go @@ -0,0 +1,27 @@ +package fs2 + +import ( + "github.com/opencontainers/cgroups" +) + +func isCpusetSet(r *cgroups.Resources) bool { + return r.CpusetCpus != "" || r.CpusetMems != "" +} + +func setCpuset(dirPath string, r *cgroups.Resources) error { + if !isCpusetSet(r) { + return nil + } + + if r.CpusetCpus != "" { + if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil { + return err + } + } + if r.CpusetMems != "" { + if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/create.go b/vendor/github.com/opencontainers/cgroups/fs2/create.go new file mode 100644 index 0000000000..565ca88307 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/create.go @@ -0,0 +1,151 @@ +package fs2 + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/opencontainers/cgroups" +) + +func supportedControllers() (string, error) { + return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers") +} + +// needAnyControllers returns whether we enable some supported controllers or not, +// based on (1) controllers available and (2) resources that are being set. +// We don't check "pseudo" controllers such as +// "freezer" and "devices". 
+func needAnyControllers(r *cgroups.Resources) (bool, error) { + if r == nil { + return false, nil + } + + // list of all available controllers + content, err := supportedControllers() + if err != nil { + return false, err + } + avail := make(map[string]struct{}) + for _, ctr := range strings.Fields(content) { + avail[ctr] = struct{}{} + } + + // check whether the controller if available or not + have := func(controller string) bool { + _, ok := avail[controller] + return ok + } + + if isPidsSet(r) && have("pids") { + return true, nil + } + if isMemorySet(r) && have("memory") { + return true, nil + } + if isIoSet(r) && have("io") { + return true, nil + } + if isCPUSet(r) && have("cpu") { + return true, nil + } + if isCpusetSet(r) && have("cpuset") { + return true, nil + } + if isHugeTlbSet(r) && have("hugetlb") { + return true, nil + } + + return false, nil +} + +// containsDomainController returns whether the current config contains domain controller or not. +// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html +// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids. +func containsDomainController(r *cgroups.Resources) bool { + return isMemorySet(r) || isIoSet(r) || isCPUSet(r) || isHugeTlbSet(r) +} + +// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers. 
+func CreateCgroupPath(path string, c *cgroups.Cgroup) (Err error) { + if !strings.HasPrefix(path, UnifiedMountpoint) { + return fmt.Errorf("invalid cgroup path %s", path) + } + + content, err := supportedControllers() + if err != nil { + return err + } + + const ( + cgTypeFile = "cgroup.type" + cgStCtlFile = "cgroup.subtree_control" + ) + ctrs := strings.Fields(content) + res := "+" + strings.Join(ctrs, " +") + + elements := strings.Split(path, "/") + elements = elements[3:] + current := "/sys/fs" + for i, e := range elements { + current = filepath.Join(current, e) + if i > 0 { + if err := os.Mkdir(current, 0o755); err != nil { + if !os.IsExist(err) { + return err + } + } else { + // If the directory was created, be sure it is not left around on errors. + current := current + defer func() { + if Err != nil { + os.Remove(current) + } + }() + } + cgType, _ := cgroups.ReadFile(current, cgTypeFile) + cgType = strings.TrimSpace(cgType) + switch cgType { + // If the cgroup is in an invalid mode (usually this means there's an internal + // process in the cgroup tree, because we created a cgroup under an + // already-populated-by-other-processes cgroup), then we have to error out if + // the user requested controllers which are not thread-aware. However, if all + // the controllers requested are thread-aware we can simply put the cgroup into + // threaded mode. + case "domain invalid": + if containsDomainController(c.Resources) { + return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current) + } else { + // Not entirely correct (in theory we'd always want to be a domain -- + // since that means we're a properly delegated cgroup subtree) but in + // this case there's not much we can do and it's better than giving an + // error. 
+ _ = cgroups.WriteFile(current, cgTypeFile, "threaded") + } + // If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers + // (and you cannot usually take a cgroup out of threaded mode). + case "domain threaded": + fallthrough + case "threaded": + if containsDomainController(c.Resources) { + return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType) + } + } + } + // enable all supported controllers + if i < len(elements)-1 { + if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil { + // try write one by one + allCtrs := strings.Split(res, " ") + for _, ctr := range allCtrs { + _ = cgroups.WriteFile(current, cgStCtlFile, ctr) + } + } + // Some controllers might not be enabled when rootless or containerized, + // but we don't catch the error here. (Caught in setXXX() functions.) + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/defaultpath.go b/vendor/github.com/opencontainers/cgroups/fs2/defaultpath.go new file mode 100644 index 0000000000..0bc479de3a --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/defaultpath.go @@ -0,0 +1,80 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package fs2 + +import ( + "bufio" + "errors" + "io" + "os" + "path/filepath" + "strings" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/internal/path" +) + +const UnifiedMountpoint = "/sys/fs/cgroup" + +func defaultDirPath(c *cgroups.Cgroup) (string, error) { + innerPath, err := path.Inner(c) + if err != nil { + return "", err + } + + if filepath.IsAbs(innerPath) { + return filepath.Join(UnifiedMountpoint, innerPath), nil + } + + // we don't need to use /proc/thread-self here because runc always runs + // with every thread in the same cgroup. This lets us avoid having to do + // runtime.LockOSThread. + ownCgroup, err := parseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + // The current user scope most probably has tasks in it already, + // making it impossible to enable controllers for its sub-cgroup. + // A parent cgroup (with no tasks in it) is what we need. + ownCgroup = filepath.Dir(ownCgroup) + + return filepath.Join(UnifiedMountpoint, ownCgroup, innerPath), nil +} + +// parseCgroupFile parses /proc/PID/cgroup file and return string +func parseCgroupFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + return parseCgroupFromReader(f) +} + +func parseCgroupFromReader(r io.Reader) (string, error) { + s := bufio.NewScanner(r) + for s.Scan() { + // "0::/user.slice/user-1001.slice/session-1.scope" + if path, ok := strings.CutPrefix(s.Text(), "0::"); ok { + return path, nil + } + } + if err := s.Err(); err != nil { + return "", err + } + return "", errors.New("cgroup path not found") +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/freezer.go b/vendor/github.com/opencontainers/cgroups/fs2/freezer.go new file mode 100644 index 0000000000..6307e68db7 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/freezer.go @@ -0,0 +1,140 @@ +package fs2 + +import ( + "bufio" + "errors" + "fmt" + "os" + "strings" + "time" + + 
"golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" +) + +func setFreezer(dirPath string, state cgroups.FreezerState) error { + var stateStr string + switch state { + case cgroups.Undefined: + return nil + case cgroups.Frozen: + stateStr = "1" + case cgroups.Thawed: + stateStr = "0" + default: + return fmt.Errorf("invalid freezer state %q requested", state) + } + + fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR) + if err != nil { + // We can ignore this request as long as the user didn't ask us to + // freeze the container (since without the freezer cgroup, that's a + // no-op). + if state != cgroups.Frozen { + return nil + } + return fmt.Errorf("freezer not supported: %w", err) + } + defer fd.Close() + + if _, err := fd.WriteString(stateStr); err != nil { + return err + } + // Confirm that the cgroup did actually change states. + if actualState, err := readFreezer(dirPath, fd); err != nil { + return err + } else if actualState != state { + return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState) + } + return nil +} + +func getFreezer(dirPath string) (cgroups.FreezerState, error) { + fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY) + if err != nil { + // If the kernel is too old, then we just treat the freezer as + // being in an "undefined" state and ignore the error. + return cgroups.Undefined, ignoreNotExistOrNoDeviceError(err) + } + defer fd.Close() + + return readFreezer(dirPath, fd) +} + +func readFreezer(dirPath string, fd *os.File) (cgroups.FreezerState, error) { + if _, err := fd.Seek(0, 0); err != nil { + // If the cgroup path is deleted at this point, then we just treat the freezer as + // being in an "undefined" state and ignore the error. 
+ return cgroups.Undefined, ignoreNotExistOrNoDeviceError(err) + } + state := make([]byte, 2) + if _, err := fd.Read(state); err != nil { + // If the cgroup path is deleted at this point, then we just treat the freezer as + // being in an "undefined" state and ignore the error. + return cgroups.Undefined, ignoreNotExistOrNoDeviceError(err) + } + switch string(state) { + case "0\n": + return cgroups.Thawed, nil + case "1\n": + return waitFrozen(dirPath) + default: + return cgroups.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state) + } +} + +// ignoreNotExistOrNoDeviceError checks if the error is either a "not exist" error +// or a "no device" error, and returns nil in those cases. Otherwise, it returns the error. +func ignoreNotExistOrNoDeviceError(err error) error { + // We can safely ignore the error in the following two common situations: + // 1. The cgroup path does not exist at the time of opening(eg: the kernel is too old) + // — indicated by os.IsNotExist. + // 2. The cgroup path is deleted during the seek/read operation — indicated by + // errors.Is(err, unix.ENODEV). + // These conditions are expected and do not require special handling. + if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) { + return nil + } + return err +} + +// waitFrozen polls cgroup.events until it sees "frozen 1" in it. +func waitFrozen(dirPath string) (cgroups.FreezerState, error) { + fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY) + if err != nil { + return cgroups.Undefined, err + } + defer fd.Close() + + // XXX: Simple wait/read/retry is used here. An implementation + // based on poll(2) or inotify(7) is possible, but it makes the code + // much more complicated. Maybe address this later. + const ( + // Perform maxIter with waitTime in between iterations. 
+ waitTime = 10 * time.Millisecond + maxIter = 1000 + ) + scanner := bufio.NewScanner(fd) + for i := 0; scanner.Scan(); { + if i == maxIter { + return cgroups.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter) + } + if val, ok := strings.CutPrefix(scanner.Text(), "frozen "); ok { + if val[0] == '1' { + return cgroups.Frozen, nil + } + + i++ + // wait, then re-read + time.Sleep(waitTime) + _, err := fd.Seek(0, 0) + if err != nil { + return cgroups.Undefined, err + } + } + } + // Should only reach here either on read error, + // or if the file does not contain "frozen " line. + return cgroups.Undefined, scanner.Err() +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/fs2.go b/vendor/github.com/opencontainers/cgroups/fs2/fs2.go new file mode 100644 index 0000000000..c5d5a1f8ec --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/fs2.go @@ -0,0 +1,316 @@ +package fs2 + +import ( + "errors" + "fmt" + "os" + "strings" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +type parseError = fscommon.ParseError + +type Manager struct { + config *cgroups.Cgroup + // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" + dirPath string + // controllers is content of "cgroup.controllers" file. + // excludes pseudo-controllers ("devices" and "freezer"). + controllers map[string]struct{} +} + +// NewManager creates a manager for cgroup v2 unified hierarchy. +// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope". +// If dirPath is empty, it is automatically set using config. 
+func NewManager(config *cgroups.Cgroup, dirPath string) (*Manager, error) { + if dirPath == "" { + var err error + dirPath, err = defaultDirPath(config) + if err != nil { + return nil, err + } + } + + m := &Manager{ + config: config, + dirPath: dirPath, + } + return m, nil +} + +func (m *Manager) getControllers() error { + if m.controllers != nil { + return nil + } + + data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers") + if err != nil { + if m.config.Rootless && m.config.Path == "" { + return nil + } + return err + } + fields := strings.Fields(data) + m.controllers = make(map[string]struct{}, len(fields)) + for _, c := range fields { + m.controllers[c] = struct{}{} + } + + return nil +} + +func (m *Manager) Apply(pid int) error { + if err := CreateCgroupPath(m.dirPath, m.config); err != nil { + // Related tests: + // - "runc create (no limits + no cgrouppath + no permission) succeeds" + // - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error" + // - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if m.config.Rootless { + if m.config.Path == "" { + if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed { + return cgroups.ErrRootless + } + return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err) + } + } + return err + } + if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil { + return err + } + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + return cgroups.GetPids(m.dirPath) +} + +func (m *Manager) GetAllPids() ([]int, error) { + return cgroups.GetAllPids(m.dirPath) +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + var errs []error + + st := cgroups.NewStats() + + // pids (since kernel 4.5) + if err := statPids(m.dirPath, st); err != nil { + errs = append(errs, err) + } + // memory (since kernel 4.5) + if err := statMemory(m.dirPath, st); err != 
nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + // io (since kernel 4.5) + if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + // cpu (since kernel 4.15) + // Note cpu.stat is available even if the controller is not enabled. + if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + // PSI (since kernel 4.20). + var err error + if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { + errs = append(errs, err) + } + if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { + errs = append(errs, err) + } + if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { + errs = append(errs, err) + } + // hugetlb (since kernel 5.6) + if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + // rdma (since kernel 4.11) + if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + // misc (since kernel 5.13) + if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + if len(errs) > 0 && !m.config.Rootless { + return st, fmt.Errorf("error while statting cgroup v2: %+v", errs) + } + return st, nil +} + +func (m *Manager) Freeze(state cgroups.FreezerState) error { + if m.config.Resources == nil { + return errors.New("cannot toggle freezer: cgroups not configured for container") + } + if err := setFreezer(m.dirPath, state); err != nil { + return err + } + m.config.Resources.Freezer = state + return nil +} + +func (m *Manager) Destroy() error { + return cgroups.RemovePath(m.dirPath) +} + +func (m *Manager) Path(_ string) string { + return m.dirPath +} + +func (m *Manager) Set(r *cgroups.Resources) error { + if r == nil { + return nil + } + if err := m.getControllers(); err != nil { + return err + } + // pids (since kernel 4.5) + if err := setPids(m.dirPath, r); err 
!= nil { + return err + } + // memory (since kernel 4.5) + if err := setMemory(m.dirPath, r); err != nil { + return err + } + // io (since kernel 4.5) + if err := setIo(m.dirPath, r); err != nil { + return err + } + // cpu (since kernel 4.15) + if err := setCPU(m.dirPath, r); err != nil { + return err + } + // devices (since kernel 4.15, pseudo-controller) + // + // When rootless is true, errors from the device subsystem are ignored because it is really not expected to work. + // However, errors from other subsystems are not ignored. + // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" + if err := setDevices(m.dirPath, r); err != nil { + if !m.config.Rootless || errors.Is(err, cgroups.ErrDevicesUnsupported) { + return err + } + } + // cpuset (since kernel 5.0) + if err := setCpuset(m.dirPath, r); err != nil { + return err + } + // hugetlb (since kernel 5.6) + if err := setHugeTlb(m.dirPath, r); err != nil { + return err + } + // rdma (since kernel 4.11) + if err := fscommon.RdmaSet(m.dirPath, r); err != nil { + return err + } + // freezer (since kernel 5.2, pseudo-controller) + if err := setFreezer(m.dirPath, r.Freezer); err != nil { + return err + } + if err := m.setUnified(r.Unified); err != nil { + return err + } + m.config.Resources = r + return nil +} + +func setDevices(dirPath string, r *cgroups.Resources) error { + if cgroups.DevicesSetV2 == nil { + if len(r.Devices) > 0 { + return cgroups.ErrDevicesUnsupported + } + return nil + } + return cgroups.DevicesSetV2(dirPath, r) +} + +func (m *Manager) setUnified(res map[string]string) error { + for k, v := range res { + if strings.Contains(k, "/") { + return fmt.Errorf("unified resource %q must be a file name (no slashes)", k) + } + if err := cgroups.WriteFileByLine(m.dirPath, k, v); err != nil { + // Check for both EPERM and ENOENT since O_CREAT is used by WriteFile. 
+ if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) { + // Check if a controller is available, + // to give more specific error if not. + c, _, ok := strings.Cut(k, ".") + if !ok { + return fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k) + } + if _, ok := m.controllers[c]; !ok && c != "cgroup" { + return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c) + } + } + return fmt.Errorf("unable to set unified resource %q: %w", k, err) + } + } + + return nil +} + +func (m *Manager) GetPaths() map[string]string { + paths := make(map[string]string, 1) + paths[""] = m.dirPath + return paths +} + +func (m *Manager) GetCgroups() (*cgroups.Cgroup, error) { + return m.config, nil +} + +func (m *Manager) GetFreezerState() (cgroups.FreezerState, error) { + return getFreezer(m.dirPath) +} + +func (m *Manager) Exists() bool { + return cgroups.PathExists(m.dirPath) +} + +func OOMKillCount(path string) (uint64, error) { + return fscommon.GetValueByKey(path, "memory.events", "oom_kill") +} + +func (m *Manager) OOMKillCount() (uint64, error) { + c, err := OOMKillCount(m.dirPath) + if err != nil && m.config.Rootless && os.IsNotExist(err) { + err = nil + } + + return c, err +} + +func CheckMemoryUsage(dirPath string, r *cgroups.Resources) error { + if !r.MemoryCheckBeforeUpdate { + return nil + } + + if r.Memory <= 0 && r.MemorySwap <= 0 { + return nil + } + + usage, err := fscommon.GetCgroupParamUint(dirPath, "memory.current") + if err != nil { + // This check is on best-effort basis, so if we can't read the + // current usage (cgroup not yet created, or any other error), + // we should not fail. 
+ return nil + } + + if r.MemorySwap > 0 { + if uint64(r.MemorySwap) <= usage { + return fmt.Errorf("rejecting memory+swap limit %d <= usage %d", r.MemorySwap, usage) + } + } + + if r.Memory > 0 { + if uint64(r.Memory) <= usage { + return fmt.Errorf("rejecting memory limit %d <= usage %d", r.Memory, usage) + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/hugetlb.go b/vendor/github.com/opencontainers/cgroups/fs2/hugetlb.go new file mode 100644 index 0000000000..bab9b55687 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/hugetlb.go @@ -0,0 +1,70 @@ +package fs2 + +import ( + "errors" + "os" + "strconv" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +func isHugeTlbSet(r *cgroups.Resources) bool { + return len(r.HugetlbLimit) > 0 +} + +func setHugeTlb(dirPath string, r *cgroups.Resources) error { + if !isHugeTlbSet(r) { + return nil + } + const suffix = ".max" + skipRsvd := false + for _, hugetlb := range r.HugetlbLimit { + prefix := "hugetlb." + hugetlb.Pagesize + val := strconv.FormatUint(hugetlb.Limit, 10) + if err := cgroups.WriteFile(dirPath, prefix+suffix, val); err != nil { + return err + } + if skipRsvd { + continue + } + if err := cgroups.WriteFile(dirPath, prefix+".rsvd"+suffix, val); err != nil { + if errors.Is(err, os.ErrNotExist) { + skipRsvd = true + continue + } + return err + } + } + + return nil +} + +func statHugeTlb(dirPath string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + rsvd := ".rsvd" + + for _, pagesize := range cgroups.HugePageSizes() { + prefix := "hugetlb." 
+ pagesize + again: + value, err := fscommon.GetCgroupParamUint(dirPath, prefix+rsvd+".current") + if err != nil { + if rsvd != "" && errors.Is(err, os.ErrNotExist) { + rsvd = "" + goto again + } + return err + } + hugetlbStats.Usage = value + + value, err = fscommon.GetValueByKey(dirPath, prefix+".events", "max") + if err != nil { + return err + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pagesize] = hugetlbStats + } + + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/io.go b/vendor/github.com/opencontainers/cgroups/fs2/io.go new file mode 100644 index 0000000000..0f6ef7fea5 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/io.go @@ -0,0 +1,192 @@ +package fs2 + +import ( + "bufio" + "bytes" + "fmt" + "os" + "strconv" + "strings" + + "github.com/sirupsen/logrus" + + "github.com/opencontainers/cgroups" +) + +func isIoSet(r *cgroups.Resources) bool { + return r.BlkioWeight != 0 || + len(r.BlkioWeightDevice) > 0 || + len(r.BlkioThrottleReadBpsDevice) > 0 || + len(r.BlkioThrottleWriteBpsDevice) > 0 || + len(r.BlkioThrottleReadIOPSDevice) > 0 || + len(r.BlkioThrottleWriteIOPSDevice) > 0 +} + +// bfqDeviceWeightSupported checks for per-device BFQ weight support (added +// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight". +func bfqDeviceWeightSupported(bfq *os.File) bool { + if bfq == nil { + return false + } + _, _ = bfq.Seek(0, 0) + buf := make([]byte, 32) + _, _ = bfq.Read(buf) + // If only a single number (default weight) if read back, we have older kernel. + _, err := strconv.ParseInt(string(bytes.TrimSpace(buf)), 10, 64) + return err != nil +} + +func setIo(dirPath string, r *cgroups.Resources) error { + if !isIoSet(r) { + return nil + } + + // If BFQ IO scheduler is available, use it. 
+ var bfq *os.File + if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 { + var err error + bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR) + if err == nil { + defer bfq.Close() + } else if !os.IsNotExist(err) { + return err + } + } + + if r.BlkioWeight != 0 { + if bfq != nil { // Use BFQ. + if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { + return err + } + } else { + // Fallback to io.weight with a conversion scheme. + v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight) + if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil { + return err + } + } + } + if bfqDeviceWeightSupported(bfq) { + for _, wd := range r.BlkioWeightDevice { + if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil { + return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err) + } + } + } + for _, td := range r.BlkioThrottleReadBpsDevice { + if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil { + return err + } + } + for _, td := range r.BlkioThrottleWriteBpsDevice { + if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil { + return err + } + } + for _, td := range r.BlkioThrottleReadIOPSDevice { + if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil { + return err + } + } + for _, td := range r.BlkioThrottleWriteIOPSDevice { + if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil { + return err + } + } + + return nil +} + +func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) { + ret := map[string][]string{} + f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY) + if err != nil { + return nil, err + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(line) + if len(parts) < 2 { + continue + } + ret[parts[0]] = parts[1:] + } + if err := 
scanner.Err(); err != nil { + return nil, &parseError{Path: dirPath, File: name, Err: err} + } + return ret, nil +} + +func statIo(dirPath string, stats *cgroups.Stats) error { + const file = "io.stat" + values, err := readCgroup2MapFile(dirPath, file) + if err != nil { + return err + } + // more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt + var parsedStats cgroups.BlkioStats + for k, v := range values { + d := strings.Split(k, ":") + if len(d) != 2 { + continue + } + major, err := strconv.ParseUint(d[0], 10, 64) + if err != nil { + return &parseError{Path: dirPath, File: file, Err: err} + } + minor, err := strconv.ParseUint(d[1], 10, 64) + if err != nil { + return &parseError{Path: dirPath, File: file, Err: err} + } + + for _, item := range v { + d := strings.Split(item, "=") + if len(d) != 2 { + continue + } + op := d[0] + + // Map to the cgroupv1 naming and layout (in separate tables). + var targetTable *[]cgroups.BlkioStatEntry + switch op { + // Equivalent to cgroupv1's blkio.io_service_bytes. + case "rbytes": + op = "Read" + targetTable = &parsedStats.IoServiceBytesRecursive + case "wbytes": + op = "Write" + targetTable = &parsedStats.IoServiceBytesRecursive + // Equivalent to cgroupv1's blkio.io_serviced. + case "rios": + op = "Read" + targetTable = &parsedStats.IoServicedRecursive + case "wios": + op = "Write" + targetTable = &parsedStats.IoServicedRecursive + default: + // Skip over entries we cannot map to cgroupv1 stats for now. + // In the future we should expand the stats struct to include + // them. 
+ logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item) + continue + } + + value, err := strconv.ParseUint(d[1], 10, 64) + if err != nil { + return &parseError{Path: dirPath, File: file, Err: err} + } + + entry := cgroups.BlkioStatEntry{ + Op: op, + Major: major, + Minor: minor, + Value: value, + } + *targetTable = append(*targetTable, entry) + } + } + stats.BlkioStats = parsedStats + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/memory.go b/vendor/github.com/opencontainers/cgroups/fs2/memory.go new file mode 100644 index 0000000000..761330756f --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/memory.go @@ -0,0 +1,238 @@ +package fs2 + +import ( + "bufio" + "errors" + "math" + "os" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +// numToStr converts an int64 value to a string for writing to a +// cgroupv2 files with .min, .max, .low, or .high suffix. +// The value of -1 is converted to "max" for cgroupv1 compatibility +// (which used to write -1 to remove the limit). +func numToStr(value int64) string { + switch value { + case 0: + return "" + case -1: + return "max" + } + return strconv.FormatInt(value, 10) +} + +func isMemorySet(r *cgroups.Resources) bool { + return r.MemoryReservation != 0 || r.Memory != 0 || r.MemorySwap != 0 +} + +func setMemory(dirPath string, r *cgroups.Resources) error { + if !isMemorySet(r) { + return nil + } + + if err := CheckMemoryUsage(dirPath, r); err != nil { + return err + } + + swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) + if err != nil { + return err + } + swapStr := numToStr(swap) + if swapStr == "" && swap == 0 && r.MemorySwap > 0 { + // memory and memorySwap set to the same value -- disable swap + swapStr = "0" + } + // never write empty string to `memory.swap.max`, it means set to 0. 
+ if swapStr != "" { + if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil { + // If swap is not enabled, silently ignore setting to max or disabling it. + if !(errors.Is(err, os.ErrNotExist) && (swapStr == "max" || swapStr == "0")) { //nolint:staticcheck // Ignore "QF1001: could apply De Morgan's law". + return err + } + } + } + + if val := numToStr(r.Memory); val != "" { + if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil { + return err + } + } + + // cgroup.Resources.KernelMemory is ignored + + if val := numToStr(r.MemoryReservation); val != "" { + if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil { + return err + } + } + + return nil +} + +func statMemory(dirPath string, stats *cgroups.Stats) error { + const file = "memory.stat" + statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY) + if err != nil { + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := fscommon.ParseKeyValue(sc.Text()) + if err != nil { + return &parseError{Path: dirPath, File: file, Err: err} + } + stats.MemoryStats.Stats[t] = v + } + if err := sc.Err(); err != nil { + return &parseError{Path: dirPath, File: file, Err: err} + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"] + // Unlike cgroup v1 which has memory.use_hierarchy binary knob, + // cgroup v2 is always hierarchical. 
+ stats.MemoryStats.UseHierarchy = true + + memoryUsage, err := getMemoryDataV2(dirPath, "") + if err != nil { + if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint { + // The root cgroup does not have memory.{current,max,peak} + // so emulate those using data from /proc/meminfo and + // /sys/fs/cgroup/memory.stat + return rootStatsFromMeminfo(stats) + } + return err + } + stats.MemoryStats.Usage = memoryUsage + swapOnlyUsage, err := getMemoryDataV2(dirPath, "swap") + if err != nil { + return err + } + stats.MemoryStats.SwapOnlyUsage = swapOnlyUsage + swapUsage := swapOnlyUsage + // As cgroup v1 reports SwapUsage values as mem+swap combined, + // while in cgroup v2 swap values do not include memory, + // report combined mem+swap for v1 compatibility. + swapUsage.Usage += memoryUsage.Usage + if swapUsage.Limit != math.MaxUint64 { + swapUsage.Limit += memoryUsage.Limit + } + // The `MaxUsage` of mem+swap cannot simply combine mem with + // swap. So set it to 0 for v1 compatibility. + swapUsage.MaxUsage = 0 + stats.MemoryStats.SwapUsage = swapUsage + + return nil +} + +func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = "memory." + name + } + usage := moduleName + ".current" + limit := moduleName + ".max" + maxUsage := moduleName + ".peak" + + value, err := fscommon.GetCgroupParamUint(path, usage) + if err != nil { + if name != "" && os.IsNotExist(err) { + // Ignore EEXIST as there's no swap accounting + // if kernel CONFIG_MEMCG_SWAP is not set or + // swapaccount=0 kernel boot parameter is given. 
+ return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, err + } + memoryData.Usage = value + + value, err = fscommon.GetCgroupParamUint(path, limit) + if err != nil { + return cgroups.MemoryData{}, err + } + memoryData.Limit = value + + // `memory.peak` since kernel 5.19 + // `memory.swap.peak` since kernel 6.5 + value, err = fscommon.GetCgroupParamUint(path, maxUsage) + if err != nil && !os.IsNotExist(err) { + return cgroups.MemoryData{}, err + } + memoryData.MaxUsage = value + + return memoryData, nil +} + +func rootStatsFromMeminfo(stats *cgroups.Stats) error { + const file = "/proc/meminfo" + f, err := os.Open(file) + if err != nil { + return err + } + defer f.Close() + + // Fields we are interested in. + var ( + swap_free uint64 + swap_total uint64 + ) + mem := map[string]*uint64{ + "SwapFree": &swap_free, + "SwapTotal": &swap_total, + } + + found := 0 + sc := bufio.NewScanner(f) + for sc.Scan() { + parts := strings.SplitN(sc.Text(), ":", 3) + if len(parts) != 2 { + // Should not happen. + continue + } + k := parts[0] + p, ok := mem[k] + if !ok { + // Unknown field -- not interested. + continue + } + vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB")) + *p, err = strconv.ParseUint(vStr, 10, 64) + if err != nil { + return &parseError{File: file, Err: errors.New("bad value for " + k)} + } + + found++ + if found == len(mem) { + // Got everything we need -- skip the rest. + break + } + } + if err := sc.Err(); err != nil { + return &parseError{Path: "", File: file, Err: err} + } + + // cgroup v1 `usage_in_bytes` reports memory usage as the sum of + // - rss (NR_ANON_MAPPED) + // - cache (NR_FILE_PAGES) + // cgroup v1 reports SwapUsage values as mem+swap combined + // cgroup v2 reports rss and cache as anon and file. + // sum `anon` + `file` to report the same value as `usage_in_bytes` in v1. + // sum swap usage as combined mem+swap usage for consistency as well. 
+ stats.MemoryStats.Usage.Usage = stats.MemoryStats.Stats["anon"] + stats.MemoryStats.Stats["file"] + stats.MemoryStats.Usage.Limit = math.MaxUint64 + stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024 + stats.MemoryStats.SwapUsage.Limit = math.MaxUint64 + stats.MemoryStats.SwapUsage.Usage += stats.MemoryStats.Usage.Usage + + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/misc.go b/vendor/github.com/opencontainers/cgroups/fs2/misc.go new file mode 100644 index 0000000000..f20136b66d --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/misc.go @@ -0,0 +1,52 @@ +package fs2 + +import ( + "bufio" + "os" + "strings" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +func statMisc(dirPath string, stats *cgroups.Stats) error { + for _, file := range []string{"current", "events"} { + fd, err := cgroups.OpenFile(dirPath, "misc."+file, os.O_RDONLY) + if err != nil { + return err + } + + s := bufio.NewScanner(fd) + for s.Scan() { + key, value, err := fscommon.ParseKeyValue(s.Text()) + if err != nil { + fd.Close() + return err + } + + key = strings.TrimSuffix(key, ".max") + + if _, ok := stats.MiscStats[key]; !ok { + stats.MiscStats[key] = cgroups.MiscStats{} + } + + tmp := stats.MiscStats[key] + + switch file { + case "current": + tmp.Usage = value + case "events": + tmp.Events = value + } + + stats.MiscStats[key] = tmp + } + fd.Close() + + if err := s.Err(); err != nil { + return err + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/pids.go b/vendor/github.com/opencontainers/cgroups/fs2/pids.go new file mode 100644 index 0000000000..9b82b90115 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/pids.go @@ -0,0 +1,71 @@ +package fs2 + +import ( + "errors" + "math" + "os" + "strings" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fscommon" +) + +func isPidsSet(r 
*cgroups.Resources) bool { + return r.PidsLimit != 0 +} + +func setPids(dirPath string, r *cgroups.Resources) error { + if !isPidsSet(r) { + return nil + } + if val := numToStr(r.PidsLimit); val != "" { + if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil { + return err + } + } + + return nil +} + +func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error { + // if the controller is not enabled, let's read PIDS from cgroups.procs + // (or threads if cgroup.threads is enabled) + contents, err := cgroups.ReadFile(dirPath, "cgroup.procs") + if errors.Is(err, unix.ENOTSUP) { + contents, err = cgroups.ReadFile(dirPath, "cgroup.threads") + } + if err != nil { + return err + } + pids := strings.Count(contents, "\n") + stats.PidsStats.Current = uint64(pids) + stats.PidsStats.Limit = 0 + return nil +} + +func statPids(dirPath string, stats *cgroups.Stats) error { + current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current") + if err != nil { + if os.IsNotExist(err) { + return statPidsFromCgroupProcs(dirPath, stats) + } + return err + } + + max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max") + if err != nil { + return err + } + // If no limit is set, read from pids.max returns "max", which is + // converted to MaxUint64 by GetCgroupParamUint. Historically, we + // represent "no limit" for pids as 0, thus this conversion. 
+ if max == math.MaxUint64 { + max = 0 + } + + stats.PidsStats.Current = current + stats.PidsStats.Limit = max + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fs2/psi.go b/vendor/github.com/opencontainers/cgroups/fs2/psi.go new file mode 100644 index 0000000000..010fe0bff2 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fs2/psi.go @@ -0,0 +1,89 @@ +package fs2 + +import ( + "bufio" + "errors" + "fmt" + "os" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" +) + +func statPSI(dirPath string, file string) (*cgroups.PSIStats, error) { + f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + // Kernel < 4.20, or CONFIG_PSI is not set, + // or PSI stats are turned off for the cgroup + // ("echo 0 > cgroup.pressure", kernel >= 6.1). + return nil, nil + } + return nil, err + } + defer f.Close() + + var psistats cgroups.PSIStats + sc := bufio.NewScanner(f) + for sc.Scan() { + parts := strings.Fields(sc.Text()) + var pv *cgroups.PSIData + switch parts[0] { + case "some": + pv = &psistats.Some + case "full": + pv = &psistats.Full + } + if pv != nil { + *pv, err = parsePSIData(parts[1:]) + if err != nil { + return nil, &parseError{Path: dirPath, File: file, Err: err} + } + } + } + if err := sc.Err(); err != nil { + if errors.Is(err, unix.ENOTSUP) { + // Some kernels (e.g. CS9) may return ENOTSUP on read + // if psi=1 kernel cmdline parameter is required. 
+ return nil, nil + } + return nil, &parseError{Path: dirPath, File: file, Err: err} + } + return &psistats, nil +} + +func parsePSIData(psi []string) (cgroups.PSIData, error) { + data := cgroups.PSIData{} + for _, f := range psi { + key, val, ok := strings.Cut(f, "=") + if !ok { + return data, fmt.Errorf("invalid psi data: %q", f) + } + var pv *float64 + switch key { + case "avg10": + pv = &data.Avg10 + case "avg60": + pv = &data.Avg60 + case "avg300": + pv = &data.Avg300 + case "total": + v, err := strconv.ParseUint(val, 10, 64) + if err != nil { + return data, fmt.Errorf("invalid %s PSI value: %w", key, err) + } + data.Total = v + } + if pv != nil { + v, err := strconv.ParseFloat(val, 64) + if err != nil { + return data, fmt.Errorf("invalid %s PSI value: %w", key, err) + } + *pv = v + } + } + return data, nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fscommon/rdma.go b/vendor/github.com/opencontainers/cgroups/fscommon/rdma.go new file mode 100644 index 0000000000..960126ce7c --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fscommon/rdma.go @@ -0,0 +1,120 @@ +package fscommon + +import ( + "bufio" + "errors" + "math" + "os" + "strconv" + "strings" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" +) + +// parseRdmaKV parses raw string to RdmaEntry. +func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error { + var value uint32 + + k, v, ok := strings.Cut(raw, "=") + + if !ok { + return errors.New("Unable to parse RDMA entry") + } + + if v == "max" { + value = math.MaxUint32 + } else { + val64, err := strconv.ParseUint(v, 10, 32) + if err != nil { + return err + } + value = uint32(val64) + } + switch k { + case "hca_handle": + entry.HcaHandles = value + case "hca_object": + entry.HcaObjects = value + } + + return nil +} + +// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file. 
+// example entry: mlx4_0 hca_handle=2 hca_object=2000 +func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) { + rdmaEntries := make([]cgroups.RdmaEntry, 0) + fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY) + if err != nil { + return nil, err + } + defer fd.Close() + scanner := bufio.NewScanner(fd) + for scanner.Scan() { + parts := strings.SplitN(scanner.Text(), " ", 4) + if len(parts) == 3 { + entry := new(cgroups.RdmaEntry) + entry.Device = parts[0] + err = parseRdmaKV(parts[1], entry) + if err != nil { + continue + } + err = parseRdmaKV(parts[2], entry) + if err != nil { + continue + } + + rdmaEntries = append(rdmaEntries, *entry) + } + } + return rdmaEntries, scanner.Err() +} + +// RdmaGetStats returns rdma stats such as totalLimit and current entries. +func RdmaGetStats(path string, stats *cgroups.Stats) error { + currentEntries, err := readRdmaEntries(path, "rdma.current") + if err != nil { + if errors.Is(err, os.ErrNotExist) { + err = nil + } + return err + } + maxEntries, err := readRdmaEntries(path, "rdma.max") + if err != nil { + return err + } + // If device got removed between reading two files, ignore returning stats. + if len(currentEntries) != len(maxEntries) { + return nil + } + + stats.RdmaStats = cgroups.RdmaStats{ + RdmaLimit: maxEntries, + RdmaCurrent: currentEntries, + } + + return nil +} + +func createCmdString(device string, limits cgroups.LinuxRdma) string { + cmdString := device + if limits.HcaHandles != nil { + cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10) + } + if limits.HcaObjects != nil { + cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10) + } + return cmdString +} + +// RdmaSet sets RDMA resources. 
+func RdmaSet(path string, r *cgroups.Resources) error { + for device, limits := range r.Rdma { + if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/cgroups/fscommon/utils.go b/vendor/github.com/opencontainers/cgroups/fscommon/utils.go new file mode 100644 index 0000000000..d8f8dfc023 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/fscommon/utils.go @@ -0,0 +1,144 @@ +package fscommon + +import ( + "errors" + "fmt" + "math" + "path" + "strconv" + "strings" + + "github.com/opencontainers/cgroups" +) + +var ( + // Deprecated: use cgroups.OpenFile instead. + OpenFile = cgroups.OpenFile + // Deprecated: use cgroups.ReadFile instead. + ReadFile = cgroups.ReadFile + // Deprecated: use cgroups.WriteFile instead. + WriteFile = cgroups.WriteFile +) + +// ParseError records a parse error details, including the file path. +type ParseError struct { + Path string + File string + Err error +} + +func (e *ParseError) Error() string { + return "unable to parse " + path.Join(e.Path, e.File) + ": " + e.Err.Error() +} + +func (e *ParseError) Unwrap() error { return e.Err } + +// ParseUint converts a string to an uint64 integer. +// Negative values are returned at zero as, due to kernel bugs, +// some of the memory cgroup stats can be negative. +func ParseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. 
Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if errors.Is(intErr, strconv.ErrRange) && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// ParseKeyValue parses a space-separated "key value" kind of cgroup +// parameter and returns its key as a string, and its value as uint64 +// (using [ParseUint] to convert the value). For example, +// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234. +func ParseKeyValue(t string) (string, uint64, error) { + key, val, ok := strings.Cut(t, " ") + if !ok || key == "" || val == "" { + return "", 0, fmt.Errorf(`line %q is not in "key value" format`, t) + } + + value, err := ParseUint(val, 10, 64) + if err != nil { + return "", 0, err + } + + return key, value, nil +} + +// GetValueByKey reads space-separated "key value" pairs from the specified +// cgroup file, looking for a specified key, and returns its value as uint64, +// using [ParseUint] for conversion. If the value is not found, 0 is returned. +func GetValueByKey(path, file, key string) (uint64, error) { + content, err := cgroups.ReadFile(path, file) + if err != nil { + return 0, err + } + + key += " " + lines := strings.Split(content, "\n") + for _, line := range lines { + v, ok := strings.CutPrefix(line, key) + if ok { + val, err := ParseUint(v, 10, 64) + if err != nil { + err = &ParseError{Path: path, File: file, Err: err} + } + return val, err + } + } + + return 0, nil +} + +// GetCgroupParamUint reads a single uint64 value from the specified cgroup file. +// If the value read is "max", the math.MaxUint64 is returned. 
+func GetCgroupParamUint(path, file string) (uint64, error) { + contents, err := GetCgroupParamString(path, file) + if err != nil { + return 0, err + } + if contents == "max" { + return math.MaxUint64, nil + } + + res, err := ParseUint(contents, 10, 64) + if err != nil { + return res, &ParseError{Path: path, File: file, Err: err} + } + return res, nil +} + +// GetCgroupParamInt reads a single int64 value from specified cgroup file. +// If the value read is "max", the math.MaxInt64 is returned. +func GetCgroupParamInt(path, file string) (int64, error) { + contents, err := GetCgroupParamString(path, file) + if err != nil { + return 0, err + } + if contents == "max" { + return math.MaxInt64, nil + } + + res, err := strconv.ParseInt(contents, 10, 64) + if err != nil { + return res, &ParseError{Path: path, File: file, Err: err} + } + return res, nil +} + +// GetCgroupParamString reads a string from the specified cgroup file. +func GetCgroupParamString(path, file string) (string, error) { + contents, err := cgroups.ReadFile(path, file) + if err != nil { + return "", err + } + + return strings.TrimSpace(contents), nil +} diff --git a/vendor/github.com/opencontainers/cgroups/getallpids.go b/vendor/github.com/opencontainers/cgroups/getallpids.go new file mode 100644 index 0000000000..1355a51010 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/getallpids.go @@ -0,0 +1,27 @@ +package cgroups + +import ( + "io/fs" + "path/filepath" +) + +// GetAllPids returns all pids from the cgroup identified by path, and all its +// sub-cgroups. +func GetAllPids(path string) ([]int, error) { + var pids []int + err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error { + if iErr != nil { + return iErr + } + if !d.IsDir() { + return nil + } + cPids, err := readProcsFile(p) + if err != nil { + return err + } + pids = append(pids, cPids...) 
+ return nil + }) + return pids, err +} diff --git a/vendor/github.com/opencontainers/cgroups/internal/path/path.go b/vendor/github.com/opencontainers/cgroups/internal/path/path.go new file mode 100644 index 0000000000..a105a7cf48 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/internal/path/path.go @@ -0,0 +1,52 @@ +package path + +import ( + "errors" + "os" + "path/filepath" + + "github.com/opencontainers/cgroups" +) + +// Inner returns a path to cgroup relative to a cgroup mount point, based +// on cgroup configuration, or an error, if cgroup configuration is invalid. +// To be used only by fs cgroup managers (systemd has different path rules). +func Inner(c *cgroups.Cgroup) (string, error) { + if (c.Name != "" || c.Parent != "") && c.Path != "" { + return "", errors.New("cgroup: either Path or Name and Parent should be used") + } + + // XXX: Do not remove cleanPath. Path safety is important! -- cyphar + innerPath := cleanPath(c.Path) + if innerPath == "" { + cgParent := cleanPath(c.Parent) + cgName := cleanPath(c.Name) + innerPath = filepath.Join(cgParent, cgName) + } + + return innerPath, nil +} + +// cleanPath is a copy of github.com/opencontainers/runc/libcontainer/utils.CleanPath. +func cleanPath(path string) string { + // Deal with empty strings nicely. + if path == "" { + return "" + } + + // Ensure that all paths are cleaned (especially problematic ones like + // "/../../../../../" which can cause lots of issues). + + if filepath.IsAbs(path) { + return filepath.Clean(path) + } + + // If the path isn't absolute, we need to do more processing to fix paths + // such as "../../../..//some/path". We also shouldn't convert absolute + // paths to relative ones. + path = filepath.Clean(string(os.PathSeparator) + path) + // This can't fail, as (by definition) all paths are relative to root. 
+ path, _ = filepath.Rel(string(os.PathSeparator), path) + + return path +} diff --git a/vendor/github.com/opencontainers/cgroups/manager/new.go b/vendor/github.com/opencontainers/cgroups/manager/new.go new file mode 100644 index 0000000000..2df39e587e --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/manager/new.go @@ -0,0 +1,77 @@ +package manager + +import ( + "errors" + "fmt" + "path/filepath" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fs" + "github.com/opencontainers/cgroups/fs2" + "github.com/opencontainers/cgroups/systemd" +) + +// New returns the instance of a cgroup manager, which is chosen +// based on the local environment (whether cgroup v1 or v2 is used) +// and the config (whether config.Systemd is set or not). +func New(config *cgroups.Cgroup) (cgroups.Manager, error) { + return NewWithPaths(config, nil) +} + +// NewWithPaths is similar to New, and can be used in case cgroup paths +// are already well known, which can save some resources. +// +// For cgroup v1, the keys are controller/subsystem name, and the values +// are absolute filesystem paths to the appropriate cgroups. +// +// For cgroup v2, the only key allowed is "" (empty string), and the value +// is the unified cgroup path. +func NewWithPaths(config *cgroups.Cgroup, paths map[string]string) (cgroups.Manager, error) { + if config == nil { + return nil, errors.New("cgroups/manager.New: config must not be nil") + } + if config.Systemd && !systemd.IsRunningSystemd() { + return nil, errors.New("systemd not running on this host, cannot use systemd cgroups manager") + } + + // Cgroup v2 aka unified hierarchy. + if cgroups.IsCgroup2UnifiedMode() { + path, err := getUnifiedPath(paths) + if err != nil { + return nil, fmt.Errorf("manager.NewWithPaths: inconsistent paths: %w", err) + } + if config.Systemd { + return systemd.NewUnifiedManager(config, path) + } + return fs2.NewManager(config, path) + } + + // Cgroup v1. 
+ if config.Systemd { + return systemd.NewLegacyManager(config, paths) + } + + return fs.NewManager(config, paths) +} + +// getUnifiedPath is an implementation detail of libcontainer. +// Historically, libcontainer.Create saves cgroup paths as per-subsystem path +// map (as returned by cm.GetPaths(""), but with v2 we only have one single +// unified path (with "" as a key). +// +// This function converts from that map to string (using "" as a key), +// and also checks that the map itself is sane. +func getUnifiedPath(paths map[string]string) (string, error) { + if len(paths) > 1 { + return "", fmt.Errorf("expected a single path, got %+v", paths) + } + path := paths[""] + // can be empty + if path != "" { + if filepath.Clean(path) != path || !filepath.IsAbs(path) { + return "", fmt.Errorf("invalid path: %q", path) + } + } + + return path, nil +} diff --git a/vendor/github.com/opencontainers/cgroups/stats.go b/vendor/github.com/opencontainers/cgroups/stats.go new file mode 100644 index 0000000000..6cd6253ee0 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/stats.go @@ -0,0 +1,209 @@ +package cgroups + +type ThrottlingData struct { + // Number of periods with throttling active + Periods uint64 `json:"periods,omitempty"` + // Number of periods when the container hit its throttling limit. + ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` + // Aggregate time the container was throttled for in nanoseconds. + ThrottledTime uint64 `json:"throttled_time,omitempty"` +} + +type BurstData struct { + // Number of periods bandwidth burst occurs + BurstsPeriods uint64 `json:"bursts_periods,omitempty"` + // Cumulative wall-time that any cpus has used above quota in respective periods + // Units: nanoseconds. + BurstTime uint64 `json:"burst_time,omitempty"` +} + +// CpuUsage denotes the usage of a CPU. +// All CPU stats are aggregate since container inception. +type CpuUsage struct { + // Total CPU time consumed. + // Units: nanoseconds. 
+ TotalUsage uint64 `json:"total_usage,omitempty"` + // Total CPU time consumed per core. + // Units: nanoseconds. + PercpuUsage []uint64 `json:"percpu_usage,omitempty"` + // CPU time consumed per core in kernel mode + // Units: nanoseconds. + PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"` + // CPU time consumed per core in user mode + // Units: nanoseconds. + PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"` + // Time spent by tasks of the cgroup in kernel mode. + // Units: nanoseconds. + UsageInKernelmode uint64 `json:"usage_in_kernelmode"` + // Time spent by tasks of the cgroup in user mode. + // Units: nanoseconds. + UsageInUsermode uint64 `json:"usage_in_usermode"` +} + +type PSIData struct { + Avg10 float64 `json:"avg10"` + Avg60 float64 `json:"avg60"` + Avg300 float64 `json:"avg300"` + Total uint64 `json:"total"` +} + +type PSIStats struct { + Some PSIData `json:"some,omitempty"` + Full PSIData `json:"full,omitempty"` +} + +type CpuStats struct { + CpuUsage CpuUsage `json:"cpu_usage,omitempty"` + ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` + PSI *PSIStats `json:"psi,omitempty"` + BurstData BurstData `json:"burst_data,omitempty"` +} + +type CPUSetStats struct { + // List of the physical numbers of the CPUs on which processes + // in that cpuset are allowed to execute + CPUs []uint16 `json:"cpus,omitempty"` + // cpu_exclusive flag + CPUExclusive uint64 `json:"cpu_exclusive"` + // List of memory nodes on which processes in that cpuset + // are allowed to allocate memory + Mems []uint16 `json:"mems,omitempty"` + // mem_hardwall flag + MemHardwall uint64 `json:"mem_hardwall"` + // mem_exclusive flag + MemExclusive uint64 `json:"mem_exclusive"` + // memory_migrate flag + MemoryMigrate uint64 `json:"memory_migrate"` + // memory_spread page flag + MemorySpreadPage uint64 `json:"memory_spread_page"` + // memory_spread slab flag + MemorySpreadSlab uint64 `json:"memory_spread_slab"` + // memory_pressure + 
// MemoryData is one set of memory counters. MemoryStats uses this type for
// each kind of usage it reports (memory, memory+swap, swap only, kernel and
// kernel TCP).
//
// NOTE(review): the units are not stated here; presumably bytes -- confirm.
type MemoryData struct {
	// Usage is omitted from the JSON encoding when zero.
	Usage uint64 `json:"usage,omitempty"`
	// MaxUsage is omitted from the JSON encoding when zero.
	MaxUsage uint64 `json:"max_usage,omitempty"`
	// Failcnt is always encoded, even when zero.
	// NOTE(review): presumably a count of failed allocations against
	// Limit -- confirm against the code that fills it in.
	Failcnt uint64 `json:"failcnt"`
	// Limit is always encoded, even when zero.
	Limit uint64 `json:"limit"`
}
// BlkioStats holds block I/O statistics as lists of BlkioStatEntry values
// (major, minor, op, value), plus optional PSI data.
//
// All list fields and the PSI pointer use "omitempty", so empty lists and a
// nil PSI are left out of the JSON encoding.
type BlkioStats struct {
	// number of bytes transferred to and from the block device
	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
	// NOTE: the JSON key is "io_queue_recursive" (no "d"), unlike the Go
	// field name. It is part of the serialized format, so it must not be
	// "corrected" without considering existing consumers.
	IoQueuedRecursive      []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
	IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
	IoWaitTimeRecursive    []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
	IoMergedRecursive      []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
	IoTimeRecursive        []BlkioStatEntry `json:"io_time_recursive,omitempty"`
	SectorsRecursive       []BlkioStatEntry `json:"sectors_recursive,omitempty"`
	// PSI is optional; a nil pointer is omitted from the JSON encoding.
	PSI *PSIStats `json:"psi,omitempty"`
}
+ Failcnt uint64 `json:"failcnt"` +} + +type RdmaEntry struct { + Device string `json:"device,omitempty"` + HcaHandles uint32 `json:"hca_handles,omitempty"` + HcaObjects uint32 `json:"hca_objects,omitempty"` +} + +type RdmaStats struct { + RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"` + RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` +} + +type MiscStats struct { + // current resource usage for a key in misc + Usage uint64 `json:"usage,omitempty"` + // number of times the resource usage was about to go over the max boundary + Events uint64 `json:"events,omitempty"` +} + +type Stats struct { + CpuStats CpuStats `json:"cpu_stats,omitempty"` + CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` + MemoryStats MemoryStats `json:"memory_stats,omitempty"` + PidsStats PidsStats `json:"pids_stats,omitempty"` + BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` + RdmaStats RdmaStats `json:"rdma_stats,omitempty"` + // the map is in the format "misc resource name: stats of the key" + MiscStats map[string]MiscStats `json:"misc_stats,omitempty"` +} + +func NewStats() *Stats { + memoryStats := MemoryStats{Stats: make(map[string]uint64)} + hugetlbStats := make(map[string]HugetlbStats) + miscStats := make(map[string]MiscStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats} +} diff --git a/vendor/github.com/opencontainers/cgroups/systemd/common.go b/vendor/github.com/opencontainers/cgroups/systemd/common.go new file mode 100644 index 0000000000..875a589e3d --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/systemd/common.go @@ -0,0 +1,366 @@ +package systemd + +import ( + "context" + "errors" + "fmt" + "math" + "os" + "strconv" + "strings" + "sync" + "time" + + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + dbus "github.com/godbus/dbus/v5" + 
// ExpandSlice converts a systemd slice name into the cgroup path of that
// slice. systemd encodes the slice hierarchy with "-", so each dash-separated
// prefix of the name becomes one path element; for example, test-a-b.slice
// becomes /test.slice/test-a.slice/test-a-b.slice.
//
// The special name "-.slice" maps to the root, "/". An error is returned if
// the name does not end in ".slice", contains a "/", or has an empty
// component (e.g. ".slice", "test--a.slice", "-test.slice").
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"

	invalid := func() (string, error) {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	// The name must carry the ".slice" suffix and must not contain
	// path separators.
	name, hasSuffix := strings.CutSuffix(slice, suffix)
	if !hasSuffix || strings.Contains(slice, "/") {
		return invalid()
	}

	// "-.slice" is the root slice.
	if name == "-" {
		return "/", nil
	}

	var expanded strings.Builder
	// end is the offset in name just past the current component, so
	// name[:end] is the dash-joined prefix up to and including it.
	end := 0
	for _, component := range strings.Split(name, "-") {
		// Empty components (test--a.slice, -test.slice, or a bare
		// ".slice") are not permitted.
		if component == "" {
			return invalid()
		}
		end += len(component)
		expanded.WriteString("/" + name[:end] + suffix)
		end++ // step over the "-" separator
	}
	return expanded.String(), nil
}
// startUnit asks systemd to start the transient unit unitName with the given
// properties (job mode "replace") and waits for the job result.
//
// If the start request fails because the unit already exists:
//   - with ignoreExist set, nil is returned right away;
//   - otherwise the unit is reset via resetFailedUnit and the request is
//     retried exactly once; a second "unit exists" failure is returned.
//
// Any other error from the start request is returned as is.
//
// After a successful request, the function waits up to 30 seconds for the
// job status. A status other than "done", or a timeout, results in an error;
// in both cases resetFailedUnit is called first (its error is ignored).
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
	// Buffered with capacity 1, so a status can be delivered without a
	// receiver being ready yet.
	statusChan := make(chan string, 1)
	// retry is true until the single retry below has been used up.
	retry := true

retry:
	err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
		_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
		return err
	})
	if err != nil {
		if !isUnitExists(err) {
			return err
		}
		if ignoreExist {
			// TODO: remove this hack.
			// This is kubelet making sure a slice exists (see
			// https://github.com/opencontainers/runc/pull/1124).
			return nil
		}
		if retry {
			// In case a unit with the same name exists, this may
			// be a leftover failed unit. Reset it, so systemd can
			// remove it, and retry once.
			err = resetFailedUnit(cm, unitName)
			if err != nil {
				// A failed reset is only logged; the start is
				// still retried.
				logrus.Warnf("unable to reset failed unit: %v", err)
			}
			retry = false
			goto retry
		}
		// Second "unit exists" failure: give up.
		return err
	}

	timeout := time.NewTimer(30 * time.Second)
	defer timeout.Stop()

	select {
	case s := <-statusChan:
		close(statusChan)
		// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
		if s != "done" {
			_ = resetFailedUnit(cm, unitName)
			return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
		}
	case <-timeout.C:
		_ = resetFailedUnit(cm, unitName)
		return errors.New("Timeout waiting for systemd to create " + unitName)
	}

	return nil
}
https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit + if s != "done" { + logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s) + } + case <-timeout.C: + return errors.New("Timed out while waiting for systemd to remove " + unitName) + } + } + + // In case of a failed unit, let systemd remove it. + _ = resetFailedUnit(cm, unitName) + + return nil +} + +func resetFailedUnit(cm *dbusConnManager, name string) error { + return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + return c.ResetFailedUnitContext(context.TODO(), name) + }) +} + +func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) { + var prop *systemdDbus.Property + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) { + prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName) + return Err + }) + return prop, err +} + +func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error { + return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...) + }) +} + +func getManagerProperty(cm *dbusConnManager, name string) (string, error) { + str := "" + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { + var err error + str, err = c.GetManagerProperty(name) + return err + }) + if err != nil { + return "", err + } + return strconv.Unquote(str) +} + +func systemdVersion(cm *dbusConnManager) int { + versionOnce.Do(func() { + version = -1 + verStr, err := getManagerProperty(cm, "Version") + if err == nil { + version, err = systemdVersionAtoi(verStr) + } + + if err != nil { + logrus.WithError(err).Error("unable to get systemd version") + } + }) + + return version +} + +// systemdVersionAtoi extracts a numeric systemd version from the argument. 
// systemdVersionAtoi extracts the numeric systemd version from str.
//
// Accepted inputs look like "v245.4-1.fc32", "245", "v245-1.fc32" or
// "245-1.fc32", with or without surrounding double quotes; all of these
// yield 245. Any leading run of `"` and `v` characters is dropped, and the
// number ends at the first non-digit byte.
//
// If no number can be parsed, -1 and a wrapped strconv error are returned.
func systemdVersionAtoi(str string) (int, error) {
	// Strip the optional leading quote and "v".
	digits := strings.TrimLeft(str, `"v`)

	// Keep only the leading run of ASCII digits.
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	if end := strings.IndexFunc(digits, notDigit); end >= 0 {
		digits = digits[:end]
	}

	ver, err := strconv.Atoi(digits)
	if err != nil {
		return -1, fmt.Errorf("can't parse version: %w", err)
	}
	return ver, nil
}
+ cpuQuotaPerSecUSec = uint64(*quota*1000000) / period + if cpuQuotaPerSecUSec%10000 != 0 { + cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 + // Update the requested quota along with the round-up in order to write the same value to cgroupfs. + *quota = int64(cpuQuotaPerSecUSec) * int64(period) / 1000000 + } + } + *properties = append(*properties, + newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) + } +} + +func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error { + if cpus == "" && mems == "" { + return nil + } + + // systemd only supports AllowedCPUs/AllowedMemoryNodes since v244 + sdVer := systemdVersion(cm) + if sdVer < 244 { + logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+ + " (settings will still be applied to cgroupfs)", sdVer) + return nil + } + + if cpus != "" { + bits, err := RangeToBits(cpus) + if err != nil { + return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w", + cpus, err) + } + *props = append(*props, + newProp("AllowedCPUs", bits)) + } + if mems != "" { + bits, err := RangeToBits(mems) + if err != nil { + return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w", + mems, err) + } + *props = append(*props, + newProp("AllowedMemoryNodes", bits)) + } + return nil +} + +// generateDeviceProperties takes the configured device rules and generates a +// corresponding set of systemd properties to configure the devices correctly. 
// RangeToBits converts a textual CPU mask (in the format used by the cgroup
// cpuset.* files, e.g. "1,3-5") into a byte slice with the corresponding
// bits set, in the form systemd consumes over dbus for the
// AllowedCPUs/AllowedMemoryNodes unit properties.
//
// Elements are separated by commas; surrounding spaces and empty elements
// are ignored. Each element is a single number or an inclusive "start-end"
// range. The result is ordered least significant byte first.
//
// An error is returned for unparsable numbers, for a range whose start is
// greater than its end, and for input that sets no bits at all.
func RangeToBits(str string) ([]byte, error) {
	mask := new(big.Int)

	for _, elem := range strings.Split(str, ",") {
		// Tolerate extra spaces and extra commas.
		elem = strings.TrimSpace(elem)
		if elem == "" {
			continue
		}

		lo, hi, isRange := strings.Cut(elem, "-")
		first, err := strconv.ParseUint(lo, 10, 32)
		if err != nil {
			return nil, err
		}
		// A single number is a range of one.
		last := first
		if isRange {
			last, err = strconv.ParseUint(hi, 10, 32)
			if err != nil {
				return nil, err
			}
			if first > last {
				return nil, errors.New("invalid range: " + elem)
			}
		}
		for bit := first; bit <= last; bit++ {
			mask.SetBit(mask, int(bit), 1)
		}
	}

	bigEndian := mask.Bytes()
	if len(bigEndian) == 0 {
		// do not allow empty values
		return nil, errors.New("empty value")
	}

	// big.Int yields big-endian bytes; systemd parses the mask in the
	// opposite order, so hand back the reversed sequence.
	n := len(bigEndian)
	ret := make([]byte, n)
	for i, b := range bigEndian {
		ret[n-1-i] = b
	}
	return ret, nil
}
a/vendor/github.com/opencontainers/cgroups/systemd/dbus.go b/vendor/github.com/opencontainers/cgroups/systemd/dbus.go new file mode 100644 index 0000000000..bb87ae83ae --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/systemd/dbus.go @@ -0,0 +1,102 @@ +package systemd + +import ( + "context" + "errors" + "fmt" + "sync" + + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + dbus "github.com/godbus/dbus/v5" +) + +var ( + dbusC *systemdDbus.Conn + dbusMu sync.RWMutex + dbusInited bool + dbusRootless bool +) + +type dbusConnManager struct{} + +// newDbusConnManager initializes systemd dbus connection manager. +func newDbusConnManager(rootless bool) *dbusConnManager { + dbusMu.Lock() + defer dbusMu.Unlock() + if dbusInited && rootless != dbusRootless { + panic("can't have both root and rootless dbus") + } + dbusInited = true + dbusRootless = rootless + return &dbusConnManager{} +} + +// getConnection lazily initializes and returns systemd dbus connection. +func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) { + // In the case where dbusC != nil + // Use the read lock the first time to ensure + // that Conn can be acquired at the same time. + dbusMu.RLock() + if conn := dbusC; conn != nil { + dbusMu.RUnlock() + return conn, nil + } + dbusMu.RUnlock() + + // In the case where dbusC == nil + // Use write lock to ensure that only one + // will be created + dbusMu.Lock() + defer dbusMu.Unlock() + if conn := dbusC; conn != nil { + return conn, nil + } + + conn, err := d.newConnection() + if err != nil { + // When dbus-user-session is not installed, we can't detect whether we should try to connect to user dbus or system dbus, so d.dbusRootless is set to false. + // This may fail with a cryptic error "read unix @->/run/systemd/private: read: connection reset by peer: unknown." 
+ // https://github.com/moby/moby/issues/42793 + return nil, fmt.Errorf("failed to connect to dbus (hint: for rootless containers, maybe you need to install dbus-user-session package, see https://github.com/opencontainers/runc/blob/master/docs/cgroup-v2.md): %w", err) + } + dbusC = conn + return conn, nil +} + +func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) { + if dbusRootless { + return newUserSystemdDbus() + } + return systemdDbus.NewWithContext(context.TODO()) +} + +// resetConnection resets the connection to its initial state +// (so it can be reconnected if necessary). +func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) { + dbusMu.Lock() + defer dbusMu.Unlock() + if dbusC != nil && dbusC == conn { + dbusC.Close() + dbusC = nil + } +} + +// retryOnDisconnect calls op, and if the error it returns is about closed dbus +// connection, the connection is re-established and the op is retried. This helps +// with the situation when dbus is restarted and we have a stale connection. +func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error { + for { + conn, err := d.getConnection() + if err != nil { + return err + } + err = op(conn) + if err == nil { + return nil + } + if !errors.Is(err, dbus.ErrClosed) { + return err + } + d.resetConnection(conn) + } +} diff --git a/vendor/github.com/opencontainers/cgroups/systemd/devices.go b/vendor/github.com/opencontainers/cgroups/systemd/devices.go new file mode 100644 index 0000000000..51ca7fa11c --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/systemd/devices.go @@ -0,0 +1,74 @@ +package systemd + +import ( + "reflect" + + dbus "github.com/godbus/dbus/v5" + + "github.com/opencontainers/cgroups" +) + +// freezeBeforeSet answers whether there is a need to freeze the cgroup before +// applying its systemd unit properties, and thaw after, while avoiding +// unnecessary freezer state changes. 
// freezeBeforeSet answers whether there is a need to freeze the cgroup before
// applying its systemd unit properties, and thaw after, while avoiding
// unnecessary freezer state changes.
//
// The reason why we have to freeze is that systemd's application of device
// rules is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we have to freeze the container to avoid the container get
// an occasional "permission denied" error.
//
// Results:
//   - needsFreeze: the caller should freeze the cgroup before setting the
//     unit properties;
//   - needsThaw: the caller should thaw it again afterwards;
//   - err: set only when reading the current freezer state fails; in that
//     case both flags have already been set to true.
//
// Both flags are false when r.SkipDevices is set together with
// r.SkipFreezeOnSet, when r.SkipDevices is set and the unit reports
// DevicePolicy "auto" with an empty DeviceAllow list, or when the cgroup is
// already frozen. needsThaw alone is false when r.Freezer requests the
// frozen state.
func (m *LegacyManager) freezeBeforeSet(unitName string, r *cgroups.Resources) (needsFreeze, needsThaw bool, err error) {
	// Special case for SkipDevices, as used by Kubernetes to create pod
	// cgroups with allow-all device policy).
	if r.SkipDevices {
		if r.SkipFreezeOnSet {
			// Both needsFreeze and needsThaw are false.
			return
		}

		// No need to freeze if SkipDevices is set, and either
		// (1) systemd unit does not (yet) exist, or
		// (2) it has DevicePolicy=auto and empty DeviceAllow list.
		//
		// Interestingly, (1) and (2) are the same here because
		// a non-existent unit returns default properties,
		// and settings in (2) are the defaults.
		//
		// Do not return errors from getUnitTypeProperty, as they alone
		// should not prevent Set from working.

		unitType := getUnitType(unitName)

		// A lookup error (e != nil) simply falls through to the
		// regular freezer-state check below.
		devPolicy, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DevicePolicy")
		if e == nil && devPolicy.Value == dbus.MakeVariant("auto") {
			devAllow, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DeviceAllow")
			if e == nil {
				// The property value's concrete type is not known
				// here, so reflection is used to test for an
				// empty slice.
				if rv := reflect.ValueOf(devAllow.Value.Value()); rv.Kind() == reflect.Slice && rv.Len() == 0 {
					needsFreeze = false
					needsThaw = false
					return
				}
			}
		}
	}

	// Default: freeze before and thaw after, unless the checks below
	// show that one or both steps are unnecessary.
	needsFreeze = true
	needsThaw = true

	// Check the current freezer state.
	freezerState, err := m.GetFreezerState()
	if err != nil {
		return
	}
	if freezerState == cgroups.Frozen {
		// Already frozen, and should stay frozen.
		needsFreeze = false
		needsThaw = false
	}

	if r.Freezer == cgroups.Frozen {
		// Will be frozen anyway -- no need to thaw.
		needsThaw = false
	}
	return
}
+func DetectUID() (int, error) { + if !userns.RunningInUserNS() { + return os.Getuid(), nil + } + b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput() + if err != nil { + return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err) + } + scanner := bufio.NewScanner(bytes.NewReader(b)) + for scanner.Scan() { + s := strings.TrimSpace(scanner.Text()) + if uidStr, ok := strings.CutPrefix(s, "OwnerUID="); ok { + i, err := strconv.Atoi(uidStr) + if err != nil { + return -1, fmt.Errorf("could not detect the OwnerUID: %w", err) + } + return i, nil + } + } + if err := scanner.Err(); err != nil { + return -1, err + } + return -1, errors.New("could not detect the OwnerUID") +} + +// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS, if set. +// Otherwise it returns "unix:path=$XDG_RUNTIME_DIR/bus", if $XDG_RUNTIME_DIR/bus exists. +func DetectUserDbusSessionBusAddress() (string, error) { + if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" { + return env, nil + } + if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" { + busPath := filepath.Join(xdr, "bus") + if _, err := os.Stat(busPath); err == nil { + busAddress := "unix:path=" + dbus.EscapeBusAddressValue(busPath) + return busAddress, nil + } + } + return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from the environment; make sure you have installed the dbus-user-session or dbus-daemon package; note you may need to re-login") +} diff --git a/vendor/github.com/opencontainers/cgroups/systemd/v1.go b/vendor/github.com/opencontainers/cgroups/systemd/v1.go new file mode 100644 index 0000000000..b8959adbfa --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/systemd/v1.go @@ -0,0 +1,415 @@ +package systemd + +import ( + "errors" + "os" + "path/filepath" + "strings" + "sync" + + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + "github.com/sirupsen/logrus" + + "github.com/opencontainers/cgroups" + 
"github.com/opencontainers/cgroups/fs" +) + +type LegacyManager struct { + mu sync.Mutex + cgroups *cgroups.Cgroup + paths map[string]string + dbus *dbusConnManager +} + +func NewLegacyManager(cg *cgroups.Cgroup, paths map[string]string) (*LegacyManager, error) { + if cg.Rootless { + return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1") + } + if cg.Resources != nil && cg.Resources.Unified != nil { + return nil, cgroups.ErrV1NoUnified + } + if paths == nil { + var err error + paths, err = initPaths(cg) + if err != nil { + return nil, err + } + } + return &LegacyManager{ + cgroups: cg, + paths: paths, + dbus: newDbusConnManager(false), + }, nil +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + // GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Set sets cgroup resource limits. + Set(path string, r *cgroups.Resources) error +} + +var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") + +var legacySubsystems = []subsystem{ + &fs.CpusetGroup{}, + &fs.DevicesGroup{}, + &fs.MemoryGroup{}, + &fs.CpuGroup{}, + &fs.CpuacctGroup{}, + &fs.PidsGroup{}, + &fs.BlkioGroup{}, + &fs.HugetlbGroup{}, + &fs.PerfEventGroup{}, + &fs.FreezerGroup{}, + &fs.NetPrioGroup{}, + &fs.NetClsGroup{}, + &fs.NameGroup{GroupName: "name=systemd"}, + &fs.RdmaGroup{}, + &fs.NameGroup{GroupName: "misc"}, +} + +func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { + var properties []systemdDbus.Property + + deviceProperties, err := generateDeviceProperties(r, cm) + if err != nil { + return nil, err + } + properties = append(properties, deviceProperties...) 
+ + if r.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", uint64(r.Memory))) + } + + if r.CpuShares != 0 { + properties = append(properties, + newProp("CPUShares", r.CpuShares)) + } + + addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod) + + if r.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(r.BlkioWeight))) + } + + if r.PidsLimit > 0 || r.PidsLimit == -1 { + properties = append(properties, + newProp("TasksMax", uint64(r.PidsLimit))) + } + + err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) + if err != nil { + return nil, err + } + + return properties, nil +} + +// initPaths figures out and returns paths to cgroups. +func initPaths(c *cgroups.Cgroup) (map[string]string, error) { + slice := "system.slice" + if c.Parent != "" { + var err error + slice, err = ExpandSlice(c.Parent) + if err != nil { + return nil, err + } + } + + unit := getUnitName(c) + + paths := make(map[string]string) + for _, s := range legacySubsystems { + subsystemPath, err := getSubsystemPath(slice, unit, s.Name()) + if err != nil { + // Even if it's `not found` error, we'll return err + // because devices cgroup is hard requirement for + // container security. + if s.Name() == "devices" { + return nil, err + } + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return nil, err + } + paths[s.Name()] = subsystemPath + } + + // If systemd is using cgroups-hybrid mode then add the slice path of + // this container to the paths so the following process executed with + // "runc exec" joins that cgroup as well. 
+ if cgroups.IsCgroup2HybridMode() { + // "" means cgroup-hybrid path + cgroupsHybridPath, err := getSubsystemPath(slice, unit, "") + if err != nil && cgroups.IsNotFound(err) { + return nil, err + } + paths[""] = cgroupsHybridPath + } + + return paths, nil +} + +func (m *LegacyManager) Apply(pid int) error { + var ( + c = m.cgroups + unitName = getUnitName(c) + slice = "system.slice" + properties []systemdDbus.Property + ) + + m.mu.Lock() + defer m.mu.Unlock() + + if c.Parent != "" { + slice = c.Parent + } + + properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) + + if strings.HasSuffix(unitName, ".slice") { + // If we create a slice, the parent is defined via a Wants=. + properties = append(properties, systemdDbus.PropWants(slice)) + } else { + // Otherwise it's a scope, which we put into a Slice=. + properties = append(properties, systemdDbus.PropSlice(slice)) + // Assume scopes always support delegation (supported since systemd v218). + properties = append(properties, newProp("Delegate", true)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true), + newProp("TasksAccounting", true), + ) + + // Assume DefaultDependencies= will always work (the check for it was previously broken.) + properties = append(properties, + newProp("DefaultDependencies", false)) + + properties = append(properties, c.SystemdProps...) 
+ + if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil { + return err + } + + if err := m.joinCgroups(pid); err != nil { + return err + } + + return nil +} + +func (m *LegacyManager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + + stopErr := stopUnit(m.dbus, getUnitName(m.cgroups)) + + // Both on success and on error, cleanup all the cgroups + // we are aware of, as some of them were created directly + // by Apply() and are not managed by systemd. + if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil { + return err + } + + return stopErr +} + +func (m *LegacyManager) Path(subsys string) string { + m.mu.Lock() + defer m.mu.Unlock() + return m.paths[subsys] +} + +func (m *LegacyManager) joinCgroups(pid int) error { + for _, sys := range legacySubsystems { + name := sys.Name() + switch name { + case "name=systemd": + // let systemd handle this + case "cpuset": + if path, ok := m.paths[name]; ok { + s := &fs.CpusetGroup{} + if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil { + return err + } + } + default: + if path, ok := m.paths[name]; ok { + if err := os.MkdirAll(path, 0o755); err != nil { + return err + } + if err := cgroups.WriteCgroupProc(path, pid); err != nil { + return err + } + } + } + } + + return nil +} + +func getSubsystemPath(slice, unit, subsystem string) (string, error) { + mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem) + if err != nil { + return "", err + } + + return filepath.Join(mountpoint, slice, unit), nil +} + +func (m *LegacyManager) Freeze(state cgroups.FreezerState) error { + err := m.doFreeze(state) + if err == nil { + m.cgroups.Resources.Freezer = state + } + return err +} + +// doFreeze is the same as Freeze but without +// changing the m.cgroups.Resources.Frozen field. 
+func (m *LegacyManager) doFreeze(state cgroups.FreezerState) error { + path, ok := m.paths["freezer"] + if !ok { + return errSubsystemDoesNotExist + } + freezer := &fs.FreezerGroup{} + resources := &cgroups.Resources{Freezer: state} + return freezer.Set(path, resources) +} + +func (m *LegacyManager) GetPids() ([]int, error) { + path, ok := m.paths["devices"] + if !ok { + return nil, errSubsystemDoesNotExist + } + return cgroups.GetPids(path) +} + +func (m *LegacyManager) GetAllPids() ([]int, error) { + path, ok := m.paths["devices"] + if !ok { + return nil, errSubsystemDoesNotExist + } + return cgroups.GetAllPids(path) +} + +func (m *LegacyManager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for _, sys := range legacySubsystems { + path := m.paths[sys.Name()] + if path == "" { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + + return stats, nil +} + +func (m *LegacyManager) Set(r *cgroups.Resources) error { + if r == nil { + return nil + } + if r.Unified != nil { + return cgroups.ErrV1NoUnified + } + // Use a copy since CpuQuota in r may be modified. + rCopy := *r + r = &rCopy + properties, err := genV1ResourcesProperties(r, m.dbus) + if err != nil { + return err + } + + unitName := getUnitName(m.cgroups) + needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r) + if err != nil { + return err + } + + if needsFreeze { + if err := m.doFreeze(cgroups.Frozen); err != nil { + // If freezer cgroup isn't supported, we just warn about it. + logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + // skip update the cgroup while frozen failed. #3803 + if !errors.Is(err, errSubsystemDoesNotExist) { + if needsThaw { + if thawErr := m.doFreeze(cgroups.Thawed); thawErr != nil { + logrus.Infof("thaw container after doFreeze failed: %v", thawErr) + } + } + return err + } + } + } + setErr := setUnitProperties(m.dbus, unitName, properties...) 
+ if needsThaw { + if err := m.doFreeze(cgroups.Thawed); err != nil { + logrus.Infof("thaw container after SetUnitProperties failed: %v", err) + } + } + if setErr != nil { + return setErr + } + + for _, sys := range legacySubsystems { + // Get the subsystem path, but don't error out for not found cgroups. + path, ok := m.paths[sys.Name()] + if !ok { + continue + } + if err := sys.Set(path, r); err != nil { + return err + } + } + + return nil +} + +func (m *LegacyManager) GetPaths() map[string]string { + m.mu.Lock() + defer m.mu.Unlock() + return m.paths +} + +func (m *LegacyManager) GetCgroups() (*cgroups.Cgroup, error) { + return m.cgroups, nil +} + +func (m *LegacyManager) GetFreezerState() (cgroups.FreezerState, error) { + path, ok := m.paths["freezer"] + if !ok { + return cgroups.Undefined, nil + } + freezer := &fs.FreezerGroup{} + return freezer.GetState(path) +} + +func (m *LegacyManager) Exists() bool { + return cgroups.PathExists(m.Path("devices")) +} + +func (m *LegacyManager) OOMKillCount() (uint64, error) { + return fs.OOMKillCount(m.Path("memory")) +} diff --git a/vendor/github.com/opencontainers/cgroups/systemd/v2.go b/vendor/github.com/opencontainers/cgroups/systemd/v2.go new file mode 100644 index 0000000000..636b9cb01b --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/systemd/v2.go @@ -0,0 +1,518 @@ +package systemd + +import ( + "bufio" + "errors" + "fmt" + "math" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + systemdDbus "github.com/coreos/go-systemd/v22/dbus" + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/sirupsen/logrus" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fs2" +) + +const ( + cpuIdleSupportedVersion = 252 +) + +type UnifiedManager struct { + mu sync.Mutex + cgroups *cgroups.Cgroup + // path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" + path string + dbus *dbusConnManager + fsMgr cgroups.Manager +} + +func NewUnifiedManager(config 
*cgroups.Cgroup, path string) (*UnifiedManager, error) { + m := &UnifiedManager{ + cgroups: config, + path: path, + dbus: newDbusConnManager(config.Rootless), + } + if err := m.initPath(); err != nil { + return nil, err + } + + fsMgr, err := fs2.NewManager(config, m.path) + if err != nil { + return nil, err + } + m.fsMgr = fsMgr + + return m, nil +} + +func shouldSetCPUIdle(cm *dbusConnManager, v string) bool { + // The only valid values for cpu.idle are 0 and 1. As it is + // not possible to directly set cpu.idle to 0 via systemd, + // ignore 0. Ignore other values as we'll error out later + // in Set() while calling fsMgr.Set(). + return v == "1" && systemdVersion(cm) >= cpuIdleSupportedVersion +} + +// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified +// key/value map (where key is cgroupfs file name) to systemd unit properties. +// This is on a best-effort basis, so the properties that are not known +// (to this function and/or systemd) are ignored (but logged with "debug" +// log level). +// +// For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt +// +// For the list of systemd unit properties, see systemd.resource-control(5). +func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) { + var err error + + for k, v := range res { + if strings.Contains(k, "/") { + return nil, fmt.Errorf("unified resource %q must be a file name (no slashes)", k) + } + if strings.IndexByte(k, '.') <= 0 { + return nil, fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k) + } + // Kernel is quite forgiving to extra whitespace + // around the value, and so should we. + v = strings.TrimSpace(v) + // Please keep cases in alphabetical order. + switch k { + case "cpu.idle": + if shouldSetCPUIdle(cm, v) { + // Setting CPUWeight to 0 tells systemd + // to set cpu.idle to 1. 
+ props = append(props, + newProp("CPUWeight", uint64(0))) + } + + case "cpu.max": + // value: quota [period] + quota := int64(0) // 0 means "unlimited" for addCpuQuota, if period is set + period := defCPUQuotaPeriod + sv := strings.Fields(v) + if len(sv) < 1 || len(sv) > 2 { + return nil, fmt.Errorf("unified resource %q value invalid: %q", k, v) + } + // quota + if sv[0] != "max" { + quota, err = strconv.ParseInt(sv[0], 10, 64) + if err != nil { + return nil, fmt.Errorf("unified resource %q period value conversion error: %w", k, err) + } + } + // period + if len(sv) == 2 { + period, err = strconv.ParseUint(sv[1], 10, 64) + if err != nil { + return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err) + } + } + addCPUQuota(cm, &props, "a, period) + + case "cpu.weight": + if shouldSetCPUIdle(cm, strings.TrimSpace(res["cpu.idle"])) { + // Do not add duplicate CPUWeight property + // (see case "cpu.idle" above). + logrus.Warn("unable to apply both cpu.weight and cpu.idle to systemd, ignoring cpu.weight") + continue + } + num, err := strconv.ParseUint(v, 10, 64) + if err != nil { + return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err) + } + props = append(props, + newProp("CPUWeight", num)) + + case "cpuset.cpus", "cpuset.mems": + bits, err := RangeToBits(v) + if err != nil { + return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err) + } + m := map[string]string{ + "cpuset.cpus": "AllowedCPUs", + "cpuset.mems": "AllowedMemoryNodes", + } + // systemd only supports these properties since v244 + sdVer := systemdVersion(cm) + if sdVer >= 244 { + props = append(props, + newProp(m[k], bits)) + } else { + logrus.Debugf("systemd v%d is too old to support %s"+ + " (setting will still be applied to cgroupfs)", + sdVer, m[k]) + } + + case "memory.high", "memory.low", "memory.min", "memory.max", "memory.swap.max": + num := uint64(math.MaxUint64) + if v != "max" { + num, err = strconv.ParseUint(v, 10, 64) 
+ if err != nil { + return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err) + } + } + m := map[string]string{ + "memory.high": "MemoryHigh", + "memory.low": "MemoryLow", + "memory.min": "MemoryMin", + "memory.max": "MemoryMax", + "memory.swap.max": "MemorySwapMax", + } + props = append(props, + newProp(m[k], num)) + + case "pids.max": + num := uint64(math.MaxUint64) + if v != "max" { + var err error + num, err = strconv.ParseUint(v, 10, 64) + if err != nil { + return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err) + } + } + props = append(props, + newProp("TasksMax", num)) + + case "memory.oom.group": + // Setting this to 1 is roughly equivalent to OOMPolicy=kill + // (as per systemd.service(5) and + // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html), + // but it's not clear what to do if it is unset or set + // to 0 in runc update, as there are two other possible + // values for OOMPolicy (continue/stop). + fallthrough + + default: + // Ignore the unknown resource here -- will still be + // applied in Set which calls fs2.Set. + logrus.Debugf("don't know how to convert unified resource %q=%q to systemd unit property; skipping (will still be applied to cgroupfs)", k, v) + } + } + + return props, nil +} + +func genV2ResourcesProperties(dirPath string, r *cgroups.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { + // We need this check before setting systemd properties, otherwise + // the container is OOM-killed and the systemd unit is removed + // before we get to fsMgr.Set(). + if err := fs2.CheckMemoryUsage(dirPath, r); err != nil { + return nil, err + } + + var properties []systemdDbus.Property + + // NOTE: This is of questionable correctness because we insert our own + // devices eBPF program later. Two programs with identical rules + // aren't the end of the world, but it is a bit concerning. 
However + // it's unclear if systemd removes all eBPF programs attached when + // doing SetUnitProperties... + deviceProperties, err := generateDeviceProperties(r, cm) + if err != nil { + return nil, err + } + properties = append(properties, deviceProperties...) + + if r.Memory != 0 { + properties = append(properties, + newProp("MemoryMax", uint64(r.Memory))) + } + if r.MemoryReservation != 0 { + properties = append(properties, + newProp("MemoryLow", uint64(r.MemoryReservation))) + } + + swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) + if err != nil { + return nil, err + } + if swap != 0 { + properties = append(properties, + newProp("MemorySwapMax", uint64(swap))) + } + + idleSet := false + // The logic here is the same as in shouldSetCPUIdle. + if r.CPUIdle != nil && *r.CPUIdle == 1 && systemdVersion(cm) >= cpuIdleSupportedVersion { + properties = append(properties, + newProp("CPUWeight", uint64(0))) + idleSet = true + } + if r.CpuWeight != 0 { + if idleSet { + // Ignore CpuWeight if CPUIdle is already set. + logrus.Warn("unable to apply both CPUWeight and CpuIdle to systemd, ignoring CPUWeight") + } else { + properties = append(properties, + newProp("CPUWeight", r.CpuWeight)) + } + } + + addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod) + + if r.PidsLimit > 0 || r.PidsLimit == -1 { + properties = append(properties, + newProp("TasksMax", uint64(r.PidsLimit))) + } + + err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) + if err != nil { + return nil, err + } + + // ignore r.KernelMemory + + // convert Resources.Unified map to systemd properties + if r.Unified != nil { + unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified) + if err != nil { + return nil, err + } + properties = append(properties, unifiedProps...) 
+ } + + return properties, nil +} + +func (m *UnifiedManager) Apply(pid int) error { + var ( + c = m.cgroups + unitName = getUnitName(c) + properties []systemdDbus.Property + ) + + slice := "system.slice" + if m.cgroups.Rootless { + slice = "user.slice" + } + if c.Parent != "" { + slice = c.Parent + } + + properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) + + if strings.HasSuffix(unitName, ".slice") { + // If we create a slice, the parent is defined via a Wants=. + properties = append(properties, systemdDbus.PropWants(slice)) + } else { + // Otherwise it's a scope, which we put into a Slice=. + properties = append(properties, systemdDbus.PropSlice(slice)) + // Assume scopes always support delegation (supported since systemd v218). + properties = append(properties, newProp("Delegate", true)) + } + + // only add pid if its valid, -1 is used w/ general slice creation. + if pid != -1 { + properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) + } + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("IOAccounting", true), + newProp("TasksAccounting", true), + ) + + // Assume DefaultDependencies= will always work (the check for it was previously broken.) + properties = append(properties, + newProp("DefaultDependencies", false)) + + properties = append(properties, c.SystemdProps...) + + if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil { + return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err) + } + + if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil { + return err + } + + if c.OwnerUID != nil { + // The directory itself must be chowned. 
+ err := os.Chown(m.path, *c.OwnerUID, -1) + if err != nil { + return err + } + + filesToChown, err := cgroupFilesToChown() + if err != nil { + return err + } + + for _, v := range filesToChown { + err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1) + // Some files might not be present. + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + } + } + + return nil +} + +// The kernel exposes a list of files that should be chowned to the delegate +// uid in /sys/kernel/cgroup/delegate. If the file is not present +// (Linux < 4.15), use the initial values mentioned in cgroups(7). +func cgroupFilesToChown() ([]string, error) { + const cgroupDelegateFile = "/sys/kernel/cgroup/delegate" + + f, err := os.Open(cgroupDelegateFile) + if err != nil { + return []string{"cgroup.procs", "cgroup.subtree_control", "cgroup.threads"}, nil + } + defer f.Close() + + filesToChown := []string{} + scanner := bufio.NewScanner(f) + for scanner.Scan() { + filesToChown = append(filesToChown, scanner.Text()) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, err) + } + + return filesToChown, nil +} + +func (m *UnifiedManager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + + unitName := getUnitName(m.cgroups) + if err := stopUnit(m.dbus, unitName); err != nil { + return err + } + + // systemd 239 do not remove sub-cgroups. + err := m.fsMgr.Destroy() + // fsMgr.Destroy has handled ErrNotExist + if err != nil { + return err + } + + return nil +} + +func (m *UnifiedManager) Path(_ string) string { + return m.path +} + +// getSliceFull value is used in initPath. +// The value is incompatible with systemdDbus.PropSlice. 
+func (m *UnifiedManager) getSliceFull() (string, error) { + c := m.cgroups + slice := "system.slice" + if c.Rootless { + slice = "user.slice" + } + if c.Parent != "" { + var err error + slice, err = ExpandSlice(c.Parent) + if err != nil { + return "", err + } + } + + if c.Rootless { + // managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service". + managerCG, err := getManagerProperty(m.dbus, "ControlGroup") + if err != nil { + return "", err + } + slice = filepath.Join(managerCG, slice) + } + + // an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice" + // NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified. + return slice, nil +} + +func (m *UnifiedManager) initPath() error { + if m.path != "" { + return nil + } + + sliceFull, err := m.getSliceFull() + if err != nil { + return err + } + + c := m.cgroups + path := filepath.Join(sliceFull, getUnitName(c)) + path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path) + if err != nil { + return err + } + + // an example of the final path in rootless: + // "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope" + m.path = path + + return nil +} + +func (m *UnifiedManager) Freeze(state cgroups.FreezerState) error { + return m.fsMgr.Freeze(state) +} + +func (m *UnifiedManager) GetPids() ([]int, error) { + return cgroups.GetPids(m.path) +} + +func (m *UnifiedManager) GetAllPids() ([]int, error) { + return cgroups.GetAllPids(m.path) +} + +func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) { + return m.fsMgr.GetStats() +} + +func (m *UnifiedManager) Set(r *cgroups.Resources) error { + if r == nil { + return nil + } + // Use a copy since CpuQuota in r may be modified. 
+ rCopy := *r + r = &rCopy + properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus) + if err != nil { + return err + } + + if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil { + return fmt.Errorf("unable to set unit properties: %w", err) + } + + return m.fsMgr.Set(r) +} + +func (m *UnifiedManager) GetPaths() map[string]string { + paths := make(map[string]string, 1) + paths[""] = m.path + return paths +} + +func (m *UnifiedManager) GetCgroups() (*cgroups.Cgroup, error) { + return m.cgroups, nil +} + +func (m *UnifiedManager) GetFreezerState() (cgroups.FreezerState, error) { + return m.fsMgr.GetFreezerState() +} + +func (m *UnifiedManager) Exists() bool { + return cgroups.PathExists(m.path) +} + +func (m *UnifiedManager) OOMKillCount() (uint64, error) { + return m.fsMgr.OOMKillCount() +} diff --git a/vendor/github.com/opencontainers/cgroups/utils.go b/vendor/github.com/opencontainers/cgroups/utils.go new file mode 100644 index 0000000000..95b3310ab6 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/utils.go @@ -0,0 +1,483 @@ +package cgroups + +import ( + "bufio" + "errors" + "fmt" + "io" + "math" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "time" + + "github.com/moby/sys/userns" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +const ( + CgroupProcesses = "cgroup.procs" + unifiedMountpoint = "/sys/fs/cgroup" + hybridMountpoint = "/sys/fs/cgroup/unified" +) + +var ( + isUnifiedOnce sync.Once + isUnified bool + isHybridOnce sync.Once + isHybrid bool +) + +// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. +func IsCgroup2UnifiedMode() bool { + isUnifiedOnce.Do(func() { + var st unix.Statfs_t + err := unix.Statfs(unifiedMountpoint, &st) + if err != nil { + level := logrus.WarnLevel + if os.IsNotExist(err) && userns.RunningInUserNS() { + // For rootless containers, sweep it under the rug. 
+ level = logrus.DebugLevel + } + logrus.StandardLogger().Logf(level, + "statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err) + } + isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC + }) + return isUnified +} + +// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode. +func IsCgroup2HybridMode() bool { + isHybridOnce.Do(func() { + var st unix.Statfs_t + err := unix.Statfs(hybridMountpoint, &st) + if err != nil { + isHybrid = false + if !os.IsNotExist(err) { + // Report unexpected errors. + logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint) + } + return + } + isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC + }) + return isHybrid +} + +type Mount struct { + Mountpoint string + Root string + Subsystems []string +} + +// GetCgroupMounts returns the mounts for the cgroup subsystems. +// all indicates whether to return just the first instance or all the mounts. +// This function should not be used from cgroupv2 code, as in this case +// all the controllers are available under the constant unifiedMountpoint. +func GetCgroupMounts(all bool) ([]Mount, error) { + if IsCgroup2UnifiedMode() { + // TODO: remove cgroupv2 case once all external users are converted + availableControllers, err := GetAllSubsystems() + if err != nil { + return nil, err + } + m := Mount{ + Mountpoint: unifiedMountpoint, + Root: unifiedMountpoint, + Subsystems: availableControllers, + } + return []Mount{m}, nil + } + + return getCgroupMountsV1(all) +} + +// GetAllSubsystems returns all the cgroup subsystems supported by the kernel +func GetAllSubsystems() ([]string, error) { + // /proc/cgroups is meaningless for v2 + // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features + if IsCgroup2UnifiedMode() { + // "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers. 
+ // - devices: implemented in kernel 4.15 + // - freezer: implemented in kernel 5.2 + // We assume these are always available, as it is hard to detect availability. + pseudo := []string{"devices", "freezer"} + data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers") + if err != nil { + return nil, err + } + subsystems := append(pseudo, strings.Fields(data)...) + return subsystems, nil + } + f, err := os.Open("/proc/cgroups") + if err != nil { + return nil, err + } + defer f.Close() + + subsystems := []string{} + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + if text[0] != '#' { + parts := strings.Fields(text) + if len(parts) >= 4 && parts[3] != "0" { + subsystems = append(subsystems, parts[0]) + } + } + } + if err := s.Err(); err != nil { + return nil, err + } + return subsystems, nil +} + +func readProcsFile(dir string) (out []int, _ error) { + file := CgroupProcesses + retry := true + +again: + f, err := OpenFile(dir, file, os.O_RDONLY) + if err != nil { + return nil, err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + if errors.Is(s.Err(), unix.ENOTSUP) && retry { + // For a threaded cgroup, read returns ENOTSUP, and we should + // read from cgroup.threads instead. + file = "cgroup.threads" + retry = false + goto again + } + return out, s.Err() +} + +// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup +// or /proc//cgroup, into a map of subsystems to cgroup paths, e.g. +// +// "cpu": "/user.slice/user-1000.slice" +// "pids": "/user.slice/user-1000.slice" +// +// etc. +// +// Note that for cgroup v2 unified hierarchy, there are no per-controller +// cgroup paths, so the resulting map will have a single element where the key +// is empty string ("") and the value is the cgroup path the is in. 
+func ParseCgroupFile(path string) (map[string]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + return parseCgroupFromReader(f) +} + +// helper function for ParseCgroupFile to make testing easier +func parseCgroupFromReader(r io.Reader) (map[string]string, error) { + s := bufio.NewScanner(r) + cgroups := make(map[string]string) + + for s.Scan() { + text := s.Text() + // from cgroups(7): + // /proc/[pid]/cgroup + // ... + // For each cgroup hierarchy ... there is one entry + // containing three colon-separated fields of the form: + // hierarchy-ID:subsystem-list:cgroup-path + parts := strings.SplitN(text, ":", 3) + if len(parts) < 3 { + return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text) + } + + for _, subs := range strings.Split(parts[1], ",") { + cgroups[subs] = parts[2] + } + } + if err := s.Err(); err != nil { + return nil, err + } + + return cgroups, nil +} + +func PathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + return false + } + return true +} + +// rmdir tries to remove a directory, optionally retrying on EBUSY. +func rmdir(path string, retry bool) error { + delay := time.Millisecond + tries := 10 + +again: + err := unix.Rmdir(path) + switch err { + case nil, unix.ENOENT: + return nil + case unix.EINTR: + goto again + case unix.EBUSY: + if retry && tries > 0 { + time.Sleep(delay) + delay *= 2 + tries-- + goto again + + } + } + return &os.PathError{Op: "rmdir", Path: path, Err: err} +} + +// RemovePath aims to remove cgroup path. It does so recursively, +// by removing any subdirectories (sub-cgroups) first. +func RemovePath(path string) error { + // Try the fast path first; don't retry on EBUSY yet. + if err := rmdir(path, false); err == nil { + return nil + } + + // There are many reasons why rmdir can fail, including: + // 1. cgroup have existing sub-cgroups; + // 2. 
cgroup (still) have some processes (that are about to vanish); + // 3. lack of permission (one example is read-only /sys/fs/cgroup mount, + // in which case rmdir returns EROFS even for for a non-existent path, + // see issue 4518). + // + // Using os.ReadDir here kills two birds with one stone: check if + // the directory exists (handling scenario 3 above), and use + // directory contents to remove sub-cgroups (handling scenario 1). + infos, err := os.ReadDir(path) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + // Let's remove sub-cgroups, if any. + for _, info := range infos { + if info.IsDir() { + if err = RemovePath(filepath.Join(path, info.Name())); err != nil { + return err + } + } + } + // Finally, try rmdir again, this time with retries on EBUSY, + // which may help with scenario 2 above. + return rmdir(path, true) +} + +// RemovePaths iterates over the provided paths removing them. +func RemovePaths(paths map[string]string) (err error) { + for s, p := range paths { + if err := RemovePath(p); err == nil { + delete(paths, s) + } + } + if len(paths) == 0 { + clear(paths) + return nil + } + return fmt.Errorf("Failed to remove paths: %v", paths) +} + +var ( + hugePageSizes []string + initHPSOnce sync.Once +) + +func HugePageSizes() []string { + initHPSOnce.Do(func() { + dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return + } + files, err := dir.Readdirnames(0) + dir.Close() + if err != nil { + return + } + + hugePageSizes, err = getHugePageSizeFromFilenames(files) + if err != nil { + logrus.Warn("HugePageSizes: ", err) + } + }) + + return hugePageSizes +} + +func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) { + pageSizes := make([]string, 0, len(fileNames)) + var warn error + + for _, file := range fileNames { + // example: hugepages-1048576kB + val, ok := strings.CutPrefix(file, "hugepages-") + if !ok { + // Unexpected file name: no prefix 
found, ignore it. + continue + } + // The suffix is always "kB" (as of Linux 5.13). If we find + // something else, produce an error but keep going. + eLen := len(val) - 2 + val = strings.TrimSuffix(val, "kB") + if len(val) != eLen { + // Highly unlikely. + if warn == nil { + warn = errors.New(file + `: invalid suffix (expected "kB")`) + } + continue + } + size, err := strconv.Atoi(val) + if err != nil { + // Highly unlikely. + if warn == nil { + warn = fmt.Errorf("%s: %w", file, err) + } + continue + } + // Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574 + // but in our case the size is in KB already. + if size >= (1 << 20) { + val = strconv.Itoa(size>>20) + "GB" + } else if size >= (1 << 10) { + val = strconv.Itoa(size>>10) + "MB" + } else { + val += "KB" + } + pageSizes = append(pageSizes, val) + } + + return pageSizes, warn +} + +// GetPids returns all pids, that were added to cgroup at path. +func GetPids(dir string) ([]int, error) { + return readProcsFile(dir) +} + +// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file +func WriteCgroupProc(dir string, pid int) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s", CgroupProcesses) + } + + // Dont attach any pid to the cgroup if -1 is specified as a pid + if pid == -1 { + return nil + } + + file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY) + if err != nil { + return fmt.Errorf("failed to write %v: %w", pid, err) + } + defer file.Close() + + for range 5 { + _, err = file.WriteString(strconv.Itoa(pid)) + if err == nil { + return nil + } + + // EINVAL might mean that the task being added to cgroup.procs is in state + // TASK_NEW. We should attempt to do so again. 
+ if errors.Is(err, unix.EINVAL) { + time.Sleep(30 * time.Millisecond) + continue + } + + return fmt.Errorf("failed to write %v: %w", pid, err) + } + return err +} + +// ConvertCPUSharesToCgroupV2Value converts CPU shares, used by cgroup v1, +// to CPU weight, used by cgroup v2. +// +// Cgroup v1 CPU shares has a range of [2^1...2^18], i.e. [2...262144], +// and the default value is 1024. +// +// Cgroup v2 CPU weight has a range of [10^0...10^4], i.e. [1...10000], +// and the default value is 100. +func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 { + // The value of 0 means "unset". + if cpuShares == 0 { + return 0 + } + if cpuShares <= 2 { + return 1 + } + if cpuShares >= 262144 { + return 10000 + } + l := math.Log2(float64(cpuShares)) + // Quadratic function which fits min, max, and default. + exponent := (l*l+125*l)/612.0 - 7.0/34.0 + + return uint64(math.Ceil(math.Pow(10, exponent))) +} + +// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec +// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap +// is defined as memory+swap combined, while in cgroup v2 swap is a separate value, +// so we need to subtract memory from it where it makes sense. +func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) { + switch { + case memory == -1 && memorySwap == 0: + // For compatibility with cgroup1 controller, set swap to unlimited in + // case the memory is set to unlimited and the swap is not explicitly set, + // treating the request as "set both memory and swap to unlimited". + return -1, nil + case memorySwap == -1, memorySwap == 0: + // Treat -1 ("max") and 0 ("unset") swap as is. + return memorySwap, nil + case memory == -1: + // Unlimited memory, so treat swap as is. + return memorySwap, nil + case memory == 0: + // Unset or unknown memory, can't calculate swap. 
+ return 0, errors.New("unable to set swap limit without memory limit") + case memory < 0: + // Does not make sense to subtract a negative value. + return 0, fmt.Errorf("invalid memory value: %d", memory) + case memorySwap < memory: + // Sanity check. + return 0, errors.New("memory+swap limit should be >= memory limit") + } + + return memorySwap - memory, nil +} + +// Since the OCI spec is designed for cgroup v1, in some cases +// there is a need to convert from the cgroup v1 configuration to cgroup v2. +// The formula for BlkIOWeight to IOWeight is y = (1 + (x - 10) * 9999 / 990), +// which converts linearly from [10-1000] to [1-10000]. +func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 { + if blkIoWeight == 0 { + return 0 + } + return 1 + (uint64(blkIoWeight)-10)*9999/990 +} diff --git a/vendor/github.com/opencontainers/cgroups/v1_utils.go b/vendor/github.com/opencontainers/cgroups/v1_utils.go new file mode 100644 index 0000000000..19b8af1344 --- /dev/null +++ b/vendor/github.com/opencontainers/cgroups/v1_utils.go @@ -0,0 +1,276 @@ +package cgroups + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "slices" + "strings" + "sync" + "syscall" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/moby/sys/mountinfo" + "golang.org/x/sys/unix" +) + +// Code in this source file is specific to cgroup v1, +// and must not be used from any cgroup v2 code.
+ +const ( + CgroupNamePrefix = "name=" + defaultPrefix = "/sys/fs/cgroup" +) + +var ( + errUnified = errors.New("not implemented for cgroup v2 unified hierarchy") + ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1") + + readMountinfoOnce sync.Once + readMountinfoErr error + cgroupMountinfo []*mountinfo.Info +) + +type NotFoundError struct { + Subsystem string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) +} + +func NewNotFoundError(sub string) error { + return &NotFoundError{ + Subsystem: sub, + } +} + +func IsNotFound(err error) bool { + var nfErr *NotFoundError + return errors.As(err, &nfErr) +} + +func tryDefaultPath(cgroupPath, subsystem string) string { + if !strings.HasPrefix(defaultPrefix, cgroupPath) { + return "" + } + + // remove possible prefix + subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix) + + // Make sure we're still under defaultPrefix, and resolve + // a possible symlink (like cpu -> cpu,cpuacct). + path, err := securejoin.SecureJoin(defaultPrefix, subsystem) + if err != nil { + return "" + } + + // (1) path should be a directory. + st, err := os.Lstat(path) + if err != nil || !st.IsDir() { + return "" + } + + // (2) path should be a mount point. + pst, err := os.Lstat(filepath.Dir(path)) + if err != nil { + return "" + } + + if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev { + // parent dir has the same dev -- path is not a mount point + return "" + } + + // (3) path should have 'cgroup' fs type. + fst := unix.Statfs_t{} + err = unix.Statfs(path, &fst) + if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC { + return "" + } + + return path +} + +// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones +// with fstype of "cgroup") for the current running process. 
+// +// The results are cached (to avoid re-reading mountinfo which is relatively +// expensive), so it is assumed that cgroup mounts are not being changed. +func readCgroupMountinfo() ([]*mountinfo.Info, error) { + readMountinfoOnce.Do(func() { + // mountinfo.GetMounts uses /proc/thread-self, so we can use it without + // issues. + cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( + mountinfo.FSTypeFilter("cgroup"), + ) + }) + return cgroupMountinfo, readMountinfoErr +} + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt +func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { + if IsCgroup2UnifiedMode() { + return "", errUnified + } + + // If subsystem is empty, we look for the cgroupv2 hybrid path. + if len(subsystem) == 0 { + return hybridMountpoint, nil + } + + // Avoid parsing mountinfo by trying the default path first, if possible. + if path := tryDefaultPath(cgroupPath, subsystem); path != "" { + return path, nil + } + + mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) + return mnt, err +} + +func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { + if IsCgroup2UnifiedMode() { + return "", "", errUnified + } + + mi, err := readCgroupMountinfo() + if err != nil { + return "", "", err + } + + return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem) +} + +func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) { + for _, mi := range mounts { + if strings.HasPrefix(mi.Mountpoint, cgroupPath) { + if slices.Contains(strings.Split(mi.VFSOptions, ","), subsystem) { + return mi.Mountpoint, mi.Root, nil + } + } + } + + return "", "", NewNotFoundError(subsystem) +} + +func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { + if len(m.Subsystems) == 0 { + return "", errors.New("no subsystem for mount") + } + + return getControllerPath(m.Subsystems[0], cgroups) +} + +func 
getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) { + res := make([]Mount, 0, len(ss)) + numFound := 0 + for _, mi := range mounts { + m := Mount{ + Mountpoint: mi.Mountpoint, + Root: mi.Root, + } + for _, opt := range strings.Split(mi.VFSOptions, ",") { + seen, known := ss[opt] + if !known || (!all && seen) { + continue + } + ss[opt] = true + opt = strings.TrimPrefix(opt, CgroupNamePrefix) + m.Subsystems = append(m.Subsystems, opt) + numFound++ + } + if len(m.Subsystems) > 0 || all { + res = append(res, m) + } + if !all && numFound >= len(ss) { + break + } + } + return res, nil +} + +func getCgroupMountsV1(all bool) ([]Mount, error) { + mi, err := readCgroupMountinfo() + if err != nil { + return nil, err + } + + // We don't need to use /proc/thread-self here because runc always runs + // with every thread in the same cgroup. This lets us avoid having to do + // runtime.LockOSThread. + allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + allMap := make(map[string]bool) + for s := range allSubsystems { + allMap[s] = false + } + + return getCgroupMountsHelper(allMap, mi, all) +} + +// GetOwnCgroup returns the relative path to the cgroup docker is running in. +func GetOwnCgroup(subsystem string) (string, error) { + if IsCgroup2UnifiedMode() { + return "", errUnified + } + + // We don't need to use /proc/thread-self here because runc always runs + // with every thread in the same cgroup. This lets us avoid having to do + // runtime.LockOSThread. + cgroups, err := ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return "", err + } + + return getControllerPath(subsystem, cgroups) +} + +func GetOwnCgroupPath(subsystem string) (string, error) { + cgroup, err := GetOwnCgroup(subsystem) + if err != nil { + return "", err + } + + // If subsystem is empty, we look for the cgroupv2 hybrid path. 
+ if len(subsystem) == 0 { + return hybridMountpoint, nil + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because in /proc/self/cgroup we + // see paths from host, which don't exist in container. + relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + +func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { + if IsCgroup2UnifiedMode() { + return "", errUnified + } + + if p, ok := cgroups[subsystem]; ok { + return p, nil + } + + if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok { + return p, nil + } + + return "", NewNotFoundError(subsystem) +} diff --git a/vendor/github.com/opencontainers/runc/internal/linux/doc.go b/vendor/github.com/opencontainers/runc/internal/linux/doc.go new file mode 100644 index 0000000000..4d1eb90010 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/linux/doc.go @@ -0,0 +1,3 @@ +// Package linux provides minimal wrappers around Linux system calls, primarily +// to provide support for automatic EINTR-retries. +package linux diff --git a/vendor/github.com/opencontainers/runc/internal/linux/linux.go b/vendor/github.com/opencontainers/runc/internal/linux/linux.go new file mode 100644 index 0000000000..f9e6753427 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/linux/linux.go @@ -0,0 +1,44 @@ +package linux + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// Readlinkat wraps [unix.Readlinkat]. 
+func Readlinkat(dir *os.File, path string) (string, error) { + size := 4096 + for { + linkBuf := make([]byte, size) + n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf) + if err != nil { + return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err} + } + if n != size { + return string(linkBuf[:n]), nil + } + // Possible truncation, resize the buffer. + size *= 2 + } +} + +// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER). +func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) { + // Make sure O_NOCTTY is always set -- otherwise runc might accidentally + // gain it as a controlling terminal. O_CLOEXEC also needs to be set to + // make sure we don't leak the handle either. + flags |= unix.O_NOCTTY | unix.O_CLOEXEC + + // There is no nice wrapper for this kind of ioctl in unix. + peerFd, _, errno := unix.Syscall( + unix.SYS_IOCTL, + ptyFd, + uintptr(unix.TIOCGPTPEER), + uintptr(flags), + ) + if errno != 0 { + return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno) + } + return os.NewFile(peerFd, unsafePeerPath), nil +} diff --git a/vendor/github.com/opencontainers/runc/internal/pathrs/doc.go b/vendor/github.com/opencontainers/runc/internal/pathrs/doc.go new file mode 100644 index 0000000000..496ca59510 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/pathrs/doc.go @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Package pathrs provides wrappers around filepath-securejoin to add the +// minimum set of features needed from libpathrs that are not provided by +// filepath-securejoin, with the eventual goal being that these can be used to +// ease the transition by converting them to stubs when enabling libpathrs builds. +package pathrs diff --git a/vendor/github.com/opencontainers/runc/internal/pathrs/mkdirall_pathrslite.go b/vendor/github.com/opencontainers/runc/internal/pathrs/mkdirall_pathrslite.go new file mode 100644 index 0000000000..a9a0157c68 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/pathrs/mkdirall_pathrslite.go @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// MkdirAllInRootOpen attempts to make +// +// path, _ := securejoin.SecureJoin(root, unsafePath) +// os.MkdirAll(path, mode) +// os.Open(path) +// +// safer against attacks where components in the path are changed between +// SecureJoin returning and MkdirAll (or Open) being called.
In particular, we +// try to detect any symlink components in the path while we are doing the +// MkdirAll. +// +// NOTE: If unsafePath is a subpath of root, we assume that you have already +// called SecureJoin and so we use the provided path verbatim without resolving +// any symlinks (this is done in a way that avoids symlink-exchange races). +// This means that the path also must not contain ".." elements, otherwise an +// error will occur. +// +// This uses (pathrs-lite).MkdirAllHandle under the hood, but it has special +// handling if unsafePath has already been scoped within the rootfs (this is +// needed for a lot of runc callers and fixing this would require reworking a +// lot of path logic). +func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (*os.File, error) { + // If the path is already "within" the root, get the path relative to the + // root and use that as the unsafe path. This is necessary because a lot of + // MkdirAllInRootOpen callers have already done SecureJoin, and refactoring + // all of them to stop using these SecureJoin'd paths would require a fair + // amount of work. + // TODO(cyphar): Do the refactor to libpathrs once it's ready. + if IsLexicallyInRoot(root, unsafePath) { + subPath, err := filepath.Rel(root, unsafePath) + if err != nil { + return nil, err + } + unsafePath = subPath + } + + // Check for any silly mode bits. + if mode&^0o7777 != 0 { + return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) + } + // Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if + // passed. While it would make sense to return an error in that case (since + // the user has asked for a mode that won't be applied), for compatibility + // reasons we have to ignore these bits. 
+ if ignoredBits := mode &^ 0o1777; ignoredBits != 0 { + logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits) + mode &= 0o1777 + } + + rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open root handle: %w", err) + } + defer rootDir.Close() + + return retryEAGAIN(func() (*os.File, error) { + return pathrs.MkdirAllHandle(rootDir, unsafePath, mode) + }) +} + +// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the +// returned handle, for callers that don't need to use it. +func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error { + f, err := MkdirAllInRootOpen(root, unsafePath, mode) + if err == nil { + _ = f.Close() + } + return err +} diff --git a/vendor/github.com/opencontainers/runc/internal/pathrs/path.go b/vendor/github.com/opencontainers/runc/internal/pathrs/path.go new file mode 100644 index 0000000000..1ee7c795d5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/pathrs/path.go @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "strings" +) + +// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), +// but properly handling the case where path or root have a "/" suffix. 
+// +// NOTE: The return value only makes sense if the path is already mostly cleaned +// (i.e., doesn't contain "..", ".", nor unneeded "/"s). +func IsLexicallyInRoot(root, path string) bool { + root = strings.TrimRight(root, "/") + path = strings.TrimRight(path, "/") + return strings.HasPrefix(path+"/", root+"/") +} diff --git a/vendor/github.com/opencontainers/runc/internal/pathrs/procfs_pathrslite.go b/vendor/github.com/opencontainers/runc/internal/pathrs/procfs_pathrslite.go new file mode 100644 index 0000000000..37450a0eca --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/pathrs/procfs_pathrslite.go @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2025 Aleksa Sarai + * Copyright (C) 2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package pathrs + +import ( + "fmt" + "os" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" +) + +func procOpenReopen(openFn func(subpath string) (*os.File, error), subpath string, flags int) (*os.File, error) { + handle, err := retryEAGAIN(func() (*os.File, error) { + return openFn(subpath) + }) + if err != nil { + return nil, err + } + defer handle.Close() + + f, err := Reopen(handle, flags) + if err != nil { + return nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) + } + return f, nil +} + +// ProcSelfOpen is a wrapper around [procfs.Handle.OpenSelf] and +// [pathrs.Reopen], to let you one-shot open a procfs file with the given +// flags. +func ProcSelfOpen(subpath string, flags int) (*os.File, error) { + proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, err + } + defer proc.Close() + return procOpenReopen(proc.OpenSelf, subpath, flags) +} + +// ProcPidOpen is a wrapper around [procfs.Handle.OpenPid] and [pathrs.Reopen], +// to let you one-shot open a procfs file with the given flags. +func ProcPidOpen(pid int, subpath string, flags int) (*os.File, error) { + proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, err + } + defer proc.Close() + return procOpenReopen(func(subpath string) (*os.File, error) { + return proc.OpenPid(pid, subpath) + }, subpath, flags) +} + +// ProcThreadSelfOpen is a wrapper around [procfs.Handle.OpenThreadSelf] and +// [pathrs.Reopen], to let you one-shot open a procfs file with the given +// flags. The returned [procfs.ProcThreadSelfCloser] needs the same handling as +// when using pathrs-lite. 
+func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThreadSelfCloser, Err error) { + proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, nil, err + } + defer proc.Close() + + handle, closer, err := retryEAGAIN2(func() (*os.File, procfs.ProcThreadSelfCloser, error) { + return proc.OpenThreadSelf(subpath) + }) + if err != nil { + return nil, nil, err + } + if closer != nil { + defer func() { + if Err != nil { + closer() + } + }() + } + defer handle.Close() + + f, err := Reopen(handle, flags) + if err != nil { + return nil, nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) + } + return f, closer, nil +} + +// Reopen is a wrapper around pathrs.Reopen. +func Reopen(file *os.File, flags int) (*os.File, error) { + return retryEAGAIN(func() (*os.File, error) { + return pathrs.Reopen(file, flags) + }) +} diff --git a/vendor/github.com/opencontainers/runc/internal/pathrs/retry.go b/vendor/github.com/opencontainers/runc/internal/pathrs/retry.go new file mode 100644 index 0000000000..a51d335c0d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/pathrs/retry.go @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package pathrs + +import ( + "errors" + "fmt" + "time" + + "golang.org/x/sys/unix" +) + +// Based on >50k tests running "runc run" on a 16-core system with very heavy +// rename(2) load, the single longest latency caused by -EAGAIN retries was +// ~800us (with the vast majority being closer to 400us). So, a 2ms limit +// should give more than enough headroom for any real system in practice. +const retryDeadline = 2 * time.Millisecond + +// retryEAGAIN is a top-level retry loop for pathrs to try to avoid returning +// spurious errors in most normal user cases when using openat2 (libpathrs +// itself does up to 128 retries already, but this method takes a +// wallclock-deadline approach to simply retry until a timer elapses). +func retryEAGAIN[T any](fn func() (T, error)) (T, error) { + deadline := time.After(retryDeadline) + for { + v, err := fn() + if !errors.Is(err, unix.EAGAIN) { + return v, err + } + select { + case <-deadline: + return *new(T), fmt.Errorf("%v retry deadline exceeded: %w", retryDeadline, err) + default: + // retry + } + } +} + +// retryEAGAIN2 is like retryEAGAIN except it returns two values. +func retryEAGAIN2[T1, T2 any](fn func() (T1, T2, error)) (T1, T2, error) { + type ret struct { + v1 T1 + v2 T2 + } + v, err := retryEAGAIN(func() (ret, error) { + v1, v2, err := fn() + return ret{v1: v1, v2: v2}, err + }) + return v.v1, v.v2, err +} diff --git a/vendor/github.com/opencontainers/runc/internal/pathrs/root_pathrslite.go b/vendor/github.com/opencontainers/runc/internal/pathrs/root_pathrslite.go new file mode 100644 index 0000000000..899af27036 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/pathrs/root_pathrslite.go @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "golang.org/x/sys/unix" +) + +// OpenInRoot opens the given path inside the root with the provided flags. It +// is effectively shorthand for [securejoin.OpenInRoot] followed by +// [securejoin.Reopen]. +func OpenInRoot(root, subpath string, flags int) (*os.File, error) { + handle, err := retryEAGAIN(func() (*os.File, error) { + return pathrs.OpenInRoot(root, subpath) + }) + if err != nil { + return nil, err + } + defer handle.Close() + + return Reopen(handle, flags) +} + +// CreateInRoot creates a new file inside a root (as well as any missing parent +// directories) and returns a handle to said file. This effectively has +// open(O_CREAT|O_NOFOLLOW) semantics. If you want the creation to use O_EXCL, +// include it in the passed flags. The fileMode argument uses unix.* mode bits, +// *not* os.FileMode. +func CreateInRoot(root, subpath string, flags int, fileMode uint32) (*os.File, error) { + dir, filename := filepath.Split(subpath) + if filepath.Join("/", filename) == "/" { + return nil, fmt.Errorf("create in root subpath %q has bad trailing component %q", subpath, filename) + } + + dirFd, err := MkdirAllInRootOpen(root, dir, 0o755) + if err != nil { + return nil, err + } + defer dirFd.Close() + + // We know that the filename does not have any "/" components, and that + // dirFd is inside the root. O_NOFOLLOW will stop us from following + // trailing symlinks, so this is safe to do. 
libpathrs's Root::create_file + // works the same way. + flags |= unix.O_CREAT | unix.O_NOFOLLOW + fd, err := unix.Openat(int(dirFd.Fd()), filename, flags, fileMode) + if err != nil { + return nil, err + } + return os.NewFile(uintptr(fd), root+"/"+subpath), nil +} diff --git a/vendor/github.com/opencontainers/runc/internal/sys/doc.go b/vendor/github.com/opencontainers/runc/internal/sys/doc.go new file mode 100644 index 0000000000..075387f7a3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/sys/doc.go @@ -0,0 +1,5 @@ +// Package sys is an internal package that contains helper methods for dealing +// with Linux that are more complicated than basic wrappers. Basic wrappers +// usually belong in internal/linux. If you feel something belongs in +// libcontainer/utils or libcontainer/system, it probably belongs here instead. +package sys diff --git a/vendor/github.com/opencontainers/runc/internal/sys/opath_linux.go b/vendor/github.com/opencontainers/runc/internal/sys/opath_linux.go new file mode 100644 index 0000000000..17a216bc50 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/sys/opath_linux.go @@ -0,0 +1,53 @@ +package sys + +import ( + "fmt" + "os" + "runtime" + "strconv" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/pathrs" +) + +// FchmodFile is a wrapper around fchmodat2(AT_EMPTY_PATH) with fallbacks for +// older kernels. This is distinct from [File.Chmod] and [unix.Fchmod] in that +// it works on O_PATH file descriptors. +func FchmodFile(f *os.File, mode uint32) error { + err := unix.Fchmodat(int(f.Fd()), "", mode, unix.AT_EMPTY_PATH) + // If fchmodat2(2) is not available at all, golang.org/x/unix (probably + // in order to mirror glibc) returns EOPNOTSUPP rather than EINVAL + // (what the kernel actually returns for invalid flags, which is being + // emulated) or ENOSYS (which is what glibc actually sees). 
+ if err != unix.EINVAL && err != unix.EOPNOTSUPP { //nolint:errorlint // unix errors are bare + // err == nil is implicitly handled + return os.NewSyscallError("fchmodat2 AT_EMPTY_PATH", err) + } + + // AT_EMPTY_PATH support was added to fchmodat2 in Linux 6.6 + // (5daeb41a6fc9d0d81cb2291884b7410e062d8fa1). The alternative for + // older kernels is to go through /proc. + fdDir, closer, err2 := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY) + if err2 != nil { + return fmt.Errorf("fchmodat2 AT_EMPTY_PATH fallback: %w", err2) + } + defer closer() + defer fdDir.Close() + + err = unix.Fchmodat(int(fdDir.Fd()), strconv.Itoa(int(f.Fd())), mode, 0) + if err != nil { + err = fmt.Errorf("fchmodat /proc/self/fd/%d: %w", f.Fd(), err) + } + runtime.KeepAlive(f) + return err +} + +// FchownFile is a wrapper around fchownat(AT_EMPTY_PATH). This is distinct +// from [File.Chown] and [unix.Fchown] in that it works on O_PATH file +// descriptors. +func FchownFile(f *os.File, uid, gid int) error { + err := unix.Fchownat(int(f.Fd()), "", uid, gid, unix.AT_EMPTY_PATH) + runtime.KeepAlive(f) + return os.NewSyscallError("fchownat AT_EMPTY_PATH", err) +} diff --git a/vendor/github.com/opencontainers/runc/internal/sys/sysctl_linux.go b/vendor/github.com/opencontainers/runc/internal/sys/sysctl_linux.go new file mode 100644 index 0000000000..96876a55ff --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/sys/sysctl_linux.go @@ -0,0 +1,54 @@ +package sys + +import ( + "fmt" + "io" + "os" + "strings" + + "golang.org/x/sys/unix" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" +) + +func procfsOpenRoot(proc *procfs.Handle, subpath string, flags int) (*os.File, error) { + handle, err := proc.OpenRoot(subpath) + if err != nil { + return nil, err + } + defer handle.Close() + + return pathrs.Reopen(handle, flags) +} + +// WriteSysctls sets the given sysctls to the requested values. 
+func WriteSysctls(sysctls map[string]string) error { + // We are going to write multiple sysctls, which require writing to an + // unmasked procfs which is not going to be cached. To avoid creating a new + // procfs instance for each one, just allocate one handle for all of them. + proc, err := procfs.OpenUnsafeProcRoot() + if err != nil { + return err + } + defer proc.Close() + + for key, value := range sysctls { + keyPath := strings.ReplaceAll(key, ".", "/") + + sysctlFile, err := procfsOpenRoot(proc, "sys/"+keyPath, unix.O_WRONLY|unix.O_TRUNC|unix.O_CLOEXEC) + if err != nil { + return fmt.Errorf("open sysctl %s file: %w", key, err) + } + defer sysctlFile.Close() + + n, err := io.WriteString(sysctlFile, value) + if n != len(value) && err == nil { + err = fmt.Errorf("short write to file (%d bytes != %d bytes)", n, len(value)) + } + if err != nil { + return fmt.Errorf("failed to write sysctl %s = %q: %w", key, value, err) + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/internal/sys/verify_inode_unix.go b/vendor/github.com/opencontainers/runc/internal/sys/verify_inode_unix.go new file mode 100644 index 0000000000..d5019db57e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/internal/sys/verify_inode_unix.go @@ -0,0 +1,30 @@ +package sys + +import ( + "fmt" + "os" + "runtime" + + "golang.org/x/sys/unix" +) + +// VerifyInodeFunc is the callback passed to [VerifyInode] to check if the +// inode is the expected type (and on the correct filesystem type, in the case +// of filesystem-specific inodes). +type VerifyInodeFunc func(stat *unix.Stat_t, statfs *unix.Statfs_t) error + +// VerifyInode verifies that the underlying inode for the given file matches an +// expected inode type (possibly on a particular kind of filesystem). This is +// mainly a wrapper around [VerifyInodeFunc]. 
+func VerifyInode(file *os.File, checkFunc VerifyInodeFunc) error { + var stat unix.Stat_t + if err := unix.Fstat(int(file.Fd()), &stat); err != nil { + return fmt.Errorf("fstat %q: %w", file.Name(), err) + } + var statfs unix.Statfs_t + if err := unix.Fstatfs(int(file.Fd()), &statfs); err != nil { + return fmt.Errorf("fstatfs %q: %w", file.Name(), err) + } + runtime.KeepAlive(file) + return checkFunc(&stat, &statfs) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md new file mode 100644 index 0000000000..901351edb7 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md @@ -0,0 +1,261 @@ +# libcontainer + +[![Go Reference](https://pkg.go.dev/badge/github.com/opencontainers/runc/libcontainer.svg)](https://pkg.go.dev/github.com/opencontainers/runc/libcontainer) + +Libcontainer provides a native Go implementation for creating containers +with namespaces, cgroups, capabilities, and filesystem access controls. +It allows you to manage the lifecycle of the container, performing additional operations +after the container is created. + + +## Container +A container is a self-contained execution environment that shares the kernel of the +host system and which is (optionally) isolated from other containers in the system. + +## Using libcontainer + +### Container init + +Because containers are spawned in a two-step process, you will need a binary that +will be executed as the init process for the container. In libcontainer, we use +the current binary (/proc/self/exe) to be executed as the init process, with the +argument "init". We call this first-step process "bootstrap", so you always need an "init" +function as the entry of "bootstrap". + +In addition to the Go init function, the early-stage bootstrap is handled by importing +[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
+ +For details on how runc implements such "init", see +[init.go](https://github.com/opencontainers/runc/blob/master/init.go) +and [libcontainer/init_linux.go](https://github.com/opencontainers/runc/blob/master/libcontainer/init_linux.go). + +### Device management + +If you want containers that have access to some devices, you need to import +this package into your code: + +```go + import ( + _ "github.com/opencontainers/cgroups/devices" + ) +``` + +Without doing this, libcontainer cgroup manager won't be able to set up device +access rules, and will fail if devices are specified in the container +configuration. + +### Container creation + +To create a container you first have to create a configuration +struct describing how the container is to be created. A sample would look similar to this: + +```go +defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV +var devices []*devices.Rule +for _, device := range specconv.AllowedDevices { + devices = append(devices, &device.Rule) +} +config := &configs.Config{ + Rootfs: "/your/path/to/rootfs", + Capabilities: &configs.Capabilities{ + Bounding: []string{ + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Effective: []string{ + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Permitted: []string{ + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + }, + Namespaces: configs.Namespaces([]configs.Namespace{ + {Type: configs.NEWNS}, + {Type: configs.NEWUTS}, + {Type: configs.NEWIPC}, + {Type: configs.NEWPID}, + {Type: configs.NEWUSER}, + {Type: configs.NEWNET}, + {Type: configs.NEWCGROUP}, + }), + Cgroups: &configs.Cgroup{ + Name: "test-container", + Parent: "system", + Resources: &configs.Resources{ + MemorySwappiness: nil, + Devices: devices, + }, + }, + MaskPaths: []string{ + "/proc/kcore", + "/sys/firmware", + }, + ReadonlyPaths: []string{ + "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", + }, + Devices: specconv.AllowedDevices, + Hostname: "testing", + Mounts: []*configs.Mount{ + { + Source: "proc", + Destination: 
"/proc", + Device: "proc", + Flags: defaultMountFlags, + }, + { + Source: "tmpfs", + Destination: "/dev", + Device: "tmpfs", + Flags: unix.MS_NOSUID | unix.MS_STRICTATIME, + Data: "mode=755", + }, + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + Flags: unix.MS_NOSUID | unix.MS_NOEXEC, + Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", + }, + { + Device: "tmpfs", + Source: "shm", + Destination: "/dev/shm", + Data: "mode=1777,size=65536k", + Flags: defaultMountFlags, + }, + { + Source: "mqueue", + Destination: "/dev/mqueue", + Device: "mqueue", + Flags: defaultMountFlags, + }, + { + Source: "sysfs", + Destination: "/sys", + Device: "sysfs", + Flags: defaultMountFlags | unix.MS_RDONLY, + }, + }, + UIDMappings: []configs.IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 65536, + }, + }, + GIDMappings: []configs.IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 65536, + }, + }, + Networks: []*configs.Network{ + { + Type: "loopback", + Address: "127.0.0.1/0", + Gateway: "localhost", + }, + }, + Rlimits: []configs.Rlimit{ + { + Type: unix.RLIMIT_NOFILE, + Hard: uint64(1025), + Soft: uint64(1025), + }, + }, +} +``` + +Once you have the configuration populated you can create a container +with a specified ID under a specified state directory: + +```go +container, err := libcontainer.Create("/run/containers", "container-id", config) +if err != nil { + logrus.Fatal(err) + return +} +``` + +To spawn bash as the initial process inside the container and have the +processes pid returned in order to wait, signal, or kill the process: + +```go +process := &libcontainer.Process{ + Args: []string{"/bin/bash"}, + Env: []string{"PATH=/bin"}, + User: "daemon", + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, + Init: true, +} + +err := container.Run(process) +if err != nil { + container.Destroy() + logrus.Fatal(err) + return +} + +// wait for the process to finish. 
+_, err = process.Wait() +if err != nil { + logrus.Fatal(err) +} + +// destroy the container. +container.Destroy() +``` + +Additional ways to interact with a running container are: + +```go +// return all the pids for all processes running inside the container. +processes, err := container.Processes() + +// get detailed cpu, memory, io, and network statistics for the container and +// its processes. +stats, err := container.Stats() + +// pause all processes inside the container. +container.Pause() + +// resume all paused processes. +container.Resume() + +// send signal to container's init process. +container.Signal(signal) + +// update container resource constraints. +container.Set(config) + +// get current status of the container. +status, err := container.Status() + +// get current container's state information. +state, err := container.State() +``` + + +## Checkpoint & Restore + +libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers. +This lets you save the state of a process running inside a container to disk, and then restore +that state into a new process, on the same machine or on another machine. + +`criu` version 1.5.2 or higher is required to use checkpoint and restore. +If you don't already have `criu` installed, you can build it from source, following the +[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image +generated when building libcontainer with docker. + + +## Copyright and license + +Code and documentation copyright 2014 Docker, inc. +The code and documentation are released under the [Apache 2.0 license](../LICENSE). +The documentation is also released under Creative Commons Attribution 4.0 International License. +You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/. 
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md b/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md new file mode 100644 index 0000000000..c6fe4eaa8a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md @@ -0,0 +1,465 @@ +## Container Specification - v1 + +This is the standard configuration for version 1 containers. It includes +namespaces, standard filesystem setup, a default Linux capability set, and +information about resource reservations. It also has information about any +populated environment settings for the processes running inside a container. + +Along with the configuration of how a container is created the standard also +discusses actions that can be performed on a container to manage and inspect +information about the processes running inside. + +The v1 profile is meant to be able to accommodate the majority of applications +with a strong security configuration. + +### System Requirements and Compatibility + +Minimum requirements: +* Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches) +* Mounted cgroups with each subsystem in its own hierarchy + + +### Namespaces + +| Flag | Enabled | +| --------------- | ------- | +| CLONE_NEWPID | 1 | +| CLONE_NEWUTS | 1 | +| CLONE_NEWIPC | 1 | +| CLONE_NEWNET | 1 | +| CLONE_NEWNS | 1 | +| CLONE_NEWUSER | 1 | +| CLONE_NEWCGROUP | 1 | + +Namespaces are created for the container via the `unshare` syscall. + + +### Filesystem + +A root filesystem must be provided to a container for execution. The container +will use this root filesystem (rootfs) to jail and spawn processes inside where +the binaries and system libraries are local to that directory. Any binaries +to be executed must be contained within this rootfs. + +Mounts that happen inside the container are automatically cleaned up when the +container exits as the mount namespace is destroyed and the kernel will +unmount all the mounts that were setup within that namespace. 
+ +For a container to execute properly there are certain filesystems that +are required to be mounted within the rootfs that the runtime will set up. + +| Path | Type | Flags | Data | +| ----------- | ------ | -------------------------------------- | ---------------------------------------- | +| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | +| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | +| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | +| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | + + +After a container's filesystems are mounted within the newly created +mount namespace `/dev` will need to be populated with a set of device nodes. +It is expected that a rootfs does not need to have any device nodes specified +for `/dev` within the rootfs as the container will set up the correct devices +that are required for executing a container's process. + +| Path | Mode | Access | +| ------------ | ---- | ---------- | +| /dev/null | 0666 | rwm | +| /dev/zero | 0666 | rwm | +| /dev/full | 0666 | rwm | +| /dev/tty | 0666 | rwm | +| /dev/random | 0666 | rwm | +| /dev/urandom | 0666 | rwm | + + +**ptmx** +`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within +the container. + +The use of a pseudo TTY is optional within a container and it should support both. +If a pseudo TTY is provided to the container `/dev/console` will need to be +set up by binding the console in `/dev/` after it has been populated and mounted +in tmpfs. + +| Source | Destination | UID GID | Mode | Type | +| --------------- | ------------ | ------- | ---- | ---- | +| *pty host path* | /dev/console | 0 0 | 0600 | bind | + + +After `/dev/null` has been set up we check for any external links between +the container's io, STDIN, STDOUT, STDERR. 
If the container's io is pointing +to `/dev/null` outside the container we close and `dup2` the `/dev/null` +that is local to the container's rootfs. + + +After the container has `/proc` mounted a few standard symlinks are set up +within `/dev/` for the io. + +| Source | Destination | +| --------------- | ----------- | +| /proc/self/fd | /dev/fd | +| /proc/self/fd/0 | /dev/stdin | +| /proc/self/fd/1 | /dev/stdout | +| /proc/self/fd/2 | /dev/stderr | + +A `pivot_root` is used to change the root for the process, effectively +jailing the process inside the rootfs. + +```c +put_old = mkdir(...); +pivot_root(rootfs, put_old); +chdir("/"); +unmount(put_old, MS_DETACH); +rmdir(put_old); +``` + +For containers running with a rootfs inside `ramfs` a `MS_MOVE` combined +with a `chroot` is required as `pivot_root` is not supported in `ramfs`. + +```c +mount(rootfs, "/", NULL, MS_MOVE, NULL); +chroot("."); +chdir("/"); +``` + +The `umask` is set back to `0022` after the filesystem setup has been completed. + +### Resources + +Cgroups are used to handle resource allocation for containers. This includes +system resources like cpu, memory, and device access. + +| Subsystem | Enabled | +| ---------- | ------- | +| devices | 1 | +| memory | 1 | +| cpu | 1 | +| cpuacct | 1 | +| cpuset | 1 | +| blkio | 1 | +| perf_event | 1 | +| freezer | 1 | +| hugetlb | 1 | +| pids | 1 | + + +All cgroup subsystems are joined so that statistics can be collected from +each of the subsystems. Freezer does not expose any stats but is joined +so that containers can be paused and resumed. + +The parent process of the container's init must place the init pid inside +the correct cgroups before the initialization begins. This is done so +that no processes or threads escape the cgroups. This sync is +done via a pipe ( specified in the runtime section below ) that the container's +init process will block waiting for the parent to finish setup. 
+ +### IntelRdt + +Intel platforms with new Xeon CPU support Resource Director Technology (RDT). +Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are +two sub-features of RDT. + +Cache Allocation Technology (CAT) provides a way for the software to restrict +cache allocation to a defined 'subset' of L3 cache which may be overlapping +with other 'subsets'. The different subsets are identified by class of +service (CLOS) and each CLOS has a capacity bitmask (CBM). + +Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle +over memory bandwidth for the software. A user controls the resource by +indicating the percentage of maximum memory bandwidth or memory bandwidth limit +in MBps unit if MBA Software Controller is enabled. + +It can be used to handle L3 cache and memory bandwidth resources allocation +for containers if hardware and kernel support Intel RDT CAT and MBA features. + +In Linux 4.10 kernel or newer, the interface is defined and exposed via +"resource control" filesystem, which is a "cgroup-like" interface. + +Comparing with cgroups, it has similar process management lifecycle and +interfaces in a container. But unlike cgroups' hierarchy, it has single level +filesystem layout. + +CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via +"resource control" filesystem. + +Intel RDT "resource control" filesystem hierarchy: +``` +mount -t resctrl resctrl /sys/fs/resctrl +tree /sys/fs/resctrl +/sys/fs/resctrl/ +|-- info +| |-- L3 +| | |-- cbm_mask +| | |-- min_cbm_bits +| | |-- num_closids +| |-- MB +| |-- bandwidth_gran +| |-- delay_linear +| |-- min_bandwidth +| |-- num_closids +|-- ... +|-- schemata +|-- tasks +|-- <container_id> + |-- ... + |-- schemata + |-- tasks +``` + +For runc, we can make use of `tasks` and `schemata` configuration for L3 +cache and memory bandwidth resources constraints. + +The file `tasks` has a list of tasks that belong to this group (e.g., +"<container_id>" group). 
Tasks can be added to a group by writing the task ID +to the "tasks" file (which will automatically remove them from the previous +group to which they belonged). New tasks created by fork(2) and clone(2) are +added to the same group as their parent. + +The file `schemata` has a list of all the resources available to this group. +Each resource (L3 cache, memory bandwidth) has its own line and format. + +L3 cache schema: +It has allocation bitmasks/values for L3 cache on each socket, which +contains L3 cache id and capacity bitmask (CBM). +``` + Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." +``` +For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" +which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + +The valid L3 cache CBM is a *contiguous bits set* and number of bits that can +be set is less than the max bit. The max bits in the CBM is varied among +supported Intel CPU models. Kernel will check if it is valid when writing. +e.g., default value 0xfffff in root indicates the max bits of CBM is 20 +bits, which maps to the entire L3 cache capacity. Some valid CBM values to +set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. + +Memory bandwidth schema: +It has allocation values for memory bandwidth on each socket, which contains +L3 cache id and memory bandwidth. +``` + Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." +``` +For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" + +The minimum bandwidth percentage value for each CPU model is predefined and +can be looked up through "info/MB/min_bandwidth". The bandwidth granularity +that is allocated is also dependent on the CPU model and can be looked up at +"info/MB/bandwidth_gran". The available bandwidth control steps are: +min_bw + N * bw_gran. Intermediate values are rounded to the next control +step available on the hardware. 
+ +If MBA Software Controller is enabled through mount option "-o mba_MBps" +mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl +We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit +instead of "percentages". The kernel underneath would use a software feedback +mechanism or a "Software Controller" which reads the actual bandwidth using +MBM counters and adjust the memory bandwidth percentages to ensure: +"actual memory bandwidth < user specified memory bandwidth". + +For example, on a two-socket machine, the schema line could be +"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 +and 7000 MBps memory bandwidth limit on socket 1. + +For more information about Intel RDT kernel interface: +https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt + +``` +An example for runc: +Consider a two-socket machine with two L3 caches where the default CBM is +0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% +with a memory bandwidth granularity of 10%. + +Tasks inside the container only have access to the "upper" 7/11 of L3 cache +on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a +maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. + +"linux": { + "intelRdt": { + "closID": "guaranteed_group", + "l3CacheSchema": "L3:0=7f0;1=1f", + "memBwSchema": "MB:0=20;1=70" + } +} +``` + +### Security + +The standard set of Linux capabilities that are set in a container +provide a good default for security and flexibility for the applications. 
+ + +| Capability | Enabled | +| -------------------- | ------- | +| CAP_NET_RAW | 1 | +| CAP_NET_BIND_SERVICE | 1 | +| CAP_AUDIT_READ | 1 | +| CAP_AUDIT_WRITE | 1 | +| CAP_DAC_OVERRIDE | 1 | +| CAP_SETFCAP | 1 | +| CAP_SETPCAP | 1 | +| CAP_SETGID | 1 | +| CAP_SETUID | 1 | +| CAP_MKNOD | 1 | +| CAP_CHOWN | 1 | +| CAP_FOWNER | 1 | +| CAP_FSETID | 1 | +| CAP_KILL | 1 | +| CAP_SYS_CHROOT | 1 | +| CAP_NET_BROADCAST | 0 | +| CAP_SYS_MODULE | 0 | +| CAP_SYS_RAWIO | 0 | +| CAP_SYS_PACCT | 0 | +| CAP_SYS_ADMIN | 0 | +| CAP_SYS_NICE | 0 | +| CAP_SYS_RESOURCE | 0 | +| CAP_SYS_TIME | 0 | +| CAP_SYS_TTY_CONFIG | 0 | +| CAP_AUDIT_CONTROL | 0 | +| CAP_MAC_OVERRIDE | 0 | +| CAP_MAC_ADMIN | 0 | +| CAP_NET_ADMIN | 0 | +| CAP_SYSLOG | 0 | +| CAP_DAC_READ_SEARCH | 0 | +| CAP_LINUX_IMMUTABLE | 0 | +| CAP_IPC_LOCK | 0 | +| CAP_IPC_OWNER | 0 | +| CAP_SYS_PTRACE | 0 | +| CAP_SYS_BOOT | 0 | +| CAP_LEASE | 0 | +| CAP_WAKE_ALARM | 0 | +| CAP_BLOCK_SUSPEND | 0 | + + +Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor) +and [selinux](http://selinuxproject.org/page/Main_Page) can be used with +the containers. A container should support setting an apparmor profile or +selinux process and mount labels if provided in the configuration. 
+ +Standard apparmor profile: +```c +#include <tunables/global> +profile <profile_name> flags=(attach_disconnected,mediate_deleted) { + #include <abstractions/base> + network, + capability, + file, + umount, + + deny @{PROC}/sys/fs/** wklx, + deny @{PROC}/sysrq-trigger rwklx, + deny @{PROC}/mem rwklx, + deny @{PROC}/kmem rwklx, + deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, + deny @{PROC}/sys/kernel/*/** wklx, + + deny mount, + + deny /sys/[^f]*/** wklx, + deny /sys/f[^s]*/** wklx, + deny /sys/fs/[^c]*/** wklx, + deny /sys/fs/c[^g]*/** wklx, + deny /sys/fs/cg[^r]*/** wklx, + deny /sys/firmware/efi/efivars/** rwklx, + deny /sys/kernel/security/** rwklx, +} +``` + +*TODO: seccomp work is being done to find a good default config* + +### Runtime and Init Process + +During container creation the parent process needs to talk to the container's init +process and have a form of synchronization. This is accomplished by creating +a pipe that is passed to the container's init. When the init process first spawns +it will block on its side of the pipe until the parent closes its side. This +allows the parent to have time to set the new process inside a cgroup hierarchy +and/or write any uid/gid mappings required for user namespaces. +The pipe is passed to the init process via FD 3. + +The application consuming libcontainer should be compiled statically. libcontainer +does not define any init process and the arguments provided are used to `exec` the +process inside the application. There should be no long running init within the +container spec. + +If a pseudo tty is provided to a container it will open and `dup2` the console +as the container's STDIN, STDOUT, STDERR as well as mounting the console +as `/dev/console`. + +An extra set of mounts are provided to a container and setup for use. A container's +rootfs can contain some non portable files inside that can cause side effects during +execution of a process. These files are usually created and populated with the container +specific information via the runtime. 
+ +**Extra runtime files:** +* /etc/hosts +* /etc/resolv.conf +* /etc/hostname +* /etc/localtime + + +#### Defaults + +There are a few defaults that can be overridden by users, but in their omission +these apply to processes within a container. + +| Type | Value | +| ------------------- | ------------------------------ | +| Parent Death Signal | SIGKILL | +| UID | 0 | +| GID | 0 | +| GROUPS | 0, NULL | +| CWD | "/" | +| $HOME | Current user's home dir or "/" | +| Readonly rootfs | false | +| Pseudo TTY | false | + + +## Actions + +After a container is created there is a standard set of actions that can +be done to the container. These actions are part of the public API for +a container. + +| Action | Description | +| -------------- | ------------------------------------------------------------------ | +| Get processes | Return all the pids for processes running inside a container | +| Get Stats | Return resource statistics for the container as a whole | +| Wait | Waits on the container's init process ( pid 1 ) | +| Wait Process | Wait on any of the container's processes returning the exit status | +| Destroy | Kill the container's init process and remove any filesystem state | +| Signal | Send a signal to the container's init process | +| Signal Process | Send a signal to any of the container's processes | +| Pause | Pause all processes inside the container | +| Resume | Resume all processes inside the container if paused | +| Exec | Execute a new process inside of the container ( requires setns ) | +| Set | Setup configs of the container after it's created | + +### Execute a new process inside of a running container + +User can execute a new process inside of a running container. Any binaries to be +executed must be accessible within the container's rootfs. + +The started process will run inside the container's rootfs. Any changes +made by the process to the container's filesystem will persist after the +process finished executing. 
+ +The started process will join all the container's existing namespaces. When the +container is paused, the process will also be paused and will resume when +the container is unpaused. The started process will only run when the container's +primary process (PID 1) is running, and will not be restarted when the container +is restarted. + +#### Planned additions + +The started process will have its own cgroups nested inside the container's +cgroups. This is used for process tracking and optionally resource allocation +handling for the new process. Freezer cgroup is required, the rest of the cgroups +are optional. The process executor must place its pid inside the correct +cgroups before starting the process. This is done so that no child processes or +threads can escape the cgroups. + +When the process is stopped, the process executor will try (in a best-effort way) +to stop all its children and remove the sub-cgroups. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go new file mode 100644 index 0000000000..4b03d4c715 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go @@ -0,0 +1,16 @@ +package apparmor + +import "errors" + +var ( + // IsEnabled returns true if apparmor is enabled for the host. + IsEnabled = isEnabled + + // ApplyProfile will apply the profile with the specified name to the process after + // the next exec. It is only supported on Linux and produces an ErrApparmorNotEnabled + // on other platforms. + ApplyProfile = applyProfile + + // ErrApparmorNotEnabled indicates that AppArmor is not enabled or not supported. 
+ ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported") +) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go new file mode 100644 index 0000000000..a3a8e93258 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go @@ -0,0 +1,70 @@ +package apparmor + +import ( + "errors" + "fmt" + "os" + "sync" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer/utils" +) + +var ( + appArmorEnabled bool + checkAppArmor sync.Once +) + +// isEnabled returns true if apparmor is enabled for the host. +func isEnabled() bool { + checkAppArmor.Do(func() { + if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil { + buf, err := os.ReadFile("/sys/module/apparmor/parameters/enabled") + appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y' + } + }) + return appArmorEnabled +} + +func setProcAttr(attr, value string) error { + attr = utils.CleanPath(attr) + attrSubPath := "attr/apparmor/" + attr + if _, err := os.Stat("/proc/self/" + attrSubPath); errors.Is(err, os.ErrNotExist) { + // fall back to the old convention + attrSubPath = "attr/" + attr + } + + // Under AppArmor you can only change your own attr, so there's no reason + // to not use /proc/thread-self/ (instead of /proc//, like libapparmor + // does). 
+ f, closer, err := pathrs.ProcThreadSelfOpen(attrSubPath, unix.O_WRONLY|unix.O_CLOEXEC) + if err != nil { + return err + } + defer closer() + defer f.Close() + + _, err = f.WriteString(value) + return err +} + +// changeOnExec reimplements aa_change_onexec from libapparmor in Go +func changeOnExec(name string) error { + if err := setProcAttr("exec", "exec "+name); err != nil { + return fmt.Errorf("apparmor failed to apply profile: %w", err) + } + return nil +} + +// applyProfile will apply the profile with the specified name to the process after +// the next exec. It is only supported on Linux and produces an error on other +// platforms. +func applyProfile(name string) error { + if name == "" { + return nil + } + + return changeOnExec(name) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go new file mode 100644 index 0000000000..4484cd2397 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go @@ -0,0 +1,14 @@ +//go:build !linux + +package apparmor + +func isEnabled() bool { + return false +} + +func applyProfile(name string) error { + if name != "" { + return ErrApparmorNotEnabled + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go b/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go new file mode 100644 index 0000000000..379e66c66c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go @@ -0,0 +1,158 @@ +//go:build linux + +package capabilities + +import ( + "errors" + "fmt" + "sort" + "strings" + "sync" + "syscall" + + "github.com/moby/sys/capability" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/sirupsen/logrus" +) + +func capToStr(c capability.Cap) string { + return "CAP_" + strings.ToUpper(c.String()) +} + +var capMap = 
sync.OnceValues(func() (map[string]capability.Cap, error) { + list, err := capability.ListSupported() + if err != nil { + return nil, err + } + cm := make(map[string]capability.Cap, len(list)) + for _, c := range list { + cm[capToStr(c)] = c + } + return cm, nil +}) + +// KnownCapabilities returns the list of the known capabilities. +// Used by `runc features`. +func KnownCapabilities() []string { + list := capability.ListKnown() + res := make([]string, len(list)) + for i, c := range list { + res[i] = "CAP_" + strings.ToUpper(c.String()) + } + return res +} + +// New creates a new Caps from the given Capabilities config. Unknown Capabilities +// or Capabilities that are unavailable in the current environment are ignored, +// printing a warning instead. +func New(capConfig *configs.Capabilities) (*Caps, error) { + var c Caps + if capConfig == nil { + return &c, nil + } + + _, err := capMap() + if err != nil { + return nil, err + } + unknownCaps := make(map[string]struct{}) + c.caps = map[capability.CapType][]capability.Cap{ + capability.BOUNDING: capSlice(capConfig.Bounding, unknownCaps), + capability.EFFECTIVE: capSlice(capConfig.Effective, unknownCaps), + capability.INHERITABLE: capSlice(capConfig.Inheritable, unknownCaps), + capability.PERMITTED: capSlice(capConfig.Permitted, unknownCaps), + capability.AMBIENT: capSlice(capConfig.Ambient, unknownCaps), + } + if c.pid, err = capability.NewPid2(0); err != nil { + return nil, err + } + if len(unknownCaps) > 0 { + logrus.Warn("ignoring unknown or unavailable capabilities: ", mapKeys(unknownCaps)) + } + return &c, nil +} + +// capSlice converts the slice of capability names in caps, to their numeric +// equivalent, and returns them as a slice. Unknown or unavailable capabilities +// are not returned, but appended to unknownCaps. 
+func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap { + cm, _ := capMap() + out := make([]capability.Cap, 0, len(caps)) + for _, c := range caps { + if v, ok := cm[c]; !ok { + unknownCaps[c] = struct{}{} + } else { + out = append(out, v) + } + } + return out +} + +// mapKeys returns the keys of input in sorted order +func mapKeys(input map[string]struct{}) []string { + keys := make([]string, 0, len(input)) + for c := range input { + keys = append(keys, c) + } + sort.Strings(keys) + return keys +} + +// Caps holds the capabilities for a container. +type Caps struct { + pid capability.Capabilities + caps map[capability.CapType][]capability.Cap +} + +// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist. +func (c *Caps) ApplyBoundingSet() error { + if c.pid == nil { + return nil + } + c.pid.Clear(capability.BOUNDING) + c.pid.Set(capability.BOUNDING, c.caps[capability.BOUNDING]...) + return c.pid.Apply(capability.BOUNDING) +} + +// ApplyCaps sets all the capabilities for the current process in the config. +func (c *Caps) ApplyCaps() error { + if c.pid == nil { + return nil + } + c.pid.Clear(capability.CAPS | capability.BOUNDS) + for _, g := range []capability.CapType{ + capability.EFFECTIVE, + capability.PERMITTED, + capability.INHERITABLE, + capability.BOUNDING, + } { + c.pid.Set(g, c.caps[g]...) + } + if err := c.pid.Apply(capability.CAPS | capability.BOUNDS); err != nil { + return fmt.Errorf("can't apply capabilities: %w", err) + } + + // Old version of capability package used to ignore errors from setting + // ambient capabilities, which is now fixed (see + // https://github.com/kolyshkin/capability/pull/3). + // + // To maintain backward compatibility, set ambient caps one by one and + // don't return any errors, only warn. + ambs := c.caps[capability.AMBIENT] + err := capability.ResetAmbient() + + // EINVAL is returned when the kernel doesn't support ambient capabilities. 
+ // We ignore this because runc supports running on older kernels. + if err != nil && !errors.Is(err, syscall.EINVAL) { + return err + } + + for _, a := range ambs { + err := capability.SetAmbient(true, a) + if err != nil { + logrus.Warnf("can't raise ambient capability %s: %v", capToStr(a), err) + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go new file mode 100644 index 0000000000..d7b5ce960d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go @@ -0,0 +1,3 @@ +//go:build !linux + +package capabilities diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_deprecated.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_deprecated.go new file mode 100644 index 0000000000..7836b240ab --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_deprecated.go @@ -0,0 +1,29 @@ +package configs + +import "github.com/opencontainers/cgroups" + +// Deprecated: use [github.com/opencontainers/cgroups]. +type ( + Cgroup = cgroups.Cgroup + Resources = cgroups.Resources + FreezerState = cgroups.FreezerState + LinuxRdma = cgroups.LinuxRdma + BlockIODevice = cgroups.BlockIODevice + WeightDevice = cgroups.WeightDevice + ThrottleDevice = cgroups.ThrottleDevice + HugepageLimit = cgroups.HugepageLimit + IfPrioMap = cgroups.IfPrioMap +) + +// Deprecated: use [github.com/opencontainers/cgroups]. +const ( + Undefined = cgroups.Undefined + Frozen = cgroups.Frozen + Thawed = cgroups.Thawed +) + +// Deprecated: use [github.com/opencontainers/cgroups]. 
+var ( + NewWeightDevice = cgroups.NewWeightDevice + NewThrottleDevice = cgroups.NewThrottleDevice +) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go new file mode 100644 index 0000000000..3869a2edc2 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -0,0 +1,619 @@ +// Package configs provides various container-related configuration types +// used by libcontainer. +package configs + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "os/exec" + "strconv" + "strings" + "time" + "unsafe" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + devices "github.com/opencontainers/cgroups/devices/config" + "github.com/opencontainers/runtime-spec/specs-go" +) + +type Rlimit struct { + Type int `json:"type"` + Hard uint64 `json:"hard"` + Soft uint64 `json:"soft"` +} + +// IDMap represents UID/GID Mappings for User Namespaces. +type IDMap struct { + ContainerID int64 `json:"container_id"` + HostID int64 `json:"host_id"` + Size int64 `json:"size"` +} + +// Seccomp represents syscall restrictions +// By default, only the native architecture of the kernel is allowed to be used +// for syscalls. Additional architectures can be added by specifying them in +// Architectures. 
+type Seccomp struct { + DefaultAction Action `json:"default_action"` + Architectures []string `json:"architectures"` + Flags []specs.LinuxSeccompFlag `json:"flags"` + Syscalls []*Syscall `json:"syscalls"` + DefaultErrnoRet *uint `json:"default_errno_ret"` + ListenerPath string `json:"listener_path,omitempty"` + ListenerMetadata string `json:"listener_metadata,omitempty"` +} + +// Action is taken upon rule match in Seccomp +type Action int + +const ( + Kill Action = iota + 1 + Errno + Trap + Allow + Trace + Log + Notify + KillThread + KillProcess +) + +// Operator is a comparison operator to be used when matching syscall arguments in Seccomp +type Operator int + +const ( + EqualTo Operator = iota + 1 + NotEqualTo + GreaterThan + GreaterThanOrEqualTo + LessThan + LessThanOrEqualTo + MaskEqualTo +) + +// Arg is a rule to match a specific syscall argument in Seccomp +type Arg struct { + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"value_two"` + Op Operator `json:"op"` +} + +// Syscall is a rule to match a syscall in Seccomp +type Syscall struct { + Name string `json:"name"` + Action Action `json:"action"` + ErrnoRet *uint `json:"errnoRet"` + Args []*Arg `json:"args"` +} + +// Config defines configuration options for executing a process inside a contained environment. +type Config struct { + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root"` + + // ParentDeathSignal specifies the signal that is sent to the container's process in the case + // that the parent process dies. + ParentDeathSignal int `json:"parent_death_signal"` + + // Path to a directory containing the container's root filesystem. + Rootfs string `json:"rootfs"` + + // Umask is the umask to use inside of the container. 
+ Umask *uint32 `json:"umask"` + + // Readonlyfs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writable. + Readonlyfs bool `json:"readonlyfs"` + + // Specifies the mount propagation flags to be applied to /. + RootPropagation int `json:"rootPropagation"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! + Devices []*devices.Device `json:"devices"` + + MountLabel string `json:"mount_label"` + + // Hostname optionally sets the container's hostname if provided + Hostname string `json:"hostname"` + + // Domainname optionally sets the container's domainname if provided + Domainname string `json:"domainname"` + + // Namespaces specifies the container's namespaces that it should set up when cloning the init process + // If a namespace is not provided that namespace is shared from the container's parent process + Namespaces Namespaces `json:"namespaces"` + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capabilities not specified will be dropped from the process's capability mask + Capabilities *Capabilities `json:"capabilities"` + + // Networks specifies the container's network setup to be created + Networks []*Network `json:"networks"` + + // Routes can be specified to create entries in the route table as the container is started + Routes []*Route `json:"routes"` + + // Cgroups specifies specific cgroup settings for the various subsystems that the container is + // placed into to limit the resources the container has available.
+ Cgroups *cgroups.Cgroup `json:"cgroups"` + + // AppArmorProfile specifies the profile to apply to the process running in the container and is + // changed at the time the process is execed + AppArmorProfile string `json:"apparmor_profile,omitempty"` + + // ProcessLabel specifies the label to apply to the process running in the container. It is + // commonly used by selinux + ProcessLabel string `json:"process_label,omitempty"` + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []Rlimit `json:"rlimits,omitempty"` + + // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores + // for a process. Valid values are in the range [-1000, 1000], where processes with + // higher scores are preferred for being killed. If it is unset then we don't touch the current + // value. + // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ + OomScoreAdj *int `json:"oom_score_adj,omitempty"` + + // UIDMappings is an array of User ID mappings for User Namespaces + UIDMappings []IDMap `json:"uid_mappings"` + + // GIDMappings is an array of Group ID mappings for User Namespaces + GIDMappings []IDMap `json:"gid_mappings"` + + // MaskPaths specifies paths within the container's rootfs to mask over with a bind + // mount pointing to /dev/null as to prevent reads of the file. + MaskPaths []string `json:"mask_paths"` + + // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only + // so that these files prevent any writes. + ReadonlyPaths []string `json:"readonly_paths"` + + // Sysctl is a map of properties and their values. It is the equivalent of using + // sysctl -w my.property.name value in Linux. + Sysctl map[string]string `json:"sysctl"` + + // Seccomp allows actions to be taken whenever a syscall is made within the container.
+ // A number of rules are given, each having an action to be taken if a syscall matches it. + // A default action to be taken if no rules match is also given. + Seccomp *Seccomp `json:"seccomp"` + + // NoNewPrivileges controls whether processes in the container can gain additional privileges. + NoNewPrivileges bool `json:"no_new_privileges,omitempty"` + + // Hooks are a collection of actions to perform at various container lifecycle events. + // CommandHooks are serialized to JSON, but other hooks are not. + Hooks Hooks + + // Version is the version of opencontainer specification that is supported. + Version string `json:"version"` + + // Labels are user defined metadata that is stored in the config and populated on the state + Labels []string `json:"labels"` + + // NoNewKeyring will not allocate a new session keyring for the container. It will use the + // caller's keyring in this case. + NoNewKeyring bool `json:"no_new_keyring"` + + // IntelRdt specifies settings for Intel RDT group that the container is placed into + // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available + IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` + + // RootlessEUID is set when runc was launched with non-zero EUID. + // Note that RootlessEUID is set to false when launched with EUID=0 in userns. + // When RootlessEUID is set, runc creates a new userns for the container. + // (config.json needs to contain userns settings) + RootlessEUID bool `json:"rootless_euid,omitempty"` + + // RootlessCgroups is set when unlikely to have the full access to cgroups. + // When RootlessCgroups is set, cgroups errors are ignored. + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` + + // TimeOffsets specifies the offset for supporting time namespaces. + TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"` + + // Scheduler represents the scheduling attributes for a process.
+ Scheduler *Scheduler `json:"scheduler,omitempty"` + + // Personality contains configuration for the Linux personality syscall. + Personality *LinuxPersonality `json:"personality,omitempty"` + + // IOPriority is the container's I/O priority. + IOPriority *IOPriority `json:"io_priority,omitempty"` + + // ExecCPUAffinity is CPU affinity for a non-init process to be run in the container. + ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"` +} + +// Scheduler is based on the Linux sched_setattr(2) syscall. +type Scheduler = specs.Scheduler + +// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr. +func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) { + var policy uint32 + switch scheduler.Policy { + case specs.SchedOther: + policy = 0 + case specs.SchedFIFO: + policy = 1 + case specs.SchedRR: + policy = 2 + case specs.SchedBatch: + policy = 3 + case specs.SchedISO: + policy = 4 + case specs.SchedIdle: + policy = 5 + case specs.SchedDeadline: + policy = 6 + default: + return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy) + } + + var flags uint64 + for _, flag := range scheduler.Flags { + switch flag { + case specs.SchedFlagResetOnFork: + flags |= 0x01 + case specs.SchedFlagReclaim: + flags |= 0x02 + case specs.SchedFlagDLOverrun: + flags |= 0x04 + case specs.SchedFlagKeepPolicy: + flags |= 0x08 + case specs.SchedFlagKeepParams: + flags |= 0x10 + case specs.SchedFlagUtilClampMin: + flags |= 0x20 + case specs.SchedFlagUtilClampMax: + flags |= 0x40 + default: + return nil, fmt.Errorf("invalid scheduler flag: %s", flag) + } + } + + return &unix.SchedAttr{ + Size: unix.SizeofSchedAttr, + Policy: policy, + Flags: flags, + Nice: scheduler.Nice, + Priority: uint32(scheduler.Priority), + Runtime: scheduler.Runtime, + Deadline: scheduler.Deadline, + Period: scheduler.Period, + }, nil +} + +type IOPriority = specs.LinuxIOPriority + +type CPUAffinity struct { + Initial, Final *unix.CPUSet +} + +func toCPUSet(str string) 
(*unix.CPUSet, error) { + if str == "" { + return nil, nil + } + s := new(unix.CPUSet) + + // Since (*CPUset).Set silently ignores too high CPU values, + // find out what the maximum is, and return an error. + maxCPU := uint64(unsafe.Sizeof(*s) * 8) + toInt := func(v string) (int, error) { + ret, err := strconv.ParseUint(v, 10, 32) + if err != nil { + return 0, err + } + if ret >= maxCPU { + return 0, fmt.Errorf("values larger than %d are not supported", maxCPU-1) + } + return int(ret), nil + } + + for _, r := range strings.Split(str, ",") { + // Allow extra spaces around. + r = strings.TrimSpace(r) + // Allow empty elements (extra commas). + if r == "" { + continue + } + if r0, r1, found := strings.Cut(r, "-"); found { + start, err := toInt(r0) + if err != nil { + return nil, err + } + end, err := toInt(r1) + if err != nil { + return nil, err + } + if start > end { + return nil, errors.New("invalid range: " + r) + } + for i := start; i <= end; i++ { + s.Set(i) + } + } else { + val, err := toInt(r) + if err != nil { + return nil, err + } + s.Set(val) + } + } + if s.Count() == 0 { + return nil, fmt.Errorf("no CPUs found in %q", str) + } + + return s, nil +} + +// ConvertCPUAffinity converts [specs.CPUAffinity] to [CPUAffinity]. +func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) { + if sa == nil { + return nil, nil + } + initial, err := toCPUSet(sa.Initial) + if err != nil { + return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err) + } + final, err := toCPUSet(sa.Final) + if err != nil { + return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err) + } + if initial == nil && final == nil { + return nil, nil + } + + return &CPUAffinity{ + Initial: initial, + Final: final, + }, nil +} + +type ( + HookName string + HookList []Hook + Hooks map[HookName]HookList +) + +const ( + // Prestart commands are executed after the container namespaces are created, + // but before the user supplied command is executed from init. 
+ // Note: This hook is now deprecated + // Prestart commands are called in the Runtime namespace. + Prestart HookName = "prestart" + + // CreateRuntime commands MUST be called as part of the create operation after + // the runtime environment has been created but before the pivot_root has been executed. + // CreateRuntime is called immediately after the deprecated Prestart hook. + // CreateRuntime commands are called in the Runtime Namespace. + CreateRuntime HookName = "createRuntime" + + // CreateContainer commands MUST be called as part of the create operation after + // the runtime environment has been created but before the pivot_root has been executed. + // CreateContainer commands are called in the Container namespace. + CreateContainer HookName = "createContainer" + + // StartContainer commands MUST be called as part of the start operation and before + // the container process is started. + // StartContainer commands are called in the Container namespace. + StartContainer HookName = "startContainer" + + // Poststart commands are executed after the container init process starts. + // Poststart commands are called in the Runtime Namespace. + Poststart HookName = "poststart" + + // Poststop commands are executed after the container init process exits. + // Poststop commands are called in the Runtime Namespace. + Poststop HookName = "poststop" +) + +// HasHook checks if config has any hooks with any given names configured. +func (c *Config) HasHook(names ...HookName) bool { + if c.Hooks == nil { + return false + } + for _, h := range names { + if len(c.Hooks[h]) > 0 { + return true + } + } + return false +} + +// KnownHookNames returns the known hook names. +// Used by `runc features`. 
+func KnownHookNames() []string { + return []string{ + string(Prestart), // deprecated + string(CreateRuntime), + string(CreateContainer), + string(StartContainer), + string(Poststart), + string(Poststop), + } +} + +type Capabilities struct { + // Bounding is the set of capabilities checked by the kernel. + Bounding []string + // Effective is the set of capabilities checked by the kernel. + Effective []string + // Inheritable is the capabilities preserved across execve. + Inheritable []string + // Permitted is the limiting superset for effective capabilities. + Permitted []string + // Ambient is the ambient set of capabilities that are kept. + Ambient []string +} + +// Deprecated: use [Hooks.Run] instead. +func (hooks HookList) RunHooks(state *specs.State) error { + for i, h := range hooks { + if err := h.Run(state); err != nil { + return fmt.Errorf("error running hook #%d: %w", i, err) + } + } + + return nil +} + +func (hooks *Hooks) UnmarshalJSON(b []byte) error { + var state map[HookName][]CommandHook + + if err := json.Unmarshal(b, &state); err != nil { + return err + } + + *hooks = Hooks{} + for n, commandHooks := range state { + if len(commandHooks) == 0 { + continue + } + + (*hooks)[n] = HookList{} + for _, h := range commandHooks { + (*hooks)[n] = append((*hooks)[n], h) + } + } + + return nil +} + +func (hooks *Hooks) MarshalJSON() ([]byte, error) { + serialize := func(hooks []Hook) (serializableHooks []CommandHook) { + for _, hook := range hooks { + switch chook := hook.(type) { + case CommandHook: + serializableHooks = append(serializableHooks, chook) + default: + logrus.Warnf("cannot serialize hook of type %T, skipping", hook) + } + } + + return serializableHooks + } + + return json.Marshal(map[string]interface{}{ + "prestart": serialize((*hooks)[Prestart]), + "createRuntime": serialize((*hooks)[CreateRuntime]), + "createContainer": serialize((*hooks)[CreateContainer]), + "startContainer": serialize((*hooks)[StartContainer]), + "poststart": 
serialize((*hooks)[Poststart]), + "poststop": serialize((*hooks)[Poststop]), + }) +} + +// Run executes all hooks for the given hook name. +func (hooks Hooks) Run(name HookName, state *specs.State) error { + list := hooks[name] + for i, h := range list { + if err := h.Run(state); err != nil { + return fmt.Errorf("error running %s hook #%d: %w", name, i, err) + } + } + + return nil +} + +// SetDefaultEnv sets the environment for those CommandHook entries +// that do not have one set. +func (hooks HookList) SetDefaultEnv(env []string) { + for _, h := range hooks { + if ch, ok := h.(CommandHook); ok && len(ch.Env) == 0 { + ch.Env = env + } + } +} + +type Hook interface { + // Run executes the hook with the provided state. + Run(*specs.State) error +} + +// NewFunctionHook will call the provided function when the hook is run. +func NewFunctionHook(f func(*specs.State) error) FuncHook { + return FuncHook{ + run: f, + } +} + +type FuncHook struct { + run func(*specs.State) error +} + +func (f FuncHook) Run(s *specs.State) error { + return f.run(s) +} + +type Command struct { + Path string `json:"path"` + Args []string `json:"args"` + Env []string `json:"env"` + Dir string `json:"dir"` + Timeout *time.Duration `json:"timeout"` +} + +// NewCommandHook will execute the provided command when the hook is run. 
+func NewCommandHook(cmd *Command) CommandHook { + return CommandHook{ + Command: cmd, + } +} + +type CommandHook struct { + *Command +} + +func (c *Command) Run(s *specs.State) error { + b, err := json.Marshal(s) + if err != nil { + return err + } + var stdout, stderr bytes.Buffer + cmd := exec.Cmd{ + Path: c.Path, + Args: c.Args, + Env: c.Env, + Stdin: bytes.NewReader(b), + Stdout: &stdout, + Stderr: &stderr, + } + if err := cmd.Start(); err != nil { + return err + } + errC := make(chan error, 1) + go func() { + err := cmd.Wait() + if err != nil { + err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String()) + } + errC <- err + }() + var timerCh <-chan time.Time + if c.Timeout != nil { + timer := time.NewTimer(*c.Timeout) + defer timer.Stop() + timerCh = timer.C + } + select { + case err := <-errC: + return err + case <-timerCh: + _ = cmd.Process.Kill() + <-errC + return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go new file mode 100644 index 0000000000..e401f5331b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go @@ -0,0 +1,97 @@ +package configs + +import ( + "errors" + "fmt" + "math" +) + +var ( + errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found") + errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found") +) + +// Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details. 
+// https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h
+const (
+	PerLinux   = 0x0000
+	PerLinux32 = 0x0008
+)
+
+type LinuxPersonality struct {
+	// Domain for the personality
+	// can only contain values "LINUX" and "LINUX32"
+	Domain int `json:"domain"`
+}
+
+// HostUID translates a container uid into the matching uid on the host; the
+// two can only differ when the container is configured with a user namespace.
+func (c Config) HostUID(containerId int) (int, error) {
+	// No user namespace: the id is passed through untouched.
+	if !c.Namespaces.Contains(NEWUSER) {
+		return containerId, nil
+	}
+	if len(c.UIDMappings) == 0 {
+		return -1, errNoUIDMap
+	}
+	hostUID, ok := c.hostIDFromMapping(int64(containerId), c.UIDMappings)
+	if !ok {
+		return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId)
+	}
+	// A 32-bit binary on a 64-bit system may be handed a mapped id that
+	// does not fit in an int, in which case the mapping cannot be honoured.
+	// The result has to stay an int (rather than an int64) because that is
+	// what os.Setuid() takes.
+	if hostUID > math.MaxInt {
+		return -1, fmt.Errorf("mapping for uid %d (host id %d) is larger than native integer size (%d)", containerId, hostUID, math.MaxInt)
+	}
+	return int(hostUID), nil
+}
+
+// HostRootUID returns the host uid that container uid 0 translates to, which
+// may be non-zero when user namespaces are enabled.
+func (c Config) HostRootUID() (int, error) {
+	return c.HostUID(0)
+}
+
+// HostGID gets the translated gid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostGID(containerId int) (int, error) {
+	// No user namespace: the id is passed through untouched.
+	if !c.Namespaces.Contains(NEWUSER) {
+		return containerId, nil
+	}
+	if len(c.GIDMappings) == 0 {
+		return -1, errNoGIDMap
+	}
+	hostGID, ok := c.hostIDFromMapping(int64(containerId), c.GIDMappings)
+	if !ok {
+		return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId)
+	}
+	// A 32-bit binary on a 64-bit system may be handed a mapped id that
+	// does not fit in an int, in which case the mapping cannot be honoured.
+	// The result has to stay an int (rather than an int64) because that is
+	// what os.Setgid() takes.
+	if hostGID > math.MaxInt {
+		return -1, fmt.Errorf("mapping for gid %d (host id %d) is larger than native integer size (%d)", containerId, hostGID, math.MaxInt)
+	}
+	return int(hostGID), nil
+}
+
+// HostRootGID returns the host gid that container gid 0 translates to, which
+// may be non-zero when user namespaces are enabled.
+func (c Config) HostRootGID() (int, error) {
+	return c.HostGID(0)
+}
+
+// Utility function that gets a host ID for a container ID from user namespace map
+// if that ID is present in the map.
+func (c Config) hostIDFromMapping(containerID int64, uMap []IDMap) (int64, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go new file mode 100644 index 0000000000..1fd87ce6a4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go @@ -0,0 +1,9 @@ +//go:build gofuzz + +package configs + +func FuzzUnmarshalJSON(data []byte) int { + hooks := Hooks{} + _ = hooks.UnmarshalJSON(data) + return 1 +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go new file mode 100644 index 0000000000..f8d951ab8b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go @@ -0,0 +1,16 @@ +package configs + +type IntelRdt struct { + // The identity for RDT Class of Service + ClosID string `json:"closID,omitempty"` + + // The schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:=;=;..." + L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The schema of memory bandwidth per L3 cache id + // Format: "MB:=bandwidth0;=bandwidth1;..." + // The unit of memory bandwidth is specified in "percentages" by + // default, and in "MBps" if MBA Software Controller is enabled. 
+ MemBwSchema string `json:"memBwSchema,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go new file mode 100644 index 0000000000..bfd356e497 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -0,0 +1,7 @@ +package configs + +const ( + // EXT_COPYUP is a directive to copy up the contents of a directory when + // a tmpfs is mounted over it. + EXT_COPYUP = 1 << iota //nolint:golint,revive // ignore "don't use ALL_CAPS" warning +) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go new file mode 100644 index 0000000000..b69e9ab238 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go @@ -0,0 +1,66 @@ +package configs + +import "golang.org/x/sys/unix" + +type MountIDMapping struct { + // Recursive indicates if the mapping needs to be recursive. + Recursive bool `json:"recursive"` + + // UserNSPath is a path to a user namespace that indicates the necessary + // id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and + // GIDMappings must be set to nil. + UserNSPath string `json:"userns_path,omitempty"` + + // UIDMappings is the uid mapping set for this mount, to be used with + // MOUNT_ATTR_IDMAP. + UIDMappings []IDMap `json:"uid_mappings,omitempty"` + + // GIDMappings is the gid mapping set for this mount, to be used with + // MOUNT_ATTR_IDMAP. + GIDMappings []IDMap `json:"gid_mappings,omitempty"` +} + +type Mount struct { + // Source path for the mount. + Source string `json:"source"` + + // Destination path for the mount inside the container. + Destination string `json:"destination"` + + // Device the mount is for. + Device string `json:"device"` + + // Mount flags. 
+ Flags int `json:"flags"` + + // Mount flags that were explicitly cleared in the configuration (meaning + // the user explicitly requested that these flags *not* be set). + ClearedFlags int `json:"cleared_flags"` + + // Propagation Flags + PropagationFlags []int `json:"propagation_flags"` + + // Mount data applied to the mount. + Data string `json:"data"` + + // Relabel source if set, "z" indicates shared, "Z" indicates unshared. + Relabel string `json:"relabel"` + + // RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). + RecAttr *unix.MountAttr `json:"rec_attr"` + + // Extensions are additional flags that are specific to runc. + Extensions int `json:"extensions"` + + // Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil, + // the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings. + IDMapping *MountIDMapping `json:"id_mapping,omitempty"` +} + +func (m *Mount) IsBind() bool { + return m.Flags&unix.MS_BIND != 0 +} + +func (m *Mount) IsIDMapped() bool { + return m.IDMapping != nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go new file mode 100644 index 0000000000..1d4d9fe52a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go @@ -0,0 +1,9 @@ +//go:build !linux + +package configs + +type Mount struct{} + +func (m *Mount) IsBind() bool { + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go new file mode 100644 index 0000000000..a3329a31a9 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go @@ -0,0 +1,5 @@ +package configs + +type NamespaceType string + +type Namespaces []Namespace diff --git 
a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go new file mode 100644 index 0000000000..898f96fd0f --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go @@ -0,0 +1,133 @@ +package configs + +import ( + "fmt" + "os" + "sync" +) + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" + NEWCGROUP NamespaceType = "NEWCGROUP" + NEWTIME NamespaceType = "NEWTIME" +) + +var ( + nsLock sync.Mutex + supportedNamespaces = make(map[NamespaceType]bool) +) + +// NsName converts the namespace type to its filename +func NsName(ns NamespaceType) string { + switch ns { + case NEWNET: + return "net" + case NEWNS: + return "mnt" + case NEWPID: + return "pid" + case NEWIPC: + return "ipc" + case NEWUSER: + return "user" + case NEWUTS: + return "uts" + case NEWCGROUP: + return "cgroup" + case NEWTIME: + return "time" + } + return "" +} + +// IsNamespaceSupported returns whether a namespace is available or +// not +func IsNamespaceSupported(ns NamespaceType) bool { + nsLock.Lock() + defer nsLock.Unlock() + supported, ok := supportedNamespaces[ns] + if ok { + return supported + } + nsFile := NsName(ns) + // if the namespace type is unknown, just return false + if nsFile == "" { + return false + } + // We don't need to use /proc/thread-self here because the list of + // namespace types is unrelated to the thread. This lets us avoid having to + // do runtime.LockOSThread. 
+ _, err := os.Stat("/proc/self/ns/" + nsFile) + // a namespace is supported if it exists and we have permissions to read it + supported = err == nil + supportedNamespaces[ns] = supported + return supported +} + +func NamespaceTypes() []NamespaceType { + return []NamespaceType{ + NEWUSER, // Keep user NS always first, don't move it. + NEWIPC, + NEWUTS, + NEWNET, + NEWPID, + NEWNS, + NEWCGROUP, + NEWTIME, + } +} + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path"` +} + +func (n *Namespace) GetPath(pid int) string { + return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type)) +} + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} + +func (n *Namespaces) PathOf(t NamespaceType) string { + i := n.index(t) + if i == -1 { + return "" + } + return (*n)[i].Path +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go new file mode 100644 index 0000000000..26b70b26fa --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -0,0 +1,45 @@ +//go:build linux + +package configs + +import "golang.org/x/sys/unix" + +func (n *Namespace) Syscall() int { + return namespaceInfo[n.Type] +} + +var namespaceInfo = map[NamespaceType]int{ + NEWNET: 
unix.CLONE_NEWNET, + NEWNS: unix.CLONE_NEWNS, + NEWUSER: unix.CLONE_NEWUSER, + NEWIPC: unix.CLONE_NEWIPC, + NEWUTS: unix.CLONE_NEWUTS, + NEWPID: unix.CLONE_NEWPID, + NEWCGROUP: unix.CLONE_NEWCGROUP, + NEWTIME: unix.CLONE_NEWTIME, +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] + } + return uintptr(flag) +} + +// IsPrivate tells whether the namespace of type t is configured as private +// (i.e. it exists and is not shared). +func (n Namespaces) IsPrivate(t NamespaceType) bool { + for _, v := range n { + if v.Type == t { + return v.Path == "" + } + } + // Not found, so implicitly sharing a parent namespace. + return false +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go new file mode 100644 index 0000000000..10bf243650 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go @@ -0,0 +1,13 @@ +//go:build !linux && !windows + +package configs + +func (n *Namespace) Syscall() int { + panic("No namespace syscall support") +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This function returns flags only for new namespaces. 
// Network defines configuration for a container's networking stack.
//
// The network configuration can be omitted from a container, causing the
// container to be set up with the host's networking stack.
type Network struct {
	// Type sets the network's type, commonly veth and loopback.
	Type string `json:"type"`

	// Name is the name of the network interface.
	Name string `json:"name"`

	// Bridge is the bridge to use.
	Bridge string `json:"bridge"`

	// MacAddress contains the MAC address to set on the network interface.
	MacAddress string `json:"mac_address"`

	// Address contains the IPv4 address and mask to set on the network interface.
	Address string `json:"address"`

	// Gateway sets the gateway address that is used as the default for the interface.
	Gateway string `json:"gateway"`

	// IPv6Address contains the IPv6 address and mask to set on the network interface.
	IPv6Address string `json:"ipv6_address"`

	// IPv6Gateway sets the IPv6 gateway address that is used as the default for the interface.
	IPv6Gateway string `json:"ipv6_gateway"`

	// Mtu sets the MTU value for the interface. It is mirrored on both the
	// host's and the container's interfaces if a pair is created,
	// specifically in the case of type veth.
	// Note: This does not apply to loopback interfaces.
	Mtu int `json:"mtu"`

	// TxQueueLen sets the tx_queuelen value for the interface. It is mirrored
	// on both the host's and the container's interfaces if a pair is created,
	// specifically in the case of type veth.
	// Note: This does not apply to loopback interfaces.
	TxQueueLen int `json:"txqueuelen"`

	// HostInterfaceName is the unique name of the host-side interface of the
	// container's veth pair.
	HostInterfaceName string `json:"host_interface_name"`

	// HairpinMode specifies if hairpin NAT should be enabled on the virtual
	// interface bridge port in the case of type veth.
	// Note: This is unsupported on some systems.
	// Note: This does not apply to loopback interfaces.
	HairpinMode bool `json:"hairpin_mode"`
}
For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Destination specifies the destination IP address and mask in the CIDR form. + Destination string `json:"destination"` + + // Source specifies the source IP address and mask in the CIDR form. + Source string `json:"source"` + + // Gateway specifies the gateway IP address. + Gateway string `json:"gateway"` + + // InterfaceName specifies the device to set this route up for, for example eth0. + InterfaceName string `json:"interface_name"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go new file mode 100644 index 0000000000..88cf1cd60f --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go @@ -0,0 +1,87 @@ +package validate + +import ( + "errors" + "fmt" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +// rootlessEUIDCheck makes sure that the config can be applied when runc +// is being executed as a non-root user (euid != 0) in the current user namespace. +func rootlessEUIDCheck(config *configs.Config) error { + if !config.RootlessEUID { + return nil + } + if err := rootlessEUIDMappings(config); err != nil { + return err + } + if err := rootlessEUIDMount(config); err != nil { + return err + } + + // XXX: We currently can't verify the user config at all, because + // configs.Config doesn't store the user-related configs. So this + // has to be verified by setupUser() in init_linux.go. + + return nil +} + +func rootlessEUIDMappings(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWUSER) { + return errors.New("rootless container requires user namespaces") + } + // We only require mappings if we are not joining another userns. 
+ if config.Namespaces.IsPrivate(configs.NEWUSER) { + if len(config.UIDMappings) == 0 { + return errors.New("rootless containers requires at least one UID mapping") + } + if len(config.GIDMappings) == 0 { + return errors.New("rootless containers requires at least one GID mapping") + } + } + return nil +} + +// rootlessEUIDMount verifies that all mounts have valid uid=/gid= options, +// i.e. their arguments has proper ID mappings. +func rootlessEUIDMount(config *configs.Config) error { + // XXX: We could whitelist allowed devices at this point, but I'm not + // convinced that's a good idea. The kernel is the best arbiter of + // access control. + + // Check that the options list doesn't contain any uid= or gid= entries + // that don't resolve to root. + for _, mount := range config.Mounts { + // Look for a common substring; skip further processing + // if there can't be any uid= or gid= options. + if !strings.Contains(mount.Data, "id=") { + continue + } + for _, opt := range strings.Split(mount.Data, ",") { + if str, ok := strings.CutPrefix(opt, "uid="); ok { + uid, err := strconv.Atoi(str) + if err != nil { + // Ignore unknown mount options. + continue + } + if _, err := config.HostUID(uid); err != nil { + return fmt.Errorf("cannot specify %s mount option for rootless container: %w", opt, err) + } + } else if str, ok := strings.CutPrefix(opt, "gid="); ok { + gid, err := strconv.Atoi(str) + if err != nil { + // Ignore unknown mount options. 
+ continue + } + if _, err := config.HostGID(gid); err != nil { + return fmt.Errorf("cannot specify %s mount option for rootless container: %w", opt, err) + } + } + } + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go new file mode 100644 index 0000000000..e0052900f4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go @@ -0,0 +1,418 @@ +package validate + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runtime-spec/specs-go" + selinux "github.com/opencontainers/selinux/go-selinux" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +type check func(config *configs.Config) error + +func Validate(config *configs.Config) error { + checks := []check{ + cgroupsCheck, + rootfs, + network, + uts, + security, + namespaces, + sysctl, + intelrdtCheck, + rootlessEUIDCheck, + mountsStrict, + scheduler, + ioPriority, + } + for _, c := range checks { + if err := c(config); err != nil { + return err + } + } + // Relaxed validation rules for backward compatibility + warns := []check{ + mountsWarn, + } + for _, c := range warns { + if err := c(config); err != nil { + logrus.WithError(err).Warn("configuration") + } + } + return nil +} + +// rootfs validates if the rootfs is an absolute path and is not a symlink +// to the container's root filesystem. 
+func rootfs(config *configs.Config) error { + if _, err := os.Stat(config.Rootfs); err != nil { + return fmt.Errorf("invalid rootfs: %w", err) + } + cleaned, err := filepath.Abs(config.Rootfs) + if err != nil { + return fmt.Errorf("invalid rootfs: %w", err) + } + if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { + return fmt.Errorf("invalid rootfs: %w", err) + } + if filepath.Clean(config.Rootfs) != cleaned { + return errors.New("invalid rootfs: not an absolute path, or a symlink") + } + return nil +} + +func network(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWNET) { + if len(config.Networks) > 0 || len(config.Routes) > 0 { + return errors.New("unable to apply network settings without a private NET namespace") + } + } + return nil +} + +func uts(config *configs.Config) error { + if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { + return errors.New("unable to set hostname without a private UTS namespace") + } + if config.Domainname != "" && !config.Namespaces.Contains(configs.NEWUTS) { + return errors.New("unable to set domainname without a private UTS namespace") + } + return nil +} + +func security(config *configs.Config) error { + // restrict sys without mount namespace + if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && + !config.Namespaces.Contains(configs.NEWNS) { + return errors.New("unable to restrict sys entries without a private MNT namespace") + } + if config.ProcessLabel != "" && !selinux.GetEnabled() { + return errors.New("selinux label is specified in config, but selinux is disabled or not supported") + } + + return nil +} + +func namespaces(config *configs.Config) error { + if config.Namespaces.Contains(configs.NEWUSER) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + return errors.New("user namespaces aren't enabled in the kernel") + } + hasPath := config.Namespaces.PathOf(configs.NEWUSER) != "" + hasMappings := config.UIDMappings != nil || 
config.GIDMappings != nil + if !hasPath && !hasMappings { + return errors.New("user namespaces enabled, but no namespace path to join nor mappings to apply specified") + } + // The hasPath && hasMappings validation case is handled in specconv -- + // we cache the mappings in Config during specconv in the hasPath case, + // so we cannot do that validation here. + } else { + if config.UIDMappings != nil || config.GIDMappings != nil { + return errors.New("user namespace mappings specified, but user namespace isn't enabled in the config") + } + } + + if config.Namespaces.Contains(configs.NEWCGROUP) { + if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { + return errors.New("cgroup namespaces aren't enabled in the kernel") + } + } + + if config.Namespaces.Contains(configs.NEWTIME) { + if _, err := os.Stat("/proc/self/timens_offsets"); os.IsNotExist(err) { + return errors.New("time namespaces aren't enabled in the kernel") + } + hasPath := config.Namespaces.PathOf(configs.NEWTIME) != "" + hasOffsets := config.TimeOffsets != nil + if hasPath && hasOffsets { + return errors.New("time namespace enabled, but both namespace path and time offsets specified -- you may only provide one") + } + } else { + if config.TimeOffsets != nil { + return errors.New("time namespace offsets specified, but time namespace isn't enabled in the config") + } + } + + return nil +} + +// convertSysctlVariableToDotsSeparator can return sysctl variables in dots separator format. +// The '/' separator is also accepted in place of a '.'. +// Convert the sysctl variables to dots separator format for validation. +// More info: sysctl(8), sysctl.d(5). 
+// +// For example: +// Input sysctl variable "net/ipv4/conf/eno2.100.rp_filter" +// will return the converted value "net.ipv4.conf.eno2/100.rp_filter" +func convertSysctlVariableToDotsSeparator(val string) string { + if val == "" { + return val + } + firstSepIndex := strings.IndexAny(val, "./") + if firstSepIndex == -1 || val[firstSepIndex] == '.' { + return val + } + + f := func(r rune) rune { + switch r { + case '.': + return '/' + case '/': + return '.' + } + return r + } + return strings.Map(f, val) +} + +// sysctl validates that the specified sysctl keys are valid or not. +// /proc/sys isn't completely namespaced and depending on which namespaces +// are specified, a subset of sysctls are permitted. +func sysctl(config *configs.Config) error { + validSysctlMap := map[string]bool{ + "kernel.msgmax": true, + "kernel.msgmnb": true, + "kernel.msgmni": true, + "kernel.sem": true, + "kernel.shmall": true, + "kernel.shmmax": true, + "kernel.shmmni": true, + "kernel.shm_rmid_forced": true, + } + + var ( + netOnce sync.Once + hostnet bool + hostnetErr error + ) + + for s := range config.Sysctl { + s := convertSysctlVariableToDotsSeparator(s) + if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") { + if config.Namespaces.Contains(configs.NEWIPC) { + continue + } else { + return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s) + } + } + if strings.HasPrefix(s, "net.") { + // Is container using host netns? + // Here "host" means "current", not "initial". 
+ netOnce.Do(func() { + if !config.Namespaces.Contains(configs.NEWNET) { + hostnet = true + return + } + path := config.Namespaces.PathOf(configs.NEWNET) + if path == "" { + // own netns, so hostnet = false + return + } + hostnet, hostnetErr = isHostNetNS(path) + }) + if hostnetErr != nil { + return fmt.Errorf("invalid netns path: %w", hostnetErr) + } + if hostnet { + return fmt.Errorf("sysctl %q not allowed in host network namespace", s) + } + continue + } + if config.Namespaces.Contains(configs.NEWUTS) { + switch s { + case "kernel.domainname": + // This is namespaced and there's no explicit OCI field for it. + continue + case "kernel.hostname": + // This is namespaced but there's a conflicting (dedicated) OCI field for it. + return fmt.Errorf("sysctl %q is not allowed as it conflicts with the OCI %q field", s, "hostname") + } + } + return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s) + } + + return nil +} + +func intelrdtCheck(config *configs.Config) error { + if config.IntelRdt != nil { + if config.IntelRdt.ClosID == "." || config.IntelRdt.ClosID == ".." 
|| strings.Contains(config.IntelRdt.ClosID, "/") { + return fmt.Errorf("invalid intelRdt.ClosID %q", config.IntelRdt.ClosID) + } + + if !intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema != "" { + return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled") + } + if !intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema != "" { + return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled") + } + } + + return nil +} + +func cgroupsCheck(config *configs.Config) error { + c := config.Cgroups + if c == nil { + return nil + } + + if (c.Name != "" || c.Parent != "") && c.Path != "" { + return fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c) + } + + r := c.Resources + if r == nil { + return nil + } + + if !cgroups.IsCgroup2UnifiedMode() && r.Unified != nil { + return cgroups.ErrV1NoUnified + } + + if cgroups.IsCgroup2UnifiedMode() { + _, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) + if err != nil { + return err + } + } + + return nil +} + +func checkBindOptions(m *configs.Mount) error { + if !m.IsBind() { + return nil + } + // We must reject bind-mounts that also have filesystem-specific mount + // options, because the kernel will completely ignore these flags and we + // cannot set them per-mountpoint. + // + // It should be noted that (due to how the kernel caches superblocks), data + // options could also silently ignored for other filesystems even when + // doing a fresh mount, but there is no real way to avoid this (and it + // matches how everything else works). There have been proposals to make it + // possible for userspace to detect this caching, but this wouldn't help + // runc because the behaviour wouldn't even be desirable for most users. 
+ if m.Data != "" { + return errors.New("bind mounts cannot have any filesystem-specific options applied") + } + return nil +} + +func checkIDMapMounts(config *configs.Config, m *configs.Mount) error { + // Make sure MOUNT_ATTR_IDMAP is not set on any of our mounts. This + // attribute is handled differently to all other attributes (through + // m.IDMapping), so make sure we never store it in the actual config. This + // really shouldn't ever happen. + if m.RecAttr != nil && (m.RecAttr.Attr_set|m.RecAttr.Attr_clr)&unix.MOUNT_ATTR_IDMAP != 0 { + return errors.New("mount configuration cannot contain recAttr for MOUNT_ATTR_IDMAP") + } + if !m.IsIDMapped() { + return nil + } + if !m.IsBind() { + return errors.New("id-mapped mounts are only supported for bind-mounts") + } + if config.RootlessEUID { + return errors.New("id-mapped mounts are not supported for rootless containers") + } + if m.IDMapping.UserNSPath == "" { + if len(m.IDMapping.UIDMappings) == 0 || len(m.IDMapping.GIDMappings) == 0 { + return errors.New("id-mapped mounts must have both uid and gid mappings specified") + } + } else { + if m.IDMapping.UIDMappings != nil || m.IDMapping.GIDMappings != nil { + // should never happen + return errors.New("[internal error] id-mapped mounts cannot have both userns_path and uid and gid mappings specified") + } + } + return nil +} + +func mountsWarn(config *configs.Config) error { + for _, m := range config.Mounts { + if !filepath.IsAbs(m.Destination) { + return fmt.Errorf("mount %+v: relative destination path is **deprecated**, using it as relative to /", m) + } + } + return nil +} + +func mountsStrict(config *configs.Config) error { + for _, m := range config.Mounts { + if err := checkBindOptions(m); err != nil { + return fmt.Errorf("invalid mount %+v: %w", m, err) + } + if err := checkIDMapMounts(config, m); err != nil { + return fmt.Errorf("invalid mount %+v: %w", m, err) + } + } + return nil +} + +func isHostNetNS(path string) (bool, error) { + const 
currentProcessNetns = "/proc/self/ns/net" + + var st1, st2 unix.Stat_t + + if err := unix.Stat(currentProcessNetns, &st1); err != nil { + return false, &os.PathError{Op: "stat", Path: currentProcessNetns, Err: err} + } + if err := unix.Stat(path, &st2); err != nil { + return false, &os.PathError{Op: "stat", Path: path, Err: err} + } + + return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil +} + +// scheduler is to validate scheduler configs according to https://man7.org/linux/man-pages/man2/sched_setattr.2.html +func scheduler(config *configs.Config) error { + s := config.Scheduler + if s == nil { + return nil + } + if s.Policy == "" { + return errors.New("scheduler policy is required") + } + if s.Policy == specs.SchedOther || s.Policy == specs.SchedBatch { + if s.Nice < -20 || s.Nice > 19 { + return fmt.Errorf("invalid scheduler.nice: %d when scheduler.policy is %s", s.Nice, string(s.Policy)) + } + } + if s.Priority != 0 && (s.Policy != specs.SchedFIFO && s.Policy != specs.SchedRR) { + return errors.New("scheduler.priority can only be specified for SchedFIFO or SchedRR policy") + } + if s.Policy != specs.SchedDeadline && (s.Runtime != 0 || s.Deadline != 0 || s.Period != 0) { + return errors.New("scheduler runtime/deadline/period can only be specified for SchedDeadline policy") + } + return nil +} + +func ioPriority(config *configs.Config) error { + if config.IOPriority == nil { + return nil + } + priority := config.IOPriority.Priority + if priority < 0 || priority > 7 { + return fmt.Errorf("invalid ioPriority.Priority: %d", priority) + } + + switch class := config.IOPriority.Class; class { + case specs.IOPRIO_CLASS_RT, specs.IOPRIO_CLASS_BE, specs.IOPRIO_CLASS_IDLE: + // Valid class, do nothing. 
+ default: + return fmt.Errorf("invalid ioPriority.Class: %q", class) + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go new file mode 100644 index 0000000000..c93151bc6b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go @@ -0,0 +1,164 @@ +package libcontainer + +import ( + "errors" + "fmt" + "os" + "runtime" + + "github.com/containerd/console" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/linux" + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/internal/sys" + "github.com/opencontainers/runc/libcontainer/utils" +) + +// checkPtmxHandle checks that the given file handle points to a real +// /dev/pts/ptmx device inode on a real devpts mount. We cannot (trivially) +// check that it is *the* /dev/pts for the container itself, but this is good +// enough. +func checkPtmxHandle(ptmx *os.File) error { + //nolint:revive,staticcheck,nolintlint // ignore "don't use ALL_CAPS" warning // nolintlint is needed to work around the different lint configs + const ( + PTMX_MAJOR = 5 // from TTYAUX_MAJOR in + PTMX_MINOR = 2 // from mknod_ptmx in fs/devpts/inode.c + PTMX_INO = 2 // from mknod_ptmx in fs/devpts/inode.c + ) + return sys.VerifyInode(ptmx, func(stat *unix.Stat_t, statfs *unix.Statfs_t) error { + if statfs.Type != unix.DEVPTS_SUPER_MAGIC { + return fmt.Errorf("ptmx handle is not on a real devpts mount: super magic is %#x", statfs.Type) + } + if stat.Ino != PTMX_INO { + return fmt.Errorf("ptmx handle has wrong inode number: %v", stat.Ino) + } + if stat.Mode&unix.S_IFMT != unix.S_IFCHR || stat.Rdev != unix.Mkdev(PTMX_MAJOR, PTMX_MINOR) { + return fmt.Errorf("ptmx handle is not a real char ptmx device: ftype %#x %d:%d", + stat.Mode&unix.S_IFMT, unix.Major(stat.Rdev), unix.Minor(stat.Rdev)) + } + return nil + }) +} + +func isPtyNoIoctlError(err error) bool { + 
// getPtyPeer returns an open handle to the peer end of pty, opened with
// flags.
//
// It first asks linux.GetPtyPeer for the peer. If that fails with an error
// that isPtyNoIoctlError classifies as "ioctl not supported", it falls back
// to opening unsafePeerPath, but only after verifying that the path refers to
// the char device on a devpts mount whose device number matches the peer
// number reported by TIOCGPTN. Any other error from linux.GetPtyPeer is
// returned as-is.
func getPtyPeer(pty console.Console, unsafePeerPath string, flags int) (*os.File, error) {
	peer, err := linux.GetPtyPeer(pty.Fd(), unsafePeerPath, flags)
	if err == nil || !isPtyNoIoctlError(err) {
		return peer, err
	}

	// On pre-TIOCGPTPEER kernels (Linux < 4.13), we need to fall back to using
	// the /dev/pts/$n path generated using TIOCGPTN. We can do some validation
	// that the inode is correct because the Unix-98 pty has a consistent
	// numbering scheme for the device number of the peer.

	peerNum, err := unix.IoctlGetUint32(int(pty.Fd()), unix.TIOCGPTN)
	if err != nil {
		return nil, fmt.Errorf("get peer number of pty: %w", err)
	}
	//nolint:revive,staticcheck,nolintlint // ignore "don't use ALL_CAPS" warning // nolintlint is needed to work around the different lint configs
	const (
		UNIX98_PTY_SLAVE_MAJOR = 136 // from <linux/major.h>
	)
	// The expected device number of the peer: fixed major, minor equal to the
	// pty number.
	wantPeerDev := unix.Mkdev(UNIX98_PTY_SLAVE_MAJOR, peerNum)

	// Use O_PATH to avoid opening a bad inode before we validate it.
	peerHandle, err := os.OpenFile(unsafePeerPath, unix.O_PATH|unix.O_CLOEXEC, 0)
	if err != nil {
		return nil, err
	}
	// The O_PATH handle is only needed for verification and re-opening.
	defer peerHandle.Close()

	if err := sys.VerifyInode(peerHandle, func(stat *unix.Stat_t, statfs *unix.Statfs_t) error {
		if statfs.Type != unix.DEVPTS_SUPER_MAGIC {
			return fmt.Errorf("pty peer handle is not on a real devpts mount: super magic is %#x", statfs.Type)
		}
		if stat.Mode&unix.S_IFMT != unix.S_IFCHR || stat.Rdev != wantPeerDev {
			return fmt.Errorf("pty peer handle is not the real char device for pty %d: ftype %#x %d:%d",
				peerNum, stat.Mode&unix.S_IFMT, unix.Major(stat.Rdev), unix.Minor(stat.Rdev))
		}
		return nil
	}); err != nil {
		return nil, err
	}

	// Re-open the verified handle itself (rather than the path) with the
	// caller's flags.
	return pathrs.Reopen(peerHandle, flags)
}
+ defer func() { + if Err != nil { + _ = ptyFile.Close() + } + }() + + pty, unsafePeerPath, err := console.NewPtyFromFile(ptyFile) + if err != nil { + return nil, nil, err + } + defer func() { + if Err != nil { + _ = pty.Close() + } + }() + + peer, err = getPtyPeer(pty, unsafePeerPath, unix.O_RDWR|unix.O_NOCTTY) + if err != nil { + return nil, nil, fmt.Errorf("failed to get peer end of newly-allocated console: %w", err) + } + return pty, peer, nil +} + +// mountConsole bind-mounts the provided pty on top of /dev/console so programs +// that operate on /dev/console operate on the correct container pty. +func mountConsole(peerPty *os.File) error { + console, err := os.OpenFile("/dev/console", unix.O_NOFOLLOW|unix.O_CREAT|unix.O_CLOEXEC, 0o666) + if err != nil { + return fmt.Errorf("create /dev/console mount target: %w", err) + } + defer console.Close() + + dstFd, closer := utils.ProcThreadSelfFd(console.Fd()) + defer closer() + + mntSrc := &mountSource{ + Type: mountSourcePlain, + file: peerPty, + } + return mountViaFds(peerPty.Name(), mntSrc, "/dev/console", dstFd, "bind", unix.MS_BIND, "") +} + +// dupStdio replaces stdio with the given peerPty. +func dupStdio(peerPty *os.File) error { + for _, i := range []int{0, 1, 2} { + if err := unix.Dup3(int(peerPty.Fd()), i, 0); err != nil { + return err + } + } + runtime.KeepAlive(peerPty) + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container.go b/vendor/github.com/opencontainers/runc/libcontainer/container.go new file mode 100644 index 0000000000..c4aa99ecf5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/container.go @@ -0,0 +1,59 @@ +// Package libcontainer provides a native Go implementation for creating containers +// with namespaces, cgroups, capabilities, and filesystem access controls. +// It allows you to manage the lifecycle of the container performing additional operations +// after the container is created. 
// Status is the status of a container.
type Status int

const (
	// Created is the status that denotes the container exists but has not been run yet.
	Created Status = iota
	// Running is the status that denotes the container exists and is running.
	Running
	// Paused is the status that denotes the container exists, but all its processes are paused.
	Paused
	// Stopped is the status that denotes the container does not have a created or running process.
	Stopped
)

// String returns the lower-case name of the status, or "unknown" for any
// value that is not one of the declared constants.
func (s Status) String() string {
	names := [...]string{
		Created: "created",
		Running: "running",
		Paused:  "paused",
		Stopped: "stopped",
	}
	if s < 0 || int(s) >= len(names) {
		return "unknown"
	}
	return names[s]
}
// Container is a libcontainer container object.
//
// All fields are unexported; callers interact with a container through its
// methods.
type Container struct {
	id                   string
	stateDir             string
	config               *configs.Config // returned (by value) from Config and replaced by Set
	cgroupManager        cgroups.Manager
	intelRdtManager      *intelrdt.Manager // may be nil; methods check before use
	initProcess          parentProcess
	initProcessStartTime uint64
	m                    sync.Mutex // held by the exported methods that read or change container state
	criuVersion          int
	state                containerState
	created              time.Time
	fifo                 *os.File
}
+ CgroupPaths map[string]string `json:"cgroup_paths"` + + // NamespacePaths are filepaths to the container's namespaces. Key is the namespace type + // with the value as the path. + NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"` + + // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore + ExternalDescriptors []string `json:"external_descriptors,omitempty"` + + // Intel RDT "resource control" filesystem path + IntelRdtPath string `json:"intel_rdt_path"` +} + +// ID returns the container's unique ID +func (c *Container) ID() string { + return c.id +} + +// Config returns the container's configuration +func (c *Container) Config() configs.Config { + return *c.config +} + +// Status returns the current status of the container. +func (c *Container) Status() (Status, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentStatus() +} + +// State returns the current container's state information. +func (c *Container) State() (*State, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentState(), nil +} + +// OCIState returns the current container's state information. +func (c *Container) OCIState() (*specs.State, error) { + c.m.Lock() + defer c.m.Unlock() + return c.currentOCIState() +} + +// ignoreCgroupError filters out cgroup-related errors that can be ignored, +// because the container is stopped and its cgroup is gone. +func (c *Container) ignoreCgroupError(err error) error { + if err == nil { + return nil + } + if errors.Is(err, os.ErrNotExist) && !c.hasInit() && !c.cgroupManager.Exists() { + return nil + } + return err +} + +// Processes returns the PIDs inside this container. The PIDs are in the +// namespace of the calling process. +// +// Some of the returned PIDs may no longer refer to processes in the container, +// unless the container state is PAUSED in which case every PID in the slice is +// valid. 
+func (c *Container) Processes() ([]int, error) {
+	pids, pidsErr := c.cgroupManager.GetAllPids()
+	// ignoreCgroupError drops a "not exist" failure when the container has
+	// no init process and its cgroup is gone.
+	if err := c.ignoreCgroupError(pidsErr); err != nil {
+		return nil, fmt.Errorf("unable to get all container pids: %w", err)
+	}
+	return pids, nil
+}
+
+// Stats gathers the container's statistics: cgroup stats, Intel RDT stats
+// when an Intel RDT manager is set, and interface stats for every configured
+// network of type "veth".
+//
+// On failure the Stats value filled in so far is returned with the error.
+func (c *Container) Stats() (*Stats, error) {
+	stats := &Stats{}
+
+	cgStats, err := c.cgroupManager.GetStats()
+	stats.CgroupStats = cgStats
+	if err != nil {
+		return stats, fmt.Errorf("unable to get container cgroup stats: %w", err)
+	}
+
+	if rdt := c.intelRdtManager; rdt != nil {
+		rdtStats, err := rdt.GetStats()
+		stats.IntelRdtStats = rdtStats
+		if err != nil {
+			return stats, fmt.Errorf("unable to get container Intel RDT stats: %w", err)
+		}
+	}
+
+	for _, iface := range c.config.Networks {
+		// Only "veth" networks are queried.
+		if iface.Type != "veth" {
+			continue
+		}
+		ifStats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
+		if err != nil {
+			return stats, fmt.Errorf("unable to get network stats for interface %q: %w", iface.HostInterfaceName, err)
+		}
+		stats.Interfaces = append(stats.Interfaces, ifStats)
+	}
+	return stats, nil
+}
+
+// Set resources of container as configured. Can be used to change resources
+// when the container is running.
+func (c *Container) Set(config configs.Config) error {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	status, err := c.currentStatus()
+	switch {
+	case err != nil:
+		return err
+	case status == Stopped:
+		return ErrNotRunning
+	}
+
+	// restoreCgroups re-applies the resources of the still-current config.
+	// A failure here is only logged; the caller returns the original error.
+	restoreCgroups := func() {
+		if rbErr := c.cgroupManager.Set(c.config.Cgroups.Resources); rbErr != nil {
+			logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", rbErr)
+		}
+	}
+
+	if setErr := c.cgroupManager.Set(config.Cgroups.Resources); setErr != nil {
+		restoreCgroups()
+		return setErr
+	}
+	if rdt := c.intelRdtManager; rdt != nil {
+		if setErr := rdt.Set(&config); setErr != nil {
+			// Undo both the cgroup change made above and the Intel RDT change.
+			restoreCgroups()
+			if rbErr := rdt.Set(c.config); rbErr != nil {
+				logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", rbErr)
+			}
+			return setErr
+		}
+	}
+
+	// Everything was applied: adopt the new config and persist the state.
+	c.config = &config
+	_, err = c.updateState(nil)
+	return err
+}
+
+// Start launches process inside the container while holding the container
+// lock. The error from start is returned as is; the caller can follow the
+// process lifecycle through the Process value it passed in.
+func (c *Container) Start(process *Process) error {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	return c.start(process)
+}
+
+// Run starts process inside the container and, when it is the init process,
+// goes straight on to exec so the user process is released. A start failure
+// is returned without attempting exec.
+func (c *Container) Run(process *Process) error {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	if err := c.start(process); err != nil {
+		return err
+	}
+	if !process.Init {
+		return nil
+	}
+	return c.exec()
+}
+
+// Exec signals the container to exec the users process at the end of the init.
+func (c *Container) Exec() error {
+	c.m.Lock()
+	defer c.m.Unlock()
+	return c.exec()
+}
+
+// exec waits until the exec fifo in the state directory has been opened and
+// drained (see handleFifoResult).
+//
+// While the blocking open is pending, the init process is checked every
+// 100ms. If it can no longer be stat'ed or is a zombie, one non-blocking
+// attempt is made on the fifo; if that fails too, "container process is
+// already dead" is returned.
+func (c *Container) exec() error {
+	path := filepath.Join(c.stateDir, execFifoFilename)
+	pid := c.initProcess.pid()
+	blockingFifoOpenCh := awaitFifoOpen(path)
+	for {
+		select {
+		case result := <-blockingFifoOpenCh:
+			return handleFifoResult(result)
+
+		case <-time.After(time.Millisecond * 100):
+			stat, err := system.Stat(pid)
+			if err != nil || stat.State == system.Zombie {
+				// Could be because the process started, ran, and completed between our 100ms timeout and our system.Stat() check.
+				// See if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
+				if err := handleFifoResult(fifoOpen(path, false)); err != nil {
+					return errors.New("container process is already dead")
+				}
+				return nil
+			}
+		}
+	}
+}
+
+// readFromExecFifo reads everything from execFifo. It returns the read error,
+// if any, and otherwise an error when no data at all was read.
+func readFromExecFifo(execFifo io.Reader) error {
+	data, err := io.ReadAll(execFifo)
+	if err != nil {
+		return err
+	}
+	if len(data) <= 0 {
+		return errors.New("cannot start an already running container")
+	}
+	return nil
+}
+
+// awaitFifoOpen performs a blocking open of the fifo at path in a new
+// goroutine and delivers the result on the returned channel.
+//
+// NOTE(review): the channel is unbuffered, so the goroutine stays blocked on
+// the send if the caller returns without receiving (exec does so on its
+// timeout path) — confirm this is acceptable.
+func awaitFifoOpen(path string) <-chan openResult {
+	fifoOpened := make(chan openResult)
+	go func() {
+		result := fifoOpen(path, true)
+		fifoOpened <- result
+	}()
+	return fifoOpened
+}
+
+// fifoOpen opens path read-only, adding O_NONBLOCK when block is false.
+// An open failure is reported in openResult.err, prefixed with "exec fifo: ".
+func fifoOpen(path string, block bool) openResult {
+	flags := os.O_RDONLY
+	if !block {
+		flags |= unix.O_NONBLOCK
+	}
+	f, err := os.OpenFile(path, flags, 0)
+	if err != nil {
+		return openResult{err: fmt.Errorf("exec fifo: %w", err)}
+	}
+	return openResult{file: f}
+}
+
+// handleFifoResult consumes the outcome of a fifo open. An open error is
+// returned as is. Otherwise the file is read via readFromExecFifo, closed on
+// return, and, if the read succeeded, removed from the filesystem.
+func handleFifoResult(result openResult) error {
+	if result.err != nil {
+		return result.err
+	}
+	f := result.file
+	defer f.Close()
+	if err := readFromExecFifo(f); err != nil {
+		return err
+	}
+	return os.Remove(f.Name())
+}
+
+// openResult carries the outcome of fifoOpen: the opened file, or the error.
+type openResult struct {
+	file *os.File
+	err  error
+}
+
+// start spawns process in the container. Callers in this file hold c.m.
+//
+// For an init process the exec fifo is created first and removed again if
+// start returns an error. Once the parent process has started, c.fifo is
+// closed and any Poststart hooks are run. retErr is a named result so the
+// deferred functions can inspect and set it.
+func (c *Container) start(process *Process) (retErr error) {
+	if c.config.Cgroups.Resources.SkipDevices {
+		return errors.New("can't start container with SkipDevices set")
+	}
+
+	if c.config.RootlessEUID && len(process.AdditionalGroups) > 0 {
+		// We cannot set any additional groups in a rootless container
+		// and thus we bail if the user asked us to do so.
+		return errors.New("cannot set any additional groups in a rootless container")
+	}
+
+	if process.Init {
+		if c.initProcessStartTime != 0 {
+			return errors.New("container already has init process")
+		}
+		if err := c.createExecFifo(); err != nil {
+			return err
+		}
+		// Do not leave the fifo behind when anything below fails.
+		defer func() {
+			if retErr != nil {
+				c.deleteExecFifo()
+			}
+		}()
+	}
+
+	parent, err := c.newParentProcess(process)
+	if err != nil {
+		return fmt.Errorf("unable to create new parent process: %w", err)
+	}
+	// We do not need the cloned binaries once the process is spawned.
+	defer process.closeClonedExes()
+
+	logsDone := parent.forwardChildLogs()
+
+	// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
+	// to make sure we don't leak any files into "runc init". Any files to be
+	// passed to "runc init" through ExtraFiles will get dup2'd by the Go
+	// runtime and thus their O_CLOEXEC flag will be cleared. This is some
+	// additional protection against attacks like CVE-2024-21626, by making
+	// sure we never leak files to "runc init" we didn't intend to.
+	if err := utils.CloseExecFrom(3); err != nil {
+		return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
+	}
+	if err := parent.start(); err != nil {
+		return fmt.Errorf("unable to start container process: %w", err)
+	}
+
+	if logsDone != nil {
+		defer func() {
+			// Wait for log forwarder to finish. This depends on
+			// runc init closing the _LIBCONTAINER_LOGPIPE log fd.
+			err := <-logsDone
+			// An earlier error takes precedence over a log forwarding error.
+			if err != nil && retErr == nil {
+				retErr = fmt.Errorf("unable to forward init logs: %w", err)
+			}
+		}()
+	}
+
+	if process.Init {
+		c.fifo.Close()
+		if c.config.HasHook(configs.Poststart) {
+			s, err := c.currentOCIState()
+			if err != nil {
+				return err
+			}
+
+			if err := c.config.Hooks.Run(configs.Poststart, s); err != nil {
+				// NOTE(review): the warning below wraps the error from
+				// terminate, although its text mentions the poststart hook;
+				// the hook error itself is what gets returned.
+				if err := ignoreTerminateErrors(parent.terminate()); err != nil {
+					logrus.Warn(fmt.Errorf("error running poststart hook: %w", err))
+				}
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// Signal sends a specified signal to container's init.
+//
+// When s is SIGKILL and the container does not have its own PID namespace, all
+// the container's processes are killed. In this scenario, the libcontainer
+// user may be required to implement a proper child reaper.
+func (c *Container) Signal(s os.Signal) error {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	// When a container has its own PID namespace, inside it the init PID
+	// is 1, and thus it is handled specially by the kernel. In particular,
+	// killing init with SIGKILL from an ancestor namespace will also kill
+	// all other processes in that PID namespace (see pid_namespaces(7)).
+	//
+	// OTOH, if PID namespace is shared, we should kill all pids to avoid
+	// leftover processes. Handle this special case here.
+	if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
+		if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
+			if c.config.RootlessCgroups { // may not have access to the cgroup
+				logrus.WithError(err).Warn("failed to kill all processes, possibly due to lack of cgroup (Hint: enable cgroup v2 delegation)")
+				// Some processes may leak when cgroup is not delegated
+				// https://github.com/opencontainers/runc/pull/4395#pullrequestreview-2291179652
+				return c.signal(s)
+			}
+			// For a non-rootless container, if there is no init process and no
+			// cgroup, it means that the container is not running.
+			if errors.Is(err, ErrCgroupNotExist) && !c.hasInit() {
+				err = ErrNotRunning
+			}
+			return fmt.Errorf("unable to kill all processes: %w", err)
+		}
+		return nil
+	}
+
+	return c.signal(s)
+}
+
+// signal delivers s to the init process. It returns ErrNotRunning when
+// hasInit reports that there is no live init process. After a SIGKILL the
+// cgroup is thawed if the container is paused; errors from that step are
+// deliberately ignored.
+func (c *Container) signal(s os.Signal) error {
+	// To avoid a PID reuse attack, don't kill non-running container.
+	if !c.hasInit() {
+		return ErrNotRunning
+	}
+	if err := c.initProcess.signal(s); err != nil {
+		return fmt.Errorf("unable to signal init: %w", err)
+	}
+	if s == unix.SIGKILL {
+		// For cgroup v1, killing a process in a frozen cgroup
+		// does nothing until it's thawed. Only thaw the cgroup
+		// for SIGKILL.
+		if paused, _ := c.isPaused(); paused {
+			_ = c.cgroupManager.Freeze(cgroups.Thawed)
+		}
+	}
+	return nil
+}
+
+// createExecFifo creates the exec fifo in the state directory with mode
+// 0o622 and hands ownership to the host uid/gid returned by HostRootUID and
+// HostRootGID. If a step after Mkfifo fails, the fifo is removed again.
+func (c *Container) createExecFifo() (retErr error) {
+	rootuid, err := c.config.HostRootUID()
+	if err != nil {
+		return err
+	}
+	rootgid, err := c.config.HostRootGID()
+	if err != nil {
+		return err
+	}
+
+	fifoName := filepath.Join(c.stateDir, execFifoFilename)
+	if err := unix.Mkfifo(fifoName, 0o622); err != nil {
+		return &os.PathError{Op: "mkfifo", Path: fifoName, Err: err}
+	}
+	// From here on the fifo exists; clean it up on any later failure.
+	defer func() {
+		if retErr != nil {
+			os.Remove(fifoName)
+		}
+	}()
+	// Ensure permission bits (can be different because of umask).
+	if err := os.Chmod(fifoName, 0o622); err != nil {
+		return err
+	}
+	return os.Chown(fifoName, rootuid, rootgid)
+}
+
+// deleteExecFifo removes the exec fifo from the state directory. The result
+// of the removal is not checked.
+func (c *Container) deleteExecFifo() {
+	fifoName := filepath.Join(c.stateDir, execFifoFilename)
+	os.Remove(fifoName)
+}
+
+// includeExecFifo opens the container's execfifo as a pathfd, so that the
+// container cannot access the statedir (and the FIFO itself remains
+// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
+// fd, with _LIBCONTAINER_FIFOFD set to its fd number.
+func (c *Container) includeExecFifo(cmd *exec.Cmd) error {
+	fifoName := filepath.Join(c.stateDir, execFifoFilename)
+	fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return err
+	}
+	// Kept on the container so that start can close it after the spawn.
+	c.fifo = fifo
+
+	cmd.ExtraFiles = append(cmd.ExtraFiles, fifo)
+	// os/exec hands entry i of ExtraFiles to the child as fd 3+i, so the
+	// file appended last has this fd number in the child.
+	cmd.Env = append(cmd.Env,
+		"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
+	return nil
+}
+
+// newParentProcess builds the exec.Cmd that runs this binary with the "init"
+// argument and wraps it in an init process (p.Init) or a setns process.
+//
+// Every file the child needs is appended to cmd.ExtraFiles and its resulting
+// fd number is announced through a _LIBCONTAINER_* environment variable. The
+// number is derived from the length of ExtraFiles at that moment, so the
+// order of the append statements below matters.
+func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
+	comm, err := newProcessComm()
+	if err != nil {
+		return nil, err
+	}
+
+	// Make sure we use a new safe copy of /proc/self/exe binary each time, this
+	// is called to make sure that if a container manages to overwrite the file,
+	// it cannot affect other containers on the system. For runc, this code will
+	// only ever be called once, but libcontainer users might call this more than
+	// once.
+	p.closeClonedExes()
+	var (
+		exePath string
+		safeExe *os.File
+	)
+	if exeseal.IsSelfExeCloned() {
+		// /proc/self/exe is already a cloned binary -- no need to do anything
+		logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
+		// We don't need to use /proc/thread-self here because the exe mm of a
+		// thread-group is guaranteed to be the same for all threads by
+		// definition. This lets us avoid having to do runtime.LockOSThread.
+		exePath = "/proc/self/exe"
+	} else {
+		var err error
+		safeExe, err = exeseal.CloneSelfExe(c.stateDir)
+		if err != nil {
+			return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
+		}
+		exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
+		p.clonedExes = append(p.clonedExes, safeExe)
+		logrus.Debug("runc exeseal: using /proc/self/exe clone") // used for tests
+	}
+
+	cmd := exec.Command(exePath, "init")
+	// Present the child under the same argv[0] as the current process.
+	cmd.Args[0] = os.Args[0]
+	cmd.Stdin = p.Stdin
+	cmd.Stdout = p.Stdout
+	cmd.Stderr = p.Stderr
+	cmd.Dir = c.config.Rootfs
+	if cmd.SysProcAttr == nil {
+		cmd.SysProcAttr = &unix.SysProcAttr{}
+	}
+	cmd.Env = append(cmd.Env, "GOMAXPROCS="+os.Getenv("GOMAXPROCS"))
+	cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...)
+	if p.ConsoleSocket != nil {
+		cmd.ExtraFiles = append(cmd.ExtraFiles, p.ConsoleSocket)
+		cmd.Env = append(cmd.Env,
+			"_LIBCONTAINER_CONSOLE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
+		)
+	}
+
+	cmd.ExtraFiles = append(cmd.ExtraFiles, comm.initSockChild)
+	cmd.Env = append(cmd.Env,
+		"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
+	)
+	cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
+	cmd.Env = append(cmd.Env,
+		"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
+	)
+
+	cmd.ExtraFiles = append(cmd.ExtraFiles, comm.logPipeChild)
+	cmd.Env = append(cmd.Env,
+		"_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
+	if p.LogLevel != "" {
+		cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel)
+	}
+
+	if p.PidfdSocket != nil {
+		cmd.ExtraFiles = append(cmd.ExtraFiles, p.PidfdSocket)
+		cmd.Env = append(cmd.Env,
+			"_LIBCONTAINER_PIDFD_SOCK="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
+		)
+	}
+
+	// TODO: After https://go-review.googlesource.com/c/go/+/515799 is included
+	// in the Go versions supported by us, we can remove this logic.
+	if safeExe != nil {
+		// Due to a Go stdlib bug, we need to add safeExe to the set of
+		// ExtraFiles otherwise it is possible for the stdlib to clobber the fd
+		// during forkAndExecInChild1 and replace it with some other file that
+		// might be malicious. This is less than ideal (because the descriptor
+		// will be non-O_CLOEXEC) however we have protections in "runc init" to
+		// stop us from leaking extra file descriptors.
+		//
+		// See the Go change referenced in the TODO above.
+		cmd.ExtraFiles = append(cmd.ExtraFiles, safeExe)
+
+		// There is a race when we open the file: if a small fd was closed at
+		// that moment, it may be reused for safeExe. Because of the Go stdlib
+		// fd shuffling bug, if the fd of safeExe is too small, the stdlib
+		// will dup3 it to another fd, or dup3 another fd onto it, so that the
+		// fd cmd.Path refers to ends up pointing at a random path, which can
+		// lead to a "permission denied" error when starting the process.
+		// Please see #4294.
+		// So we should not use the original fd of safeExe, but the fd after
+		// it was shuffled by the Go stdlib, because the stdlib guarantees
+		// that this fd refers to the correct file.
+		cmd.Path = "/proc/self/fd/" + strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1)
+	}
+
+	// NOTE: when running a container with no PID namespace and the parent
+	// process spawning the container is PID1 the pdeathsig is being
+	// delivered to the container's init process by the kernel for some
+	// reason even with the parent still running.
+	if c.config.ParentDeathSignal > 0 {
+		cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
+	}
+
+	if p.Init {
+		// We only set up fifoFd if we're not doing a `runc exec`. The historic
+		// reason for this is that previously we would pass a dirfd that allowed
+		// for container rootfs escape (and not doing it in `runc exec` avoided
+		// that problem), but we no longer do that. However, there's no need to do
+		// this for `runc exec` so we just keep it this way to be safe.
+		if err := c.includeExecFifo(cmd); err != nil {
+			return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
+		}
+		return c.newInitProcess(p, cmd, comm)
+	}
+	return c.newSetnsProcess(p, cmd, comm)
+}
+
+// newInitProcess wraps cmd in an initProcess and records it as the
+// container's init process. The bootstrap data carries the clone flags of
+// the configured namespaces plus the paths of those namespaces that have a
+// non-empty Path in the config.
+func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*initProcess, error) {
+	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
+	nsMaps := make(map[configs.NamespaceType]string)
+	for _, ns := range c.config.Namespaces {
+		if ns.Path != "" {
+			nsMaps[ns.Type] = ns.Path
+		}
+	}
+	data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
+	if err != nil {
+		return nil, err
+	}
+
+	init := &initProcess{
+		containerProcess: containerProcess{
+			cmd:           cmd,
+			comm:          comm,
+			manager:       c.cgroupManager,
+			config:        c.newInitConfig(p),
+			process:       p,
+			bootstrapData: data,
+			container:     c,
+		},
+		intelRdtManager: c.intelRdtManager,
+	}
+	c.initProcess = init
+	return init, nil
+}
+
+// newSetnsProcess wraps cmd in a setnsProcess that joins the namespaces
+// recorded in the container's current state. When p.SubCgroupPaths is set,
+// the cgroup paths taken from the state are extended by the requested sub
+// paths; an unknown controller or a path rejected by the prefix check is an
+// error.
+func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*setnsProcess, error) {
+	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
+	state := c.currentState()
+	// For a setns process we don't have to set clone flags, as the process
+	// namespaces will only be set via the setns syscall.
+	data, err := c.bootstrapData(0, state.NamespacePaths)
+	if err != nil {
+		return nil, err
+	}
+	proc := &setnsProcess{
+		containerProcess: containerProcess{
+			cmd:           cmd,
+			comm:          comm,
+			manager:       c.cgroupManager,
+			config:        c.newInitConfig(p),
+			process:       p,
+			bootstrapData: data,
+			container:     c,
+		},
+		cgroupPaths:     state.CgroupPaths,
+		rootlessCgroups: c.config.RootlessCgroups,
+		intelRdtPath:    state.IntelRdtPath,
+		initProcessPid:  state.InitProcessPid,
+	}
+	// NOTE(review): in both branches below strings.HasPrefix is a plain
+	// string-prefix test, so a joined path that merely starts with the same
+	// characters as the parent (a sibling such as "<parent>-x") also passes
+	// — confirm whether a path-component boundary check is intended.
+	if len(p.SubCgroupPaths) > 0 {
+		if add, ok := p.SubCgroupPaths[""]; ok {
+			// cgroup v1: using the same path for all controllers.
+			// cgroup v2: the only possible way.
+			for k := range proc.cgroupPaths {
+				subPath := path.Join(proc.cgroupPaths[k], add)
+				if !strings.HasPrefix(subPath, proc.cgroupPaths[k]) {
+					return nil, fmt.Errorf("%s is not a sub cgroup path", add)
+				}
+				proc.cgroupPaths[k] = subPath
+			}
+			// cgroup v2: do not try to join init process's cgroup
+			// as a fallback (see (*setnsProcess).start).
+			proc.initProcessPid = 0
+		} else {
+			// Per-controller paths.
+			for ctrl, add := range p.SubCgroupPaths {
+				if val, ok := proc.cgroupPaths[ctrl]; ok {
+					subPath := path.Join(val, add)
+					if !strings.HasPrefix(subPath, val) {
+						return nil, fmt.Errorf("%s is not a sub cgroup path", add)
+					}
+					proc.cgroupPaths[ctrl] = subPath
+				} else {
+					return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
+				}
+			}
+		}
+	}
+	return proc, nil
+}
+
+// newInitConfig assembles the initConfig handed to the spawned process from
+// the container config and the per-process settings in process.
+func (c *Container) newInitConfig(process *Process) *initConfig {
+	// Set initial properties. For those properties that exist
+	// both in the container config and the process, use the ones
+	// from the container config first, and override them later.
+	cfg := &initConfig{
+		Config:           c.config,
+		Args:             process.Args,
+		Env:              process.Env,
+		UID:              process.UID,
+		GID:              process.GID,
+		AdditionalGroups: process.AdditionalGroups,
+		Cwd:              process.Cwd,
+		Capabilities:     c.config.Capabilities,
+		PassedFilesCount: len(process.ExtraFiles),
+		ContainerID:      c.ID(),
+		NoNewPrivileges:  c.config.NoNewPrivileges,
+		AppArmorProfile:  c.config.AppArmorProfile,
+		ProcessLabel:     c.config.ProcessLabel,
+		Rlimits:          c.config.Rlimits,
+		IOPriority:       c.config.IOPriority,
+		Scheduler:        c.config.Scheduler,
+		CPUAffinity:      c.config.ExecCPUAffinity,
+		CreateConsole:    process.ConsoleSocket != nil,
+		ConsoleWidth:     process.ConsoleWidth,
+		ConsoleHeight:    process.ConsoleHeight,
+	}
+
+	// Overwrite config properties with ones from process. A property is only
+	// overridden when the process actually sets it (non-nil / non-empty).
+
+	if process.Capabilities != nil {
+		cfg.Capabilities = process.Capabilities
+	}
+	if process.NoNewPrivileges != nil {
+		cfg.NoNewPrivileges = *process.NoNewPrivileges
+	}
+	if process.AppArmorProfile != "" {
+		cfg.AppArmorProfile = process.AppArmorProfile
+	}
+	if process.Label != "" {
+		cfg.ProcessLabel = process.Label
+	}
+	if len(process.Rlimits) > 0 {
+		cfg.Rlimits = process.Rlimits
+	}
+	if process.IOPriority != nil {
+		cfg.IOPriority = process.IOPriority
+	}
+	if process.Scheduler != nil {
+		cfg.Scheduler = process.Scheduler
+	}
+	if process.CPUAffinity != nil {
+		cfg.CPUAffinity = process.CPUAffinity
+	}
+
+	// Set misc properties.
+
+	if cgroups.IsCgroup2UnifiedMode() {
+		cfg.Cgroup2Path = c.cgroupManager.Path("")
+	}
+
+	return cfg
+}
+
+// Destroy destroys the container, if it is in a valid state.
+//
+// Any event registrations are removed before the container is destroyed.
+// No error is returned if the container is already destroyed.
+//
+// Running containers must first be stopped using Signal.
+// Paused containers must first be resumed using Resume.
+func (c *Container) Destroy() error {
+	c.m.Lock()
+	defer c.m.Unlock()
+	if err := c.state.destroy(); err != nil {
+		return fmt.Errorf("unable to destroy container: %w", err)
+	}
+	return nil
+}
+
+// Pause pauses the container, if its state is RUNNING or CREATED, changing
+// its state to PAUSED. For any other state, including PAUSED, ErrNotRunning
+// is returned.
+func (c *Container) Pause() error {
+	c.m.Lock()
+	defer c.m.Unlock()
+	status, err := c.currentStatus()
+	if err != nil {
+		return err
+	}
+	switch status {
+	case Running, Created:
+		if err := c.cgroupManager.Freeze(cgroups.Frozen); err != nil {
+			return err
+		}
+		return c.state.transition(&pausedState{
+			c: c,
+		})
+	}
+	return ErrNotRunning
+}
+
+// Resume resumes the execution of any user processes in the
+// container before setting the container state to RUNNING.
+// This is only performed if the current state is PAUSED.
+// In any other state ErrNotPaused is returned.
+func (c *Container) Resume() error {
+	c.m.Lock()
+	defer c.m.Unlock()
+
+	status, err := c.currentStatus()
+	switch {
+	case err != nil:
+		return err
+	case status != Paused:
+		return ErrNotPaused
+	}
+
+	if thawErr := c.cgroupManager.Freeze(cgroups.Thawed); thawErr != nil {
+		return thawErr
+	}
+	return c.state.transition(&runningState{c: c})
+}
+
+// NotifyOOM returns a receive-only channel for OOM notifications of the
+// container. The notifier is chosen by cgroup mode: notifyOnOOMV2 in cgroup
+// v2 unified mode, notifyOnOOM otherwise.
+func (c *Container) NotifyOOM() (<-chan struct{}, error) {
+	// XXX(cyphar): This requires cgroups.
+	if c.config.RootlessCgroups {
+		logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
+	}
+	memPath := c.cgroupManager.Path("memory")
+	if !cgroups.IsCgroup2UnifiedMode() {
+		return notifyOnOOM(memPath)
+	}
+	return notifyOnOOMV2(memPath)
+}
+
+// NotifyMemoryPressure returns a receive-only channel that signals when the
+// container reaches the given memory pressure level.
+func (c *Container) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
+	// XXX(cyphar): This requires cgroups.
+	if c.config.RootlessCgroups {
+		logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
+	}
+	memPath := c.cgroupManager.Path("memory")
+	return notifyMemoryPressure(memPath, level)
+}
+
+// updateState records process as the init process when it is non-nil, then
+// snapshots the current state and writes it to disk. The snapshot is
+// returned only if saving succeeded.
+func (c *Container) updateState(process parentProcess) (*State, error) {
+	if process != nil {
+		c.initProcess = process
+	}
+	snapshot := c.currentState()
+	err := c.saveState(snapshot)
+	if err != nil {
+		return nil, err
+	}
+	return snapshot, nil
+}
+
+// saveState writes s as JSON to a temporary file in the state directory and
+// renames it over the state file. On any failure the temporary file is
+// closed and removed.
+func (c *Container) saveState(s *State) (retErr error) {
+	tmpFile, err := os.CreateTemp(c.stateDir, "state-")
+	if err != nil {
+		return err
+	}
+	tmpName := tmpFile.Name()
+
+	// retErr is the named result, so this sees every error returned below.
+	defer func() {
+		if retErr == nil {
+			return
+		}
+		tmpFile.Close()
+		os.Remove(tmpName)
+	}()
+
+	if err := utils.WriteJSON(tmpFile, s); err != nil {
+		return err
+	}
+	if err := tmpFile.Close(); err != nil {
+		return err
+	}
+	return os.Rename(tmpName, filepath.Join(c.stateDir, stateFilename))
+}
+
+// currentStatus refreshes the container state and returns its status. When
+// the refresh fails, -1 is returned together with the error.
+func (c *Container) currentStatus() (Status, error) {
+	err := c.refreshState()
+	if err != nil {
+		return -1, err
+	}
+	return c.state.status(), nil
+}
+
+// refreshState re-derives the container state from runtime information
+// rather than from in-process bookkeeping, since libcontainer consumers may
+// use the container from another process. The checks run in this order:
+// frozen cgroup (paused), no live init (stopped), exec fifo still present
+// (created), otherwise running.
+func (c *Container) refreshState() error {
+	paused, err := c.isPaused()
+	if err != nil {
+		return err
+	}
+	switch {
+	case paused:
+		return c.state.transition(&pausedState{c: c})
+	case !c.hasInit():
+		return c.state.transition(&stoppedState{c: c})
+	}
+	// The exec fifo is what tells the created state from the running one.
+	fifoPath := filepath.Join(c.stateDir, execFifoFilename)
+	if _, statErr := os.Stat(fifoPath); statErr == nil {
+		return c.state.transition(&createdState{c: c})
+	}
+	return c.state.transition(&runningState{c: c})
+}
+
+// hasInit tells whether the container init process exists.
+func (c *Container) hasInit() bool {
+	if c.initProcess == nil {
+		return false
+	}
+	stat, err := system.Stat(c.initProcess.pid())
+	if err != nil {
+		return false
+	}
+	// The start time must match the recorded one (a different value means
+	// the pid now belongs to another process), and the process must be
+	// neither a zombie nor dead.
+	return stat.StartTime == c.initProcessStartTime &&
+		stat.State != system.Zombie &&
+		stat.State != system.Dead
+}
+
+// isPaused reports whether the freezer state of the container's cgroup is
+// Frozen.
+func (c *Container) isPaused() (bool, error) {
+	freezer, err := c.cgroupManager.GetFreezerState()
+	if err != nil {
+		return false, err
+	}
+	return freezer == cgroups.Frozen, nil
+}
+
+// currentState builds a State snapshot of the container. Without an init
+// process the pid is -1 and no namespace paths are filled in.
+func (c *Container) currentState() *State {
+	pid := -1
+	var (
+		startTime uint64
+		extFDs    []string
+	)
+	if p := c.initProcess; p != nil {
+		pid = p.pid()
+		// A failure to read the start time leaves it at whatever
+		// startTime() returned alongside the error.
+		startTime, _ = p.startTime()
+		extFDs = p.externalDescriptors()
+	}
+
+	var rdtPath string
+	if c.intelRdtManager != nil {
+		rdtPath = c.intelRdtManager.GetPath()
+	}
+
+	state := &State{
+		BaseState: BaseState{
+			ID:                   c.ID(),
+			Config:               *c.config,
+			InitProcessPid:       pid,
+			InitProcessStartTime: startTime,
+			Created:              c.created,
+		},
+		Rootless:            c.config.RootlessEUID && c.config.RootlessCgroups,
+		CgroupPaths:         c.cgroupManager.GetPaths(),
+		IntelRdtPath:        rdtPath,
+		NamespacePaths:      make(map[configs.NamespaceType]string),
+		ExternalDescriptors: extFDs,
+	}
+	if pid <= 0 {
+		return state
+	}
+
+	// Namespaces named in the config come first ...
+	for _, ns := range c.config.Namespaces {
+		state.NamespacePaths[ns.Type] = ns.GetPath(pid)
+	}
+	// ... then every other supported namespace type that is still missing.
+	for _, nsType := range configs.NamespaceTypes() {
+		if !configs.IsNamespaceSupported(nsType) {
+			continue
+		}
+		if _, present := state.NamespacePaths[nsType]; present {
+			continue
+		}
+		ns := configs.Namespace{Type: nsType}
+		state.NamespacePaths[ns.Type] = ns.GetPath(pid)
+	}
+	return state
+}
+
+// currentOCIState returns the container state in the runtime-spec form. The
+// pid is filled in only when the container is not stopped and an init
+// process is known.
+func (c *Container) currentOCIState() (*specs.State, error) {
+	bundle, annotations := utils.Annotations(c.config.Labels)
+	status, err := c.currentStatus()
+	if err != nil {
+		return nil, err
+	}
+
+	ociState := &specs.State{
+		Version:     specs.Version,
+		ID:          c.ID(),
+		Bundle:      bundle,
+		Annotations: annotations,
+		Status:      specs.ContainerState(status.String()),
+	}
+	if status != Stopped && c.initProcess != nil {
+		ociState.Pid = c.initProcess.pid()
+	}
+	return ociState, nil
+}
+
+// orderNamespacePaths turns the given namespace paths into "name:path"
+// entries, in the order of configs.NamespaceTypes(), so that they can be
+// joined with setns in that order. Types absent from the container config
+// and entries with an empty path are left out.
+func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
+	ordered := []string{}
+	for _, nsType := range configs.NamespaceTypes() {
+		// Namespaces that are not in the config never need to be joined.
+		if !c.config.Namespaces.Contains(nsType) {
+			continue
+		}
+		nsPath, found := namespaces[nsType]
+		if !found || nsPath == "" {
+			continue
+		}
+
+		// The requested namespace has to be supported.
+		if !configs.IsNamespaceSupported(nsType) {
+			return nil, fmt.Errorf("namespace %s is not supported", nsType)
+		}
+		// It can only be joined if its path exists.
+		if _, err := os.Lstat(nsPath); err != nil {
+			return nil, fmt.Errorf("namespace path: %w", err)
+		}
+		// A comma is the separator between entries, so a path must not
+		// contain one.
+		if strings.ContainsRune(nsPath, ',') {
+			return nil, fmt.Errorf("invalid namespace path %s", nsPath)
+		}
+		ordered = append(ordered, fmt.Sprintf("%s:%s", configs.NsName(nsType), nsPath))
+	}
+	return ordered, nil
+}
+
+// encodeIDMapping renders idMap as one "containerID hostID size" line per
+// entry.
+func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
+	var buf bytes.Buffer
+	for _, m := range idMap {
+		if _, err := fmt.Fprintf(&buf, "%d %d %d\n", m.ContainerID, m.HostID, m.Size); err != nil {
+			return nil, err
+		}
+	}
+	return buf.Bytes(), nil
+}
+
+// netlinkError wraps errors raised by the custom netlink message types.
+// Those types panic with a netlinkError, which lets the recover in
+// bootstrapData tell intentional panics from all others.
+type netlinkError struct{ error }
+
+// bootstrapData encodes the necessary data in netlink binary format
+// as a io.Reader.
+// Consumer can write the data to a bootstrap program +// such as one that uses nsenter package to bootstrap the container's +// init process correctly, i.e. with correct namespaces, uid/gid +// mapping etc. +func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (_ io.Reader, Err error) { + // create the netlink message + r := nl.NewNetlinkRequest(int(InitMsg), 0) + + // Our custom messages cannot bubble up an error using returns, instead + // they will panic with the specific error type, netlinkError. In that + // case, recover from the panic and return that as an error. + defer func() { + if r := recover(); r != nil { + if e, ok := r.(netlinkError); ok { + Err = e.error + } else { + panic(r) + } + } + }() + + // write cloneFlags + r.AddData(&Int32msg{ + Type: CloneFlagsAttr, + Value: uint32(cloneFlags), + }) + + // write custom namespace paths + if len(nsMaps) > 0 { + nsPaths, err := c.orderNamespacePaths(nsMaps) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: NsPathsAttr, + Value: []byte(strings.Join(nsPaths, ",")), + }) + } + + // write namespace paths only when we are not joining an existing user ns + _, joinExistingUser := nsMaps[configs.NEWUSER] + if !joinExistingUser { + // write uid mappings + if len(c.config.UIDMappings) > 0 { + if c.config.RootlessEUID { + // We resolve the paths for new{u,g}idmap from + // the context of runc to avoid doing a path + // lookup in the nsexec context. 
+ if path, err := exec.LookPath("newuidmap"); err == nil { + r.AddData(&Bytemsg{ + Type: UidmapPathAttr, + Value: []byte(path), + }) + } + } + b, err := encodeIDMapping(c.config.UIDMappings) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: UidmapAttr, + Value: b, + }) + } + + // write gid mappings + if len(c.config.GIDMappings) > 0 { + b, err := encodeIDMapping(c.config.GIDMappings) + if err != nil { + return nil, err + } + r.AddData(&Bytemsg{ + Type: GidmapAttr, + Value: b, + }) + if c.config.RootlessEUID { + if path, err := exec.LookPath("newgidmap"); err == nil { + r.AddData(&Bytemsg{ + Type: GidmapPathAttr, + Value: []byte(path), + }) + } + } + if requiresRootOrMappingTool(c.config) { + r.AddData(&Boolmsg{ + Type: SetgroupAttr, + Value: true, + }) + } + } + } + + if c.config.OomScoreAdj != nil { + // write oom_score_adj + r.AddData(&Bytemsg{ + Type: OomScoreAdjAttr, + Value: []byte(strconv.Itoa(*c.config.OomScoreAdj)), + }) + } + + // write rootless + r.AddData(&Boolmsg{ + Type: RootlessEUIDAttr, + Value: c.config.RootlessEUID, + }) + + // write boottime and monotonic time ns offsets only when we are not joining an existing time ns + _, joinExistingTime := nsMaps[configs.NEWTIME] + if !joinExistingTime && c.config.TimeOffsets != nil { + var offsetSpec bytes.Buffer + for clock, offset := range c.config.TimeOffsets { + fmt.Fprintf(&offsetSpec, "%s %d %d\n", clock, offset.Secs, offset.Nanosecs) + } + r.AddData(&Bytemsg{ + Type: TimeOffsetsAttr, + Value: offsetSpec.Bytes(), + }) + } + + return bytes.NewReader(r.Serialize()), nil +} + +// ignoreTerminateErrors returns nil if the given err matches an error known +// to indicate that the terminate occurred successfully or err was nil, otherwise +// err is returned unaltered. +func ignoreTerminateErrors(err error) error { + if err == nil { + return nil + } + // terminate() might return an error from either Kill or Wait. 
+ // The (*Cmd).Wait documentation says: "If the command fails to run + // or doesn't complete successfully, the error is of type *ExitError". + // Filter out such errors (like "exit status 1" or "signal: killed"). + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return nil + } + if errors.Is(err, os.ErrProcessDone) { + return nil + } + s := err.Error() + if strings.Contains(s, "Wait was already called") { + return nil + } + return err +} + +func requiresRootOrMappingTool(c *configs.Config) bool { + gidMap := []configs.IDMap{ + {ContainerID: 0, HostID: int64(os.Getegid()), Size: 1}, + } + return !reflect.DeepEqual(c.GIDMappings, gidMap) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criu_disabled_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/criu_disabled_linux.go new file mode 100644 index 0000000000..28c4ad1664 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_disabled_linux.go @@ -0,0 +1,15 @@ +//go:build runc_nocriu + +package libcontainer + +import "errors" + +var ErrNoCR = errors.New("this runc binary has not been compiled with checkpoint/restore support enabled (runc_nocriu)") + +func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error { + return ErrNoCR +} + +func (c *Container) Checkpoint(criuOpts *CriuOpts) error { + return ErrNoCR +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criu_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/criu_linux.go new file mode 100644 index 0000000000..53a0202ad4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_linux.go @@ -0,0 +1,1202 @@ +//go:build !runc_nocriu + +package libcontainer + +import ( + "bufio" + "bytes" + "encoding/json" + "errors" + "fmt" + "net" + "os" + "os/exec" + "path/filepath" + "reflect" + "strings" + "time" + + "github.com/checkpoint-restore/go-criu/v6" + criurpc "github.com/checkpoint-restore/go-criu/v6/rpc" + securejoin 
"github.com/cyphar/filepath-securejoin" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + "google.golang.org/protobuf/proto" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/utils" +) + +var criuFeatures *criurpc.CriuFeatures + +var ErrCriuMissingFeatures = errors.New("criu is missing features") + +func (c *Container) checkCriuFeatures(criuOpts *CriuOpts, criuFeat *criurpc.CriuFeatures) error { + t := criurpc.CriuReqType_FEATURE_CHECK + + // make sure the features we are looking for are really not from + // some previous check + criuFeatures = nil + + req := &criurpc.CriuReq{ + Type: &t, + Features: criuFeat, + } + + err := c.criuSwrk(nil, req, criuOpts, nil) + if err != nil { + return fmt.Errorf("CRIU feature check failed: %w", err) + } + + var missingFeatures []string + + // The outer if checks if the fields actually exist + if (criuFeat.MemTrack != nil) && + (criuFeatures.MemTrack != nil) { + // The inner if checks if they are set to true + if *criuFeat.MemTrack && !*criuFeatures.MemTrack { + missingFeatures = append(missingFeatures, "MemTrack") + logrus.Debugf("CRIU does not support MemTrack") + } + } + + // This needs to be repeated for every new feature check. + // Is there a way to put this in a function. Reflection? 
+ if (criuFeat.LazyPages != nil) && + (criuFeatures.LazyPages != nil) { + if *criuFeat.LazyPages && !*criuFeatures.LazyPages { + missingFeatures = append(missingFeatures, "LazyPages") + logrus.Debugf("CRIU does not support LazyPages") + } + } + + if len(missingFeatures) != 0 { + return fmt.Errorf("%w: %v", ErrCriuMissingFeatures, missingFeatures) + } + + return nil +} + +func compareCriuVersion(criuVersion int, minVersion int) error { + // simple function to perform the actual version compare + if criuVersion < minVersion { + return fmt.Errorf("CRIU version %d must be %d or higher", criuVersion, minVersion) + } + + return nil +} + +// checkCriuVersion checks CRIU version greater than or equal to minVersion. +func (c *Container) checkCriuVersion(minVersion int) error { + // If the version of criu has already been determined there is no need + // to ask criu for the version again. Use the value from c.criuVersion. + if c.criuVersion != 0 { + return compareCriuVersion(c.criuVersion, minVersion) + } + + criu := criu.MakeCriu() + var err error + c.criuVersion, err = criu.GetCriuVersion() + if err != nil { + return fmt.Errorf("CRIU version check failed: %w", err) + } + + return compareCriuVersion(c.criuVersion, minVersion) +} + +const descriptorsFilename = "descriptors.json" + +func (c *Container) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) { + mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs) + if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil { + mountDest = dest[len(c.config.Rootfs):] + } + extMnt := &criurpc.ExtMountMap{ + Key: proto.String(mountDest), + Val: proto.String(mountDest), + } + req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) +} + +func (c *Container) addMaskPaths(req *criurpc.CriuReq) error { + for _, path := range c.config.MaskPaths { + fi, err := os.Stat(fmt.Sprintf("/proc/%d/root/%s", c.initProcess.pid(), path)) + if err != nil { + if os.IsNotExist(err) { + continue + } + return err + } + if 
fi.IsDir() { + continue + } + + extMnt := &criurpc.ExtMountMap{ + Key: proto.String(path), + Val: proto.String("/dev/null"), + } + req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) + } + return nil +} + +func (c *Container) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) { + // CRIU will evaluate a configuration starting with release 3.11. + // Settings in the configuration file will overwrite RPC settings. + // Look for annotations. The annotation 'org.criu.config' + // specifies if CRIU should use a different, container specific + // configuration file. + configFile, exists := utils.SearchLabels(c.config.Labels, "org.criu.config") + if exists { + // If the annotation 'org.criu.config' exists and is set + // to a non-empty string, tell CRIU to use that as a + // configuration file. If the file does not exist, CRIU + // will just ignore it. + if configFile != "" { + rpcOpts.ConfigFile = proto.String(configFile) + } + // If 'org.criu.config' exists and is set to an empty + // string, a runc specific CRIU configuration file will + // be not set at all. + } else { + // If the mentioned annotation has not been found, specify + // a default CRIU configuration file. + rpcOpts.ConfigFile = proto.String("/etc/criu/runc.conf") + } +} + +func (c *Container) criuSupportsExtNS(t configs.NamespaceType) bool { + var minVersion int + switch t { + case configs.NEWNET: + // CRIU supports different external namespace with different released CRIU versions. + // For network namespaces to work we need at least criu 3.11.0 => 31100. + minVersion = 31100 + case configs.NEWPID: + // For PID namespaces criu 31500 is needed. 
+ minVersion = 31500 + default: + return false + } + return c.checkCriuVersion(minVersion) == nil +} + +func criuNsToKey(t configs.NamespaceType) string { + return "extRoot" + strings.Title(configs.NsName(t)) + "NS" //nolint:staticcheck // SA1019: strings.Title is deprecated +} + +func (c *Container) handleCheckpointingExternalNamespaces(rpcOpts *criurpc.CriuOpts, t configs.NamespaceType) error { + if !c.criuSupportsExtNS(t) { + return fmt.Errorf("criu lacks support for external %s namespace during checkpointing process (old criu version?)", configs.NsName(t)) + } + + nsPath := c.config.Namespaces.PathOf(t) + if nsPath == "" { + return nil + } + // CRIU expects the information about an external namespace + // like this: --external <TYPE>[<inode>]:<key> + // This <key> is always 'extRoot<TYPE>NS'. + var ns unix.Stat_t + if err := unix.Stat(nsPath, &ns); err != nil { + return err + } + criuExternal := fmt.Sprintf("%s[%d]:%s", configs.NsName(t), ns.Ino, criuNsToKey(t)) + rpcOpts.External = append(rpcOpts.External, criuExternal) + + return nil +} + +func (c *Container) handleRestoringNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File) error { + for _, ns := range c.config.Namespaces { + switch ns.Type { + case configs.NEWNET, configs.NEWPID: + // If the container is running in a network or PID namespace and has + // a path to the network or PID namespace configured, we will dump + // that network or PID namespace as an external namespace and we + // will expect that the namespace exists during restore. + // This basically means that CRIU will ignore the namespace + // and expect it to be setup correctly. 
+ if err := c.handleRestoringExternalNamespaces(rpcOpts, extraFiles, ns.Type); err != nil { + return err + } + default: + // For all other namespaces except NET and PID CRIU has + // a simpler way of joining the existing namespace if set + nsPath := c.config.Namespaces.PathOf(ns.Type) + if nsPath == "" { + continue + } + if ns.Type == configs.NEWCGROUP { + // CRIU has no code to handle NEWCGROUP + return fmt.Errorf("Do not know how to handle namespace %v", ns.Type) + } + // CRIU has code to handle NEWTIME, but it does not seem to be defined in runc + + // CRIU will issue a warning for NEWUSER: + // criu/namespaces.c: 'join-ns with user-namespace is not fully tested and dangerous' + rpcOpts.JoinNs = append(rpcOpts.JoinNs, &criurpc.JoinNamespace{ + Ns: proto.String(configs.NsName(ns.Type)), + NsFile: proto.String(nsPath), + }) + } + } + + return nil +} + +func (c *Container) handleRestoringExternalNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File, t configs.NamespaceType) error { + if !c.criuSupportsExtNS(t) { + return fmt.Errorf("criu lacks support for external %s namespace during the restoration process (old criu version?)", configs.NsName(t)) + } + + nsPath := c.config.Namespaces.PathOf(t) + if nsPath == "" { + return nil + } + // CRIU wants the information about an existing namespace + // like this: --inherit-fd fd[<fd>]:<key> + // The <key> needs to be the same as during checkpointing. + // We are always using 'extRoot<TYPE>NS' as the key in this. + nsFd, err := os.Open(nsPath) + if err != nil { + logrus.Errorf("If a specific network namespace is defined it must exist: %s", err) + return fmt.Errorf("Requested network namespace %v does not exist", nsPath) + } + inheritFd := &criurpc.InheritFd{ + Key: proto.String(criuNsToKey(t)), + // The offset of four is necessary because 0, 1, 2 and 3 are + // already used by stdin, stdout, stderr, 'criu swrk' socket. 
+ Fd: proto.Int32(int32(4 + len(*extraFiles))), + } + rpcOpts.InheritFd = append(rpcOpts.InheritFd, inheritFd) + // All open FDs need to be transferred to CRIU via extraFiles + *extraFiles = append(*extraFiles, nsFd) + + return nil +} + +func (c *Container) Checkpoint(criuOpts *CriuOpts) error { + const logFile = "dump.log" + c.m.Lock() + defer c.m.Unlock() + + // Checkpoint is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS(). + // (CLI prints a warning) + // TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has + // support for doing unprivileged dumps, but the setup of + // rootless containers might make this complicated. + + // We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0 + if err := c.checkCriuVersion(30000); err != nil { + return err + } + + if criuOpts.ImagesDirectory == "" { + return errors.New("invalid directory to save checkpoint") + } + + cgMode, err := criuCgMode(criuOpts.ManageCgroupsMode) + if err != nil { + return err + } + + // Since a container can be C/R'ed multiple times, + // the checkpoint directory may already exist. + if err := os.Mkdir(criuOpts.ImagesDirectory, 0o700); err != nil && !os.IsExist(err) { + return err + } + + logDir := criuOpts.ImagesDirectory + imageDir, err := os.Open(criuOpts.ImagesDirectory) + if err != nil { + return err + } + defer imageDir.Close() + + rpcOpts := criurpc.CriuOpts{ + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + LogLevel: proto.Int32(4), + LogFile: proto.String(logFile), + Root: proto.String(c.config.Rootfs), + ManageCgroups: proto.Bool(true), // Obsoleted by ManageCgroupsMode. 
+ ManageCgroupsMode: &cgMode, + NotifyScripts: proto.Bool(true), + Pid: proto.Int32(int32(c.initProcess.pid())), + ShellJob: proto.Bool(criuOpts.ShellJob), + LeaveRunning: proto.Bool(criuOpts.LeaveRunning), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + TcpSkipInFlight: proto.Bool(criuOpts.TcpSkipInFlight), + LinkRemap: proto.Bool(criuOpts.LinkRemap), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), + AutoDedup: proto.Bool(criuOpts.AutoDedup), + LazyPages: proto.Bool(criuOpts.LazyPages), + } + + // if criuOpts.WorkDirectory is not set, criu default is used. + if criuOpts.WorkDirectory != "" { + if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) { + return err + } + workDir, err := os.Open(criuOpts.WorkDirectory) + if err != nil { + return err + } + defer workDir.Close() + rpcOpts.WorkDirFd = proto.Int32(int32(workDir.Fd())) + logDir = criuOpts.WorkDirectory + } + + c.handleCriuConfigurationFile(&rpcOpts) + + // If the container is running in a network namespace and has + // a path to the network namespace configured, we will dump + // that network namespace as an external namespace and we + // will expect that the namespace exists during restore. + // This basically means that CRIU will ignore the namespace + // and expect to be setup correctly. + if err := c.handleCheckpointingExternalNamespaces(&rpcOpts, configs.NEWNET); err != nil { + return err + } + + // Same for possible external PID namespaces + if err := c.handleCheckpointingExternalNamespaces(&rpcOpts, configs.NEWPID); err != nil { + return err + } + + // CRIU can use cgroup freezer; when rpcOpts.FreezeCgroup + // is not set, CRIU uses ptrace() to pause the processes. + // Note cgroup v2 freezer is only supported since CRIU release 3.14. 
+ if !cgroups.IsCgroup2UnifiedMode() || c.checkCriuVersion(31400) == nil { + if fcg := c.cgroupManager.Path("freezer"); fcg != "" { + rpcOpts.FreezeCgroup = proto.String(fcg) + } + } + + // append optional criu opts, e.g., page-server and port + if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 { + rpcOpts.Ps = &criurpc.CriuPageServerInfo{ + Address: proto.String(criuOpts.PageServer.Address), + Port: proto.Int32(criuOpts.PageServer.Port), + } + } + + // pre-dump may need parentImage param to complete iterative migration + if criuOpts.ParentImage != "" { + rpcOpts.ParentImg = proto.String(criuOpts.ParentImage) + rpcOpts.TrackMem = proto.Bool(true) + } + + var t criurpc.CriuReqType + if criuOpts.PreDump { + feat := criurpc.CriuFeatures{ + MemTrack: proto.Bool(true), + } + + if err := c.checkCriuFeatures(criuOpts, &feat); err != nil { + return err + } + + t = criurpc.CriuReqType_PRE_DUMP + } else { + t = criurpc.CriuReqType_DUMP + } + + if criuOpts.LazyPages { + // lazy migration requested; check if criu supports it + feat := criurpc.CriuFeatures{ + LazyPages: proto.Bool(true), + } + if err := c.checkCriuFeatures(criuOpts, &feat); err != nil { + return err + } + + if fd := criuOpts.StatusFd; fd != -1 { + // check that the FD is valid + flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0) + if err != nil { + return fmt.Errorf("invalid --status-fd argument %d: %w", fd, err) + } + // and writable + if flags&unix.O_WRONLY == 0 { + return fmt.Errorf("invalid --status-fd argument %d: not writable", fd) + } + + if c.checkCriuVersion(31500) != nil { + // For criu 3.15+, use notifications (see case "status-ready" + // in criuNotifications). Otherwise, rely on criu status fd. 
+ rpcOpts.StatusFd = proto.Int32(int32(fd)) + } + } + } + + req := &criurpc.CriuReq{ + Type: &t, + Opts: &rpcOpts, + } + + // no need to dump all this in pre-dump + if !criuOpts.PreDump { + hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP) + for _, m := range c.config.Mounts { + switch m.Device { + case "bind": + c.addCriuDumpMount(req, m) + case "cgroup": + if cgroups.IsCgroup2UnifiedMode() || hasCgroupns { + // real mount(s) + continue + } + // a set of "external" bind mounts + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + for _, b := range binds { + c.addCriuDumpMount(req, b) + } + } + } + + if err := c.addMaskPaths(req); err != nil { + return err + } + + for _, node := range c.config.Devices { + m := &configs.Mount{Destination: node.Path, Source: node.Path} + c.addCriuDumpMount(req, m) + } + + // Write the FD info to a file in the image directory + fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors()) + if err != nil { + return err + } + + err = os.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600) + if err != nil { + return err + } + } + + err = c.criuSwrk(nil, req, criuOpts, nil) + if err != nil { + logCriuErrors(logDir, logFile) + return err + } + return nil +} + +func (c *Container) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) { + mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs) + if dest, err := securejoin.SecureJoin(c.config.Rootfs, mountDest); err == nil { + mountDest = dest[len(c.config.Rootfs):] + } + extMnt := &criurpc.ExtMountMap{ + Key: proto.String(mountDest), + Val: proto.String(m.Source), + } + req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) +} + +func (c *Container) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { + for _, iface := range c.config.Networks { + switch iface.Type { + case "veth": + veth := new(criurpc.CriuVethPair) + veth.IfOut = proto.String(iface.HostInterfaceName) + veth.IfIn = proto.String(iface.Name) 
+ req.Opts.Veths = append(req.Opts.Veths, veth) + case "loopback": + // Do nothing + } + } + for _, i := range criuOpts.VethPairs { + veth := new(criurpc.CriuVethPair) + veth.IfOut = proto.String(i.HostInterfaceName) + veth.IfIn = proto.String(i.ContainerInterfaceName) + req.Opts.Veths = append(req.Opts.Veths, veth) + } +} + +func isOnTmpfs(path string, mounts []*configs.Mount) bool { + for _, m := range mounts { + if m.Device == "tmpfs" && strings.HasPrefix(path, m.Destination+"/") { + return true + } + } + return false +} + +// prepareCriuRestoreMounts tries to set up the rootfs of the +// container to be restored in the same way runc does it for +// initial container creation. Even for a read-only rootfs container +// runc modifies the rootfs to add mountpoints which do not exist. +// This function also creates missing mountpoints as long as they +// are not on top of a tmpfs, as CRIU will restore tmpfs content anyway. +func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error { + umounts := []string{} + defer func() { + for _, u := range umounts { + _ = utils.WithProcfd(c.config.Rootfs, u, func(procfd string) error { + if e := unix.Unmount(procfd, unix.MNT_DETACH); e != nil { + if e != unix.EINVAL { + // Ignore EINVAL as it means 'target is not a mount point.' + // It probably has already been unmounted. + logrus.Warnf("Error during cleanup unmounting of %s (%s): %v", procfd, u, e) + } + } + return nil + }) + } + }() + // Now go through all mounts and create the required mountpoints. + for _, m := range mounts { + // No cgroup mount point(s) need to be created: + // * for v1, mount points are saved by CRIU because + // /sys/fs/cgroup is a tmpfs mount; + // * for v2, /sys/fs/cgroup is a real mount, but + // the mountpoint appears as soon as /sys is mounted. + if m.Device == "cgroup" { + continue + } + // If the mountpoint is on a tmpfs, skip it as CRIU will + // restore the complete tmpfs content from its checkpoint. 
+ if isOnTmpfs(m.Destination, mounts) { + continue + } + me := mountEntry{Mount: m} + if err := me.createOpenMountpoint(c.config.Rootfs); err != nil { + return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err) + } + if me.dstFile != nil { + defer me.dstFile.Close() + } + // If the mount point is a bind mount, we need to mount + // it now so that runc can create the necessary mount + // points for mounts in bind mounts. + // This also happens during initial container creation. + // Without this CRIU restore will fail + // See: https://github.com/opencontainers/runc/issues/2748 + // It is also not necessary to order the mount points + // because during initial container creation mounts are + // set up in the order they are configured. + if m.Device == "bind" { + if err := utils.WithProcfdFile(me.dstFile, func(dstFd string) error { + return mountViaFds(m.Source, nil, m.Destination, dstFd, "", unix.MS_BIND|unix.MS_REC, "") + }); err != nil { + return err + } + umounts = append(umounts, m.Destination) + } + if me.dstFile != nil { + // As this is being done in a loop, the defer earlier will be + // delayed until all mountpoints are handled -- for a config with + // many mountpoints this could result in a lot of open files. So we + // opportunistically close the file as well as deferring it. + _ = me.dstFile.Close() + } + } + return nil +} + +// Restore restores the checkpointed container to a running state using the +// criu(8) utility. +func (c *Container) Restore(process *Process, criuOpts *CriuOpts) error { + const logFile = "restore.log" + c.m.Lock() + defer c.m.Unlock() + + var extraFiles []*os.File + + // Restore is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS(). + // (CLI prints a warning) + // TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have + // support for unprivileged restore at the moment. 
+ + // We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0 + if err := c.checkCriuVersion(30000); err != nil { + return err + } + if criuOpts.ImagesDirectory == "" { + return errors.New("invalid directory to restore checkpoint") + } + + cgMode, err := criuCgMode(criuOpts.ManageCgroupsMode) + if err != nil { + return err + } + + logDir := criuOpts.ImagesDirectory + imageDir, err := os.Open(criuOpts.ImagesDirectory) + if err != nil { + return err + } + defer imageDir.Close() + // CRIU has a few requirements for a root directory: + // * it must be a mount point + // * its parent must not be overmounted + // c.config.Rootfs is bind-mounted to a temporary directory + // to satisfy these requirements. + root := filepath.Join(c.stateDir, "criu-root") + if err := os.Mkdir(root, 0o755); err != nil { + return err + } + defer os.Remove(root) + root, err = filepath.EvalSymlinks(root) + if err != nil { + return err + } + err = mount(c.config.Rootfs, root, "", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return err + } + defer unix.Unmount(root, unix.MNT_DETACH) //nolint: errcheck + t := criurpc.CriuReqType_RESTORE + req := &criurpc.CriuReq{ + Type: &t, + Opts: &criurpc.CriuOpts{ + ImagesDirFd: proto.Int32(int32(imageDir.Fd())), + EvasiveDevices: proto.Bool(true), + LogLevel: proto.Int32(4), + LogFile: proto.String(logFile), + RstSibling: proto.Bool(true), + Root: proto.String(root), + ManageCgroups: proto.Bool(true), // Obsoleted by ManageCgroupsMode. 
+ ManageCgroupsMode: &cgMode, + NotifyScripts: proto.Bool(true), + ShellJob: proto.Bool(criuOpts.ShellJob), + ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), + TcpEstablished: proto.Bool(criuOpts.TcpEstablished), + FileLocks: proto.Bool(criuOpts.FileLocks), + EmptyNs: proto.Uint32(criuOpts.EmptyNs), + OrphanPtsMaster: proto.Bool(true), + AutoDedup: proto.Bool(criuOpts.AutoDedup), + LazyPages: proto.Bool(criuOpts.LazyPages), + }, + } + + if criuOpts.LsmProfile != "" { + // CRIU older than 3.16 has a bug which breaks the possibility + // to set a different LSM profile. + if err := c.checkCriuVersion(31600); err != nil { + return errors.New("--lsm-profile requires at least CRIU 3.16") + } + req.Opts.LsmProfile = proto.String(criuOpts.LsmProfile) + } + if criuOpts.LsmMountContext != "" { + if err := c.checkCriuVersion(31600); err != nil { + return errors.New("--lsm-mount-context requires at least CRIU 3.16") + } + req.Opts.LsmMountContext = proto.String(criuOpts.LsmMountContext) + } + + if criuOpts.WorkDirectory != "" { + // Since a container can be C/R'ed multiple times, + // the work directory may already exist. + if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) { + return err + } + workDir, err := os.Open(criuOpts.WorkDirectory) + if err != nil { + return err + } + defer workDir.Close() + req.Opts.WorkDirFd = proto.Int32(int32(workDir.Fd())) + logDir = criuOpts.WorkDirectory + } + c.handleCriuConfigurationFile(req.Opts) + + if err := c.handleRestoringNamespaces(req.Opts, &extraFiles); err != nil { + return err + } + + // This will modify the rootfs of the container in the same way runc + // modifies the container during initial creation. 
+ if err := c.prepareCriuRestoreMounts(c.config.Mounts); err != nil { + return err + } + + hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP) + for _, m := range c.config.Mounts { + switch m.Device { + case "bind": + c.addCriuRestoreMount(req, m) + case "cgroup": + if cgroups.IsCgroup2UnifiedMode() || hasCgroupns { + continue + } + // cgroup v1 is a set of bind mounts, unless cgroupns is used + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + for _, b := range binds { + c.addCriuRestoreMount(req, b) + } + } + } + + if len(c.config.MaskPaths) > 0 { + m := &configs.Mount{Destination: "/dev/null", Source: "/dev/null"} + c.addCriuRestoreMount(req, m) + } + + for _, node := range c.config.Devices { + m := &configs.Mount{Destination: node.Path, Source: node.Path} + c.addCriuRestoreMount(req, m) + } + + if criuOpts.EmptyNs&unix.CLONE_NEWNET == 0 { + c.restoreNetwork(req, criuOpts) + } + + var ( + fds []string + fdJSON []byte + ) + if fdJSON, err = os.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil { + return err + } + + if err := json.Unmarshal(fdJSON, &fds); err != nil { + return err + } + for i := range fds { + if s := fds[i]; strings.Contains(s, "pipe:") { + inheritFd := new(criurpc.InheritFd) + inheritFd.Key = proto.String(s) + inheritFd.Fd = proto.Int32(int32(i)) + req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd) + } + } + err = c.criuSwrk(process, req, criuOpts, extraFiles) + if err != nil { + logCriuErrors(logDir, logFile) + } + + // Now that CRIU is done let's close all opened FDs CRIU needed. + for _, fd := range extraFiles { + fd.Close() + } + + return err +} + +// logCriuErrors tries to find and log errors from a criu log file. +// The output is similar to what "grep -n -B5 Error" does. +func logCriuErrors(dir, file string) { + lookFor := []byte("Error") // Print the line that contains this... + const max = 5 + 1 // ... and a few preceding lines. 
+ + logFile := filepath.Join(dir, file) + f, err := os.Open(logFile) + if err != nil { + logrus.Warn(err) + return + } + defer f.Close() + + var lines [max][]byte + var idx, lineNo, printedLineNo int + s := bufio.NewScanner(f) + for s.Scan() { + lineNo++ + lines[idx] = s.Bytes() + idx = (idx + 1) % max + if !bytes.Contains(s.Bytes(), lookFor) { + continue + } + // Found an error. + if printedLineNo == 0 { + logrus.Warnf("--- Quoting %q", logFile) + } else if lineNo-max > printedLineNo { + // Mark the gap. + logrus.Warn("...") + } + // Print the last lines. + for add := 0; add < max; add++ { + i := (idx + add) % max + s := lines[i] + actLineNo := lineNo + add - max + 1 + if len(s) > 0 && actLineNo > printedLineNo { + logrus.Warnf("%d:%s", actLineNo, s) + printedLineNo = actLineNo + } + } + } + if printedLineNo != 0 { + logrus.Warn("---") // End of "Quoting ...". + } + if err := s.Err(); err != nil { + logrus.Warnf("read %q: %v", logFile, err) + } +} + +func (c *Container) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { + // need to apply cgroups only on restore + if req.GetType() != criurpc.CriuReqType_RESTORE { + return nil + } + + // XXX: Do we need to deal with this case? AFAIK criu still requires root. + if err := c.cgroupManager.Apply(pid); err != nil { + return err + } + + if err := c.cgroupManager.Set(c.config.Cgroups.Resources); err != nil { + return err + } + + // TODO(@kolyshkin): should we use c.cgroupManager.GetPaths() + // instead of reading /proc/pid/cgroup? 
+ path := fmt.Sprintf("/proc/%d/cgroup", pid) + cgroupsPaths, err := cgroups.ParseCgroupFile(path) + if err != nil { + return err + } + + for c, p := range cgroupsPaths { + cgroupRoot := &criurpc.CgroupRoot{ + Ctrl: proto.String(c), + Path: proto.String(p), + } + req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot) + } + + return nil +} + +func (c *Container) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) + if err != nil { + return err + } + + criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") + criuClientFileCon, err := net.FileConn(criuClient) + criuClient.Close() + if err != nil { + return err + } + + criuClientCon := criuClientFileCon.(*net.UnixConn) + defer criuClientCon.Close() + + criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") + defer criuServer.Close() + + if c.criuVersion != 0 { + // If the CRIU Version is still '0' then this is probably + // the initial CRIU run to detect the version. Skip it. + logrus.Debugf("Using CRIU %d", c.criuVersion) + } + cmd := exec.Command("criu", "swrk", "3") + if process != nil { + cmd.Stdin = process.Stdin + cmd.Stdout = process.Stdout + cmd.Stderr = process.Stderr + } + cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer) + if extraFiles != nil { + cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...) + } + + if err := cmd.Start(); err != nil { + return err + } + // we close criuServer so that even if CRIU crashes or unexpectedly exits, runc will not hang. + criuServer.Close() + // cmd.Process will be replaced by a restored init. 
+ criuProcess := cmd.Process + + var criuProcessState *os.ProcessState + defer func() { + if criuProcessState == nil { + criuClientCon.Close() + _, err := criuProcess.Wait() + if err != nil { + logrus.Warnf("wait on criuProcess returned %v", err) + } + } + }() + + if err := c.criuApplyCgroups(criuProcess.Pid, req); err != nil { + return err + } + + var extFds []string + if process != nil { + extFds, err = getPipeFds(criuProcess.Pid) + if err != nil { + return err + } + } + + logrus.Debugf("Using CRIU in %s mode", req.GetType().String()) + // In the case of criurpc.CriuReqType_FEATURE_CHECK req.GetOpts() + // should be empty. For older CRIU versions it still will be + // available but empty. criurpc.CriuReqType_VERSION actually + // has no req.GetOpts(). + if logrus.GetLevel() >= logrus.DebugLevel && + (req.GetType() != criurpc.CriuReqType_FEATURE_CHECK && + req.GetType() != criurpc.CriuReqType_VERSION) { + + val := reflect.ValueOf(req.GetOpts()) + v := reflect.Indirect(val) + for i := 0; i < v.NumField(); i++ { + st := v.Type() + name := st.Field(i).Name + if 'A' <= name[0] && name[0] <= 'Z' { + value := val.MethodByName("Get" + name).Call([]reflect.Value{}) + logrus.Debugf("CRIU option %s with value %v", name, value[0]) + } + } + } + data, err := proto.Marshal(req) + if err != nil { + return err + } + _, err = criuClientCon.Write(data) + if err != nil { + return err + } + + buf := make([]byte, 10*4096) + oob := make([]byte, 4096) + for { + n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob) + if req.Opts != nil && req.Opts.StatusFd != nil { + // Close status_fd as soon as we got something back from criu, + // assuming it has consumed (reopened) it by this time. + // Otherwise it will might be left open forever and whoever + // is waiting on it will wait forever. 
+ fd := int(*req.Opts.StatusFd) + _ = unix.Close(fd) + req.Opts.StatusFd = nil + } + if err != nil { + return err + } + if n == 0 { + return errors.New("unexpected EOF") + } + if n == len(buf) { + return errors.New("buffer is too small") + } + + resp := new(criurpc.CriuResp) + err = proto.Unmarshal(buf[:n], resp) + if err != nil { + return err + } + t := resp.GetType() + if !resp.GetSuccess() { + return fmt.Errorf("criu failed: type %s errno %d", t, resp.GetCrErrno()) + } + + switch t { + case criurpc.CriuReqType_FEATURE_CHECK: + logrus.Debugf("Feature check says: %s", resp) + criuFeatures = resp.GetFeatures() + case criurpc.CriuReqType_NOTIFY: + if err := c.criuNotifications(resp, process, cmd, opts, extFds, oob[:oobn]); err != nil { + return err + } + req = &criurpc.CriuReq{ + Type: &t, + NotifySuccess: proto.Bool(true), + } + data, err = proto.Marshal(req) + if err != nil { + return err + } + _, err = criuClientCon.Write(data) + if err != nil { + return err + } + continue + case criurpc.CriuReqType_RESTORE: + case criurpc.CriuReqType_DUMP: + case criurpc.CriuReqType_PRE_DUMP: + default: + return fmt.Errorf("unable to parse the response %s", resp.String()) + } + + break + } + + _ = criuClientCon.CloseWrite() + // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. + // Here we want to wait only the CRIU process. + criuProcessState, err = criuProcess.Wait() + if err != nil { + return err + } + + // In pre-dump mode CRIU is in a loop and waits for + // the final DUMP command. 
+ // The current runc pre-dump approach, however, is + // start criu in PRE_DUMP once for a single pre-dump + // and not the whole series of pre-dump, pre-dump, ...m, dump + // If we got the message CriuReqType_PRE_DUMP it means + // CRIU was successful and we need to forcefully stop CRIU + if !criuProcessState.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP { + return fmt.Errorf("criu failed: %s", criuProcessState) + } + return nil +} + +// lockNetwork blocks any external network activity. +func lockNetwork(config *configs.Config) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + + if err := strategy.detach(config); err != nil { + return err + } + } + return nil +} + +func unlockNetwork(config *configs.Config) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + if err = strategy.attach(config); err != nil { + return err + } + } + return nil +} + +func (c *Container) criuNotifications(resp *criurpc.CriuResp, process *Process, cmd *exec.Cmd, opts *CriuOpts, fds []string, oob []byte) error { + notify := resp.GetNotify() + if notify == nil { + return fmt.Errorf("invalid response: %s", resp.String()) + } + script := notify.GetScript() + logrus.Debugf("notify: %s\n", script) + switch script { + case "post-dump": + f, err := os.Create(filepath.Join(c.stateDir, "checkpoint")) //nolint:forbidigo // this is a host-side operation in a runc-controlled directory + if err != nil { + return err + } + f.Close() + case "network-unlock": + if err := unlockNetwork(c.config); err != nil { + return err + } + case "network-lock": + if err := lockNetwork(c.config); err != nil { + return err + } + case "setup-namespaces": + if c.config.HasHook(configs.Prestart, configs.CreateRuntime) { + s, err := c.currentOCIState() + if err != nil { + return nil + } + s.Pid = int(notify.GetPid()) + + if err := 
c.config.Hooks.Run(configs.Prestart, s); err != nil { + return err + } + if err := c.config.Hooks.Run(configs.CreateRuntime, s); err != nil { + return err + } + } + case "post-restore": + pid := notify.GetPid() + + p, err := os.FindProcess(int(pid)) + if err != nil { + return err + } + cmd.Process = p + + r, err := newRestoredProcess(cmd, fds) + if err != nil { + return err + } + process.ops = r + if err := c.state.transition(&restoredState{ + imageDir: opts.ImagesDirectory, + c: c, + }); err != nil { + return err + } + // create a timestamp indicating when the restored checkpoint was started + c.created = time.Now().UTC() + if !c.config.Namespaces.Contains(configs.NEWTIME) && + configs.IsNamespaceSupported(configs.NEWTIME) && + c.checkCriuVersion(31400) == nil { + // CRIU restores processes into a time namespace. + c.config.Namespaces = append(c.config.Namespaces, + configs.Namespace{Type: configs.NEWTIME}) + } + if _, err := c.updateState(r); err != nil { + return err + } + if err := os.Remove(filepath.Join(c.stateDir, "checkpoint")); err != nil { + if !os.IsNotExist(err) { + logrus.Error(err) + } + } + case "orphan-pts-master": + scm, err := unix.ParseSocketControlMessage(oob) + if err != nil { + return err + } + fds, err := unix.ParseUnixRights(&scm[0]) + if err != nil { + return err + } + + master := os.NewFile(uintptr(fds[0]), "orphan-pts-master") + defer master.Close() + + // While we can access console.master, using the API is a good idea. 
+ if err := utils.SendFile(process.ConsoleSocket, master); err != nil { + return err + } + case "status-ready": + if opts.StatusFd != -1 { + // write \0 to status fd to notify that lazy page server is ready + _, err := unix.Write(opts.StatusFd, []byte{0}) + if err != nil { + logrus.Warnf("can't write \\0 to status fd: %v", err) + } + _ = unix.Close(opts.StatusFd) + opts.StatusFd = -1 + } + } + return nil +} + +func criuCgMode(mode string) (criurpc.CriuCgMode, error) { + switch mode { + case "": + return criurpc.CriuCgMode_DEFAULT, nil + case "soft": + return criurpc.CriuCgMode_SOFT, nil + case "full": + return criurpc.CriuCgMode_FULL, nil + case "strict": + return criurpc.CriuCgMode_STRICT, nil + case "ignore": + return criurpc.CriuCgMode_IGNORE, nil + default: + return 0, errors.New("invalid manage-cgroups-mode value") + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go new file mode 100644 index 0000000000..67e5e3967c --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go @@ -0,0 +1,39 @@ +package libcontainer + +type CriuPageServerInfo struct { + Address string // IP address of CRIU page server + Port int32 // port number of CRIU page server +} + +type VethPairName struct { + ContainerInterfaceName string + HostInterfaceName string +} + +type CriuOpts struct { + ImagesDirectory string // directory for storing image files + WorkDirectory string // directory to cd and write logs/pidfiles/stats to + ParentImage string // directory for storing parent image files in pre-dump and dump + LeaveRunning bool // leave container in running state after checkpoint + TcpEstablished bool // checkpoint/restore established TCP connections + TcpSkipInFlight bool // skip in-flight TCP connections + LinkRemap bool // allow one to link unlinked files back when possible + ExternalUnixConnections bool // allow external unix connections + 
ShellJob bool // allow to dump and restore shell jobs + FileLocks bool // handle file locks, for safety + PreDump bool // call criu predump to perform iterative checkpoint + PageServer CriuPageServerInfo // allow to dump to criu page server + VethPairs []VethPairName // pass the veth to criu when restore + EmptyNs uint32 // don't c/r properties for namespace from this mask + AutoDedup bool // auto deduplication for incremental dumps + LazyPages bool // restore memory pages lazily using userfaultfd + StatusFd int // fd for feedback when lazy server is ready + LsmProfile string // LSM profile used to restore the container + LsmMountContext string // LSM mount context value to use during restore + + // ManageCgroupsMode tells how criu should manage cgroups during + // checkpoint or restore. Possible values are: "soft", "full", + // "strict", "ignore", or "" (empty string) for criu default. + // See https://criu.org/CGroups for more details. + ManageCgroupsMode string +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/env.go b/vendor/github.com/opencontainers/runc/libcontainer/env.go new file mode 100644 index 0000000000..71b2f69d06 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/env.go @@ -0,0 +1,100 @@ +package libcontainer + +import ( + "errors" + "fmt" + "os" + "slices" + "strings" + + "github.com/moby/sys/user" + "github.com/sirupsen/logrus" +) + +// prepareEnv processes a list of environment variables, preparing it +// for direct consumption by unix.Exec. In particular, it: +// - validates each variable is in the NAME=VALUE format and +// contains no \0 (nil) bytes; +// - removes any duplicates (keeping only the last value for each key) +// - sets PATH for the current process, if found in the list; +// - adds HOME to returned environment, if not found in the list, +// or the value is empty. +// +// Returns the prepared environment. 
+func prepareEnv(env []string, uid int) ([]string, error) { + if env == nil { + return nil, nil + } + var homeIsSet bool + + // Deduplication code based on dedupEnv from Go 1.22 os/exec. + + // Construct the output in reverse order, to preserve the + // last occurrence of each key. + out := make([]string, 0, len(env)) + saw := make(map[string]bool, len(env)) + for n := len(env); n > 0; n-- { + kv := env[n-1] + i := strings.IndexByte(kv, '=') + if i == -1 { + return nil, errors.New("invalid environment variable: missing '='") + } + if i == 0 { + return nil, errors.New("invalid environment variable: name cannot be empty") + } + key := kv[:i] + val := kv[i+1:] + if saw[key] { // Duplicate. + continue + } + saw[key] = true + if strings.IndexByte(kv, 0) >= 0 { + return nil, fmt.Errorf("invalid environment variable %q: contains nul byte (\\x00)", key) + } + if key == "PATH" { + // Needs to be set as it is used for binary lookup. + if err := os.Setenv("PATH", val); err != nil { + return nil, err + } + } + if key == "HOME" { + if val != "" { + homeIsSet = true + } else { + // Don't add empty HOME to the environment, we will override it later. + continue + } + } + out = append(out, kv) + } + // Restore the original order. + slices.Reverse(out) + + // If HOME is not found in env, get it from container's /etc/passwd and add. + if !homeIsSet { + home, err := getUserHome(uid) + if err != nil { + // For backward compatibility, don't return an error, but merely log it. + logrus.WithError(err).Debugf("HOME not set in process.env, and getting UID %d homedir failed", uid) + } + + out = append(out, "HOME="+home) + } + + return out, nil +} + +func getUserHome(uid int) (string, error) { + const defaultHome = "/" // Default value, return this with any error. + + u, err := user.LookupUid(uid) + if err != nil { + // ErrNoPasswdEntries is kinda expected as any UID can be specified. 
+ if errors.Is(err, user.ErrNoPasswdEntries) { + err = nil + } + return defaultHome, err + } + + return u.Home, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/error.go b/vendor/github.com/opencontainers/runc/libcontainer/error.go new file mode 100644 index 0000000000..7f6a5eb463 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/error.go @@ -0,0 +1,14 @@ +package libcontainer + +import "errors" + +var ( + ErrExist = errors.New("container with given ID already exists") + ErrInvalidID = errors.New("invalid container ID format") + ErrNotExist = errors.New("container does not exist") + ErrPaused = errors.New("container paused") + ErrRunning = errors.New("container still running") + ErrNotRunning = errors.New("container not running") + ErrNotPaused = errors.New("container not paused") + ErrCgroupNotExist = errors.New("cgroup not exist") +) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/exeseal/cloned_binary_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/exeseal/cloned_binary_linux.go new file mode 100644 index 0000000000..146408f20d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/exeseal/cloned_binary_linux.go @@ -0,0 +1,263 @@ +package exeseal + +import ( + "errors" + "fmt" + "io" + "os" + "strconv" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer/system" +) + +type SealFunc func(**os.File) error + +var ( + _ SealFunc = sealMemfd + _ SealFunc = sealFile +) + +func isExecutable(f *os.File) bool { + if err := unix.Faccessat(int(f.Fd()), "", unix.X_OK, unix.AT_EACCESS|unix.AT_EMPTY_PATH); err == nil { + return true + } else if err == unix.EACCES { + return false + } + path := "/proc/self/fd/" + strconv.Itoa(int(f.Fd())) + if err := unix.Access(path, unix.X_OK); err == nil { + return true + } else if err == unix.EACCES { + return false + } + // Cannot check -- assume 
it's executable (if not, exec will fail). + logrus.Debugf("cannot do X_OK check on binary %s -- assuming it's executable", f.Name()) + return true +} + +const baseMemfdSeals = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE + +func sealMemfd(f **os.File) error { + if err := (*f).Chmod(0o511); err != nil { + return err + } + // Try to set the newer memfd sealing flags, but we ignore + // errors because they are not needed and we want to continue + // to work on older kernels. + fd := (*f).Fd() + + // Skip F_SEAL_FUTURE_WRITE, it is not needed because we alreadu use the + // stronger F_SEAL_WRITE (and is buggy on Linux <5.5 -- see kernel commit + // 05d351102dbe and ). + + // F_SEAL_EXEC -- Linux 6.3 + const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name + _, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC) + + // Apply all original memfd seals. + _, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals) + return os.NewSyscallError("fcntl(F_ADD_SEALS)", err) +} + +// Memfd creates a sealable executable memfd (supported since Linux 3.17). +func Memfd(comment string) (*os.File, SealFunc, error) { + file, err := system.ExecutableMemfd("runc_cloned:"+comment, unix.MFD_ALLOW_SEALING|unix.MFD_CLOEXEC) + return file, sealMemfd, err +} + +func sealFile(f **os.File) error { + // When sealing an O_TMPFILE-style descriptor we need to + // re-open the path as O_PATH to clear the existing write + // handle we have. + opath, err := pathrs.Reopen(*f, unix.O_PATH|unix.O_CLOEXEC) + if err != nil { + return fmt.Errorf("reopen tmpfile: %w", err) + } + _ = (*f).Close() + *f = opath + return nil +} + +// otmpfile creates an open(O_TMPFILE) file in the given directory (supported +// since Linux 3.11). 
+func otmpfile(dir string) (*os.File, SealFunc, error) {
+	file, err := os.OpenFile(dir, unix.O_TMPFILE|unix.O_RDWR|unix.O_EXCL|unix.O_CLOEXEC, 0o700)
+	if err != nil {
+		return nil, nil, fmt.Errorf("O_TMPFILE creation failed: %w", err)
+	}
+	// Make sure we actually got an unlinked O_TMPFILE descriptor.
+	var stat unix.Stat_t
+	if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
+		file.Close()
+		return nil, nil, fmt.Errorf("cannot fstat O_TMPFILE fd: %w", err)
+	} else if stat.Nlink != 0 {
+		file.Close()
+		return nil, nil, errors.New("O_TMPFILE has non-zero nlink")
+	}
+	return file, sealFile, nil
+}
+
+// mktemp creates a classic unlinked file in the given directory.
+//
+// The file is created with os.CreateTemp, removed from the directory again,
+// made read+execute only (0o511) and then checked with fstat to confirm that
+// no link to it remains. On success the open file is returned together with
+// sealFile as its SealFunc. On any failure the descriptor is closed before
+// returning, so callers never have to clean up after an error.
+func mktemp(dir string) (*os.File, SealFunc, error) {
+	file, err := os.CreateTemp(dir, "runc.")
+	if err != nil {
+		return nil, nil, err
+	}
+	// Unlink the file and verify it was unlinked.
+	if err := os.Remove(file.Name()); err != nil {
+		// Do not leak the descriptor; the path is left in place because
+		// removing it is exactly what just failed.
+		file.Close()
+		return nil, nil, fmt.Errorf("unlinking classic tmpfile: %w", err)
+	}
+	if err := file.Chmod(0o511); err != nil {
+		file.Close()
+		return nil, nil, fmt.Errorf("chmod classic tmpfile: %w", err)
+	}
+	var stat unix.Stat_t
+	if err := unix.Fstat(int(file.Fd()), &stat); err != nil {
+		file.Close()
+		return nil, nil, fmt.Errorf("cannot fstat classic tmpfile: %w", err)
+	} else if stat.Nlink != 0 {
+		file.Close()
+		return nil, nil, fmt.Errorf("classic tmpfile %s has non-zero nlink after unlink", file.Name())
+	}
+	return file, sealFile, nil
+}
+
+// getSealableFile returns a writable file that the binary can be copied into,
+// together with the SealFunc that must be applied once the copy is complete.
+//
+// It tries, in order: an executable memfd, an O_TMPFILE in each candidate
+// directory, and finally a classic unlinked temporary file in each candidate
+// directory. A candidate whose file does not pass isExecutable is closed and
+// skipped. If every attempt fails, the returned error wraps the reason the
+// last attempt was rejected.
+func getSealableFile(comment, tmpDir string) (file *os.File, sealFn SealFunc, err error) {
+	// First, try an executable memfd (supported since Linux 3.17).
+	file, sealFn, err = Memfd(comment)
+	if err == nil {
+		return file, sealFn, err
+	}
+	logrus.Debugf("memfd cloned binary failed, falling back to O_TMPFILE: %v", err)
+
+	// The tmpDir here (c.root) might be mounted noexec, so we need a couple of
+	// fallbacks to try. It's possible that none of these are writable and
+	// executable, in which case there's nothing we can practically do (other
+	// than mounting our own executable tmpfs, which would have its own
+	// issues).
+	tmpDirs := []string{
+		tmpDir,
+		os.TempDir(),
+		"/tmp",
+		".",
+		"/bin",
+		"/",
+	}
+
+	// Try to fallback to O_TMPFILE (supported since Linux 3.11).
+	for _, dir := range tmpDirs {
+		file, sealFn, err = otmpfile(dir)
+		if err != nil {
+			continue
+		}
+		if !isExecutable(file) {
+			logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
+			file.Close()
+			// Record why this candidate was rejected, so that the log line and
+			// the final error below never wrap a nil error.
+			err = fmt.Errorf("tmpdir %s is noexec", dir)
+			continue
+		}
+		return file, sealFn, err
+	}
+	logrus.Debugf("O_TMPFILE cloned binary failed, falling back to mktemp(): %v", err)
+	// Finally, try a classic unlinked temporary file.
+	for _, dir := range tmpDirs {
+		file, sealFn, err = mktemp(dir)
+		if err != nil {
+			continue
+		}
+		if !isExecutable(file) {
+			logrus.Debugf("tmpdir %s is noexec -- trying a different tmpdir", dir)
+			file.Close()
+			// Same as above: keep a real error for the final report.
+			err = fmt.Errorf("tmpdir %s is noexec", dir)
+			continue
+		}
+		return file, sealFn, err
+	}
+	return nil, nil, fmt.Errorf("could not create sealable file for cloned binary: %w", err)
+}
+
+// CloneBinary creates a "sealed" clone of a given binary, which can be used to
+// thwart attempts by the container process to gain access to host binaries
+// through procfs magic-link shenanigans. For more details on why this is
+// necessary, see CVE-2019-5736.
+func CloneBinary(src io.Reader, size int64, name, tmpDir string) (*os.File, error) {
+	logrus.Debugf("cloning %s binary (%d bytes)", name, size)
+	file, sealFn, err := getSealableFile(name, tmpDir)
+	if err != nil {
+		return nil, err
+	}
+
+	// abort closes whatever file currently refers to and reports the failure.
+	abort := func(cause error) (*os.File, error) {
+		file.Close()
+		return nil, cause
+	}
+
+	written, err := system.Copy(file, src)
+	if err != nil {
+		return abort(fmt.Errorf("copy binary: %w", err))
+	}
+	// A short or long copy means the clone is not the binary we were given.
+	if written != size {
+		return abort(fmt.Errorf("copied binary size mismatch: %d != %d", written, size))
+	}
+	// The seal function may swap file for a different handle, hence the pointer.
+	if err := sealFn(&file); err != nil {
+		return abort(fmt.Errorf("could not seal fd: %w", err))
+	}
+	return file, nil
+}
+
+// IsCloned reports whether exe carries every seal in baseMemfdSeals. A file
+// on which F_GET_SEALS fails is reported as not cloned.
+func IsCloned(exe *os.File) bool {
+	applied, err := unix.FcntlInt(exe.Fd(), unix.F_GET_SEALS, 0)
+	if err != nil {
+		// /proc/self/exe is probably not a memfd
+		logrus.Debugf("F_GET_SEALS on %s failed: %v", exe.Name(), err)
+		return false
+	}
+	logrus.Debugf("checking %s memfd seals: 0x%x", exe.Name(), applied)
+	// Every base seal must be present, i.e. none of them may be missing.
+	missing := baseMemfdSeals &^ applied
+	return missing == 0
+}
+
+// CloneSelfExe returns a protected handle to the binary of the current
+// process, obtained through /proc/self/exe. The handle can then be used for
+// "runc init" so that the container process can never resolve the original
+// runc binary. For more details on why this is necessary, see CVE-2019-5736.
+func CloneSelfExe(tmpDir string) (*os.File, error) {
+	const selfExePath = "/proc/self/exe"
+
+	// Preferred route: a temporary overlayfs that exposes a readonly view of
+	// /proc/self/exe which the container cannot "unwrap". Unlike CloneBinary,
+	// this needs no extra memory and avoids the (fairly noticeable) cost of
+	// copying a large binary into a memfd.
+	//
+	// Basic performance testing showed effectively no overhead for the
+	// overlayfs approach (on par with both MS_BIND+MS_RDONLY and no binary
+	// cloning at all), whereas memfd copying adds around ~60% overhead during
+	// container startup.
+	sealed, overlayErr := sealedOverlayfs(selfExePath, tmpDir)
+	if overlayErr == nil {
+		logrus.Debug("runc exeseal: using overlayfs for sealed /proc/self/exe") // used for tests
+		return sealed, nil
+	}
+	logrus.WithError(overlayErr).Debugf("could not use overlayfs for /proc/self/exe sealing -- falling back to making a temporary copy")
+
+	// Fallback route: copy the running binary into a sealable file.
+	exe, err := os.Open(selfExePath)
+	if err != nil {
+		return nil, fmt.Errorf("opening current binary: %w", err)
+	}
+	defer exe.Close()
+
+	info, err := exe.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("checking /proc/self/exe size: %w", err)
+	}
+
+	return CloneBinary(exe, info.Size(), selfExePath, tmpDir)
+}
+
+// IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can
+// be guaranteed to be safe. This means that it must be a sealed memfd. Other
+// types of clones cannot be completely verified as safe.
+func IsSelfExeCloned() bool { + selfExe, err := os.Open("/proc/self/exe") + if err != nil { + logrus.Debugf("open /proc/self/exe failed: %v", err) + return false + } + defer selfExe.Close() + return IsCloned(selfExe) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/exeseal/overlayfs_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/exeseal/overlayfs_linux.go new file mode 100644 index 0000000000..f585566b6d --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/exeseal/overlayfs_linux.go @@ -0,0 +1,122 @@ +package exeseal + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/utils" +) + +func fsopen(fsName string, flags int) (*os.File, error) { + // Make sure we always set O_CLOEXEC. + flags |= unix.FSOPEN_CLOEXEC + fd, err := unix.Fsopen(fsName, flags) + if err != nil { + return nil, os.NewSyscallError("fsopen "+fsName, err) + } + return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil +} + +func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) { + // Make sure we always set O_CLOEXEC. + flags |= unix.FSMOUNT_CLOEXEC + fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs) + if err != nil { + return nil, os.NewSyscallError("fsmount "+ctx.Name(), err) + } + runtime.KeepAlive(ctx) // make sure fd is kept alive while it's used + return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil +} + +func escapeOverlayLowerDir(path string) string { + // If the lowerdir path contains ":" we need to escape them, and if there + // were any escape characters already (\) we need to escape those first. + return strings.ReplaceAll(strings.ReplaceAll(path, `\`, `\\`), `:`, `\:`) +} + +// sealedOverlayfs will create an internal overlayfs mount using fsopen() that +// uses the directory containing the binary as a lowerdir and a temporary tmpfs +// as an upperdir. 
There is no way to "unwrap" this (unlike MS_BIND+MS_RDONLY) +// and so we can create a safe zero-copy sealed version of /proc/self/exe. +// This only works for privileged users and on kernels with overlayfs and +// fsopen() enabled. +// +// TODO: Since Linux 5.11, overlayfs can be created inside user namespaces so +// it is technically possible to create an overlayfs even for rootless +// containers. Unfortunately, this would require some ugly manual CGo+fork +// magic so we can do this later if we feel it's really needed. +func sealedOverlayfs(binPath, tmpDir string) (_ *os.File, Err error) { + // Try to do the superblock creation first to bail out early if we can't + // use this method. + overlayCtx, err := fsopen("overlay", unix.FSOPEN_CLOEXEC) + if err != nil { + return nil, err + } + defer overlayCtx.Close() + + // binPath is going to be /proc/self/exe, so do a readlink to get the real + // path. overlayfs needs the real underlying directory for this protection + // mode to work properly. + if realPath, err := os.Readlink(binPath); err == nil { + binPath = realPath + } + binLowerDirPath, binName := filepath.Split(binPath) + // Escape any ":"s or "\"s in the path. + binLowerDirPath = escapeOverlayLowerDir(binLowerDirPath) + + // Overlayfs requires two lowerdirs in order to run in "lower-only" mode, + // where writes are completely blocked. Ideally we would create a dummy + // tmpfs for this, but it turns out that overlayfs doesn't allow for + // anonymous mountns paths. + // NOTE: I'm working on a patch to fix this but it won't be backported. + dummyLowerDirPath := escapeOverlayLowerDir(tmpDir) + + // Configure the lowerdirs. The binary lowerdir needs to be on the top to + // ensure that a file called "runc" (binName) in the dummy lowerdir doesn't + // mask the binary. 
+ lowerDirStr := binLowerDirPath + ":" + dummyLowerDirPath + if err := unix.FsconfigSetString(int(overlayCtx.Fd()), "lowerdir", lowerDirStr); err != nil { + return nil, fmt.Errorf("fsconfig set overlayfs lowerdir=%s: %w", lowerDirStr, err) + } + + // We don't care about xino (Linux 4.17) but it will be auto-enabled on + // some systems (if /run/runc and /usr/bin are on different filesystems) + // and this produces spurious dmesg log entries. We can safely ignore + // errors when disabling this because we don't actually care about the + // setting and we're just opportunistically disabling it. + _ = unix.FsconfigSetString(int(overlayCtx.Fd()), "xino", "off") + + // Get an actual handle to the overlayfs. + if err := unix.FsconfigCreate(int(overlayCtx.Fd())); err != nil { + return nil, os.NewSyscallError("fsconfig create overlayfs", err) + } + overlayFd, err := fsmount(overlayCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOSUID) + if err != nil { + return nil, err + } + defer overlayFd.Close() + + // Grab a handle to the binary through overlayfs. + exeFile, err := utils.Openat(overlayFd, binName, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open %s from overlayfs (lowerdir=%s): %w", binName, lowerDirStr, err) + } + // NOTE: We would like to check that exeFile is the same as /proc/self/exe, + // except this is a little difficult. Depending on what filesystems the + // layers are on, overlayfs can remap the inode numbers (and it always + // creates its own device numbers -- see ovl_map_dev_ino) so we can't do a + // basic stat-based check. The only reasonable option would be to hash both + // files and compare them, but this would require fully reading both files + // which would produce a similar performance overhead to memfd cloning. + // + // Ultimately, there isn't a real attack to be worried about here. 
An + // attacker would need to be able to modify files in /usr/sbin (or wherever + // runc lives), at which point they could just replace the runc binary with + // something malicious anyway. + return exeFile, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go new file mode 100644 index 0000000000..94b55eaa85 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go @@ -0,0 +1,219 @@ +package libcontainer + +import ( + "encoding/json" + "errors" + "fmt" + "os" + + securejoin "github.com/cyphar/filepath-securejoin" + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/manager" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/libcontainer/utils" +) + +const ( + stateFilename = "state.json" + execFifoFilename = "exec.fifo" +) + +// Create creates a new container with the given id inside a given state +// directory (root), and returns a Container object. +// +// The root is a state directory which many containers can share. It can be +// used later to get the list of containers, or to get information about a +// particular container (see Load). +// +// The id must not be empty and consist of only the following characters: +// ASCII letters, digits, underscore, plus, minus, period. The id must be +// unique and non-existent for the given root path. 
+func Create(root, id string, config *configs.Config) (*Container, error) { + if root == "" { + return nil, errors.New("root not set") + } + if err := validateID(id); err != nil { + return nil, err + } + if err := validate.Validate(config); err != nil { + return nil, err + } + if err := os.MkdirAll(root, 0o700); err != nil { + return nil, err + } + stateDir, err := securejoin.SecureJoin(root, id) + if err != nil { + return nil, err + } + if _, err := os.Stat(stateDir); err == nil { + return nil, ErrExist + } else if !os.IsNotExist(err) { + return nil, err + } + + cm, err := manager.New(config.Cgroups) + if err != nil { + return nil, err + } + + // Check that cgroup does not exist or empty (no processes). + // Note for cgroup v1 this check is not thorough, as there are multiple + // separate hierarchies, while both Exists() and GetAllPids() only use + // one for "devices" controller (assuming others are the same, which is + // probably true in almost all scenarios). Checking all the hierarchies + // would be too expensive. + if cm.Exists() { + pids, err := cm.GetAllPids() + // Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV. + if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) { + return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err) + } + if len(pids) != 0 { + return nil, fmt.Errorf("container's cgroup is not empty: %d process(es) found", len(pids)) + } + } + + // Check that cgroup is not frozen. Do not use Exists() here + // since in cgroup v1 it only checks "devices" controller. + st, err := cm.GetFreezerState() + if err != nil { + return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err) + } + if st == cgroups.Frozen { + return nil, errors.New("container's cgroup unexpectedly frozen") + } + + // Parent directory is already created above, so Mkdir is enough. 
+ if err := os.Mkdir(stateDir, 0o711); err != nil { + return nil, err + } + c := &Container{ + id: id, + stateDir: stateDir, + config: config, + cgroupManager: cm, + intelRdtManager: intelrdt.NewManager(config, id, ""), + } + c.state = &stoppedState{c: c} + return c, nil +} + +// Load takes a path to the state directory (root) and an id of an existing +// container, and returns a Container object reconstructed from the saved +// state. This presents a read only view of the container. +func Load(root, id string) (*Container, error) { + if root == "" { + return nil, errors.New("root not set") + } + // when load, we need to check id is valid or not. + if err := validateID(id); err != nil { + return nil, err + } + stateDir, err := securejoin.SecureJoin(root, id) + if err != nil { + return nil, err + } + state, err := loadState(stateDir) + if err != nil { + return nil, err + } + r := &nonChildProcess{ + processPid: state.InitProcessPid, + processStartTime: state.InitProcessStartTime, + fds: state.ExternalDescriptors, + } + cm, err := manager.NewWithPaths(state.Config.Cgroups, state.CgroupPaths) + if err != nil { + return nil, err + } + c := &Container{ + initProcess: r, + initProcessStartTime: state.InitProcessStartTime, + id: id, + config: &state.Config, + cgroupManager: cm, + intelRdtManager: intelrdt.NewManager(&state.Config, id, state.IntelRdtPath), + stateDir: stateDir, + created: state.Created, + } + c.state = &loadedState{c: c} + if err := c.refreshState(); err != nil { + return nil, err + } + return c, nil +} + +func loadState(root string) (*State, error) { + stateFilePath, err := securejoin.SecureJoin(root, stateFilename) + if err != nil { + return nil, err + } + f, err := os.Open(stateFilePath) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotExist + } + return nil, err + } + defer f.Close() + var state *State + if err := json.NewDecoder(f).Decode(&state); err != nil { + return nil, err + } + // Cgroup v1 fs manager expect Resources to never be 
nil. + if state.Config.Cgroups.Resources == nil { + state.Config.Cgroups.Resources = &cgroups.Resources{} + } + return state, nil +} + +// validateID checks if the supplied container ID is valid, returning +// the ErrInvalidID in case it is not. +// +// The format of valid ID was never formally defined, instead the code +// was modified to allow or disallow specific characters. +// +// Currently, a valid ID is a non-empty string consisting only of +// the following characters: +// - uppercase (A-Z) and lowercase (a-z) Latin letters; +// - digits (0-9); +// - underscore (_); +// - plus sign (+); +// - minus sign (-); +// - period (.). +// +// In addition, IDs that can't be used to represent a file name +// (such as . or ..) are rejected. + +func validateID(id string) error { + if len(id) < 1 { + return ErrInvalidID + } + + // Allowed characters: 0-9 A-Z a-z _ + - . + for i := 0; i < len(id); i++ { + c := id[i] + switch { + case c >= 'a' && c <= 'z': + case c >= 'A' && c <= 'Z': + case c >= '0' && c <= '9': + case c == '_': + case c == '+': + case c == '-': + case c == '.': + default: + return ErrInvalidID + } + + } + + if string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) { + return ErrInvalidID + } + + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go new file mode 100644 index 0000000000..4052920049 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go @@ -0,0 +1,724 @@ +package libcontainer + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "os" + "path/filepath" + "runtime" + "runtime/debug" + "strconv" + "syscall" + + "github.com/containerd/console" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + 
"github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer/capabilities" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" +) + +type initType string + +const ( + initSetns initType = "setns" + initStandard initType = "standard" +) + +type pid struct { + Pid int `json:"stage2_pid"` + PidFirstChild int `json:"stage1_pid"` +} + +// network is an internal struct used to setup container networks. +type network struct { + configs.Network + + // TempVethPeerName is a unique temporary veth peer name that was placed into + // the container's namespace. + TempVethPeerName string `json:"temp_veth_peer_name"` +} + +// initConfig is used for transferring parameters from Exec() to Init(). +// It contains: +// - original container config; +// - some [Process] properties; +// - set of properties merged from the container config ([configs.Config]) +// and the process ([Process]); +// - some properties that come from the container. +// +// When adding new fields, please make sure they go into the relevant section. +type initConfig struct { + // Config is the original container config. + Config *configs.Config `json:"config"` + + // Properties that are unique to and come from [Process]. + + Args []string `json:"args"` + Env []string `json:"env"` + UID int `json:"uid"` + GID int `json:"gid"` + AdditionalGroups []int `json:"additional_groups"` + Cwd string `json:"cwd"` + CreateConsole bool `json:"create_console"` + ConsoleWidth uint16 `json:"console_width"` + ConsoleHeight uint16 `json:"console_height"` + PassedFilesCount int `json:"passed_files_count"` + + // Properties that exists both in the container config and the process, + // as merged by [Container.newInitConfig] (process properties has preference). 
+ + AppArmorProfile string `json:"apparmor_profile"` + Capabilities *configs.Capabilities `json:"capabilities"` + NoNewPrivileges bool `json:"no_new_privileges"` + ProcessLabel string `json:"process_label"` + Rlimits []configs.Rlimit `json:"rlimits"` + IOPriority *configs.IOPriority `json:"io_priority,omitempty"` + Scheduler *configs.Scheduler `json:"scheduler,omitempty"` + CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"` + + // Miscellaneous properties, filled in by [Container.newInitConfig] + // unless documented otherwise. + + ContainerID string `json:"containerid"` + Cgroup2Path string `json:"cgroup2_path,omitempty"` + + // Networks is filled in from container config by [initProcess.createNetworkInterfaces]. + Networks []*network `json:"network"` + + // SpecState is filled in by [initProcess.Start]. + SpecState *specs.State `json:"spec_state,omitempty"` +} + +// Init is part of "runc init" implementation. +func Init() { + runtime.GOMAXPROCS(1) + runtime.LockOSThread() + + if err := startInitialization(); err != nil { + // If the error is returned, it was not communicated + // back to the parent (which is not a common case), + // so print it to stderr here as a last resort. + // + // Do not use logrus as we are not sure if it has been + // set up yet, but most important, if the parent is + // alive (and its log forwarding is working). + fmt.Fprintln(os.Stderr, err) + } + // Normally, StartInitialization() never returns, meaning + // if we are here, it had failed. + os.Exit(255) +} + +// Normally, this function does not return. If it returns, with or without an +// error, it means the initialization has failed. If the error is returned, +// it means the error can not be communicated back to the parent. +func startInitialization() (retErr error) { + // Get the synchronisation pipe. 
+ envSyncPipe := os.Getenv("_LIBCONTAINER_SYNCPIPE") + syncPipeFd, err := strconv.Atoi(envSyncPipe) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_SYNCPIPE: %w", err) + } + syncPipe := newSyncSocket(os.NewFile(uintptr(syncPipeFd), "sync")) + defer syncPipe.Close() + + defer func() { + // If this defer is ever called, this means initialization has failed. + // Send the error back to the parent process in the form of an initError + // if the sync socket has not been closed. + if syncPipe.isClosed() { + return + } + ierr := initError{Message: retErr.Error()} + if err := writeSyncArg(syncPipe, procError, ierr); err != nil { + fmt.Fprintln(os.Stderr, err) + return + } + // The error is sent, no need to also return it (or it will be reported twice). + retErr = nil + }() + + // Get the INITPIPE. + envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE") + initPipeFd, err := strconv.Atoi(envInitPipe) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE: %w", err) + } + initPipe := os.NewFile(uintptr(initPipeFd), "init") + defer initPipe.Close() + + // Set up logging. This is used rarely, and mostly for init debugging. + + // Passing log level is optional; currently libcontainer/integration does not do it. + if levelStr := os.Getenv("_LIBCONTAINER_LOGLEVEL"); levelStr != "" { + logLevel, err := strconv.Atoi(levelStr) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_LOGLEVEL: %w", err) + } + logrus.SetLevel(logrus.Level(logLevel)) + } + + logFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE")) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err) + } + logPipe := os.NewFile(uintptr(logFd), "logpipe") + + logrus.SetOutput(logPipe) + logrus.SetFormatter(new(logrus.JSONFormatter)) + logrus.Debug("child process in init()") + + // Only init processes have FIFOFD. 
+ var fifoFile *os.File + envInitType := os.Getenv("_LIBCONTAINER_INITTYPE") + it := initType(envInitType) + if it == initStandard { + fifoFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_FIFOFD")) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD: %w", err) + } + fifoFile = os.NewFile(uintptr(fifoFd), "initfifo") + } + + var consoleSocket *os.File + if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" { + console, err := strconv.Atoi(envConsole) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE: %w", err) + } + consoleSocket = os.NewFile(uintptr(console), "console-socket") + defer consoleSocket.Close() + } + + var pidfdSocket *os.File + if envSockFd := os.Getenv("_LIBCONTAINER_PIDFD_SOCK"); envSockFd != "" { + sockFd, err := strconv.Atoi(envSockFd) + if err != nil { + return fmt.Errorf("unable to convert _LIBCONTAINER_PIDFD_SOCK: %w", err) + } + pidfdSocket = os.NewFile(uintptr(sockFd), "pidfd-socket") + defer pidfdSocket.Close() + } + + // From here on, we don't need current process environment. It is not + // used directly anywhere below this point, but let's clear it anyway. + os.Clearenv() + + defer func() { + if err := recover(); err != nil { + if err2, ok := err.(error); ok { + retErr = fmt.Errorf("panic from initialization: %w, %s", err2, debug.Stack()) + } else { + retErr = fmt.Errorf("panic from initialization: %v, %s", err, debug.Stack()) + } + } + }() + + var config initConfig + if err := json.NewDecoder(initPipe).Decode(&config); err != nil { + return err + } + + // If init succeeds, it will not return, hence none of the defers will be called. + return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe) +} + +func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket, fifoFile, logPipe *os.File) error { + // Clean the RLIMIT_NOFILE cache in go runtime. 
+ // Issue: https://github.com/opencontainers/runc/issues/4195 + maybeClearRlimitNofileCache(config.Rlimits) + + switch t { + case initSetns: + i := &linuxSetnsInit{ + pipe: pipe, + consoleSocket: consoleSocket, + pidfdSocket: pidfdSocket, + config: config, + logPipe: logPipe, + } + return i.Init() + case initStandard: + i := &linuxStandardInit{ + pipe: pipe, + consoleSocket: consoleSocket, + pidfdSocket: pidfdSocket, + parentPid: unix.Getppid(), + config: config, + fifoFile: fifoFile, + logPipe: logPipe, + } + return i.Init() + } + return fmt.Errorf("unknown init type %q", t) +} + +// verifyCwd ensures that the current directory is actually inside the mount +// namespace root of the current process. +func verifyCwd() error { + // getcwd(2) on Linux detects if cwd is outside of the rootfs of the + // current mount namespace root, and in that case prefixes "(unreachable)" + // to the returned string. glibc's getcwd(3) and Go's Getwd() both detect + // when this happens and return ENOENT rather than returning a non-absolute + // path. In both cases we can therefore easily detect if we have an invalid + // cwd by checking the return value of getcwd(3). See getcwd(3) for more + // details, and CVE-2024-21626 for the security issue that motivated this + // check. + // + // We have to use unix.Getwd() here because os.Getwd() has a workaround for + // $PWD which involves doing stat(.), which can fail if the current + // directory is inaccessible to the container process. + if wd, err := unix.Getwd(); errors.Is(err, unix.ENOENT) { + return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected") + } else if err != nil { + return fmt.Errorf("failed to verify if current working directory is safe: %w", err) + } else if !filepath.IsAbs(wd) { + // We shouldn't ever hit this, but check just in case. 
+ return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd) + } + return nil +} + +// finalizeNamespace drops the caps, sets the correct user +// and working dir, and closes any leaked file descriptors +// before executing the command inside the namespace. +func finalizeNamespace(config *initConfig) error { + // Ensure that all unwanted fds we may have accidentally + // inherited are marked close-on-exec so they stay out of the + // container + if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil { + return fmt.Errorf("error closing exec fds: %w", err) + } + + // we only do chdir if it's specified + doChdir := config.Cwd != "" + if doChdir { + // First, attempt the chdir before setting up the user. + // This could allow us to access a directory that the user running runc can access + // but the container user cannot. + err := unix.Chdir(config.Cwd) + switch { + case err == nil: + doChdir = false + case os.IsPermission(err): + // If we hit an EPERM, we should attempt again after setting up user. + // This will allow us to successfully chdir if the container user has access + // to the directory, but the user running runc does not. + // This is useful in cases where the cwd is also a volume that's been chowned to the container user. + default: + return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err) + } + } + + // We should set envs after we are in the jail of the container. 
+ // Please see https://github.com/opencontainers/runc/issues/4688 + env, err := prepareEnv(config.Env, config.UID) + if err != nil { + return err + } + config.Env = env + + w, err := capabilities.New(config.Capabilities) + if err != nil { + return err + } + // drop capabilities in bounding set before changing user + if err := w.ApplyBoundingSet(); err != nil { + return fmt.Errorf("unable to apply bounding set: %w", err) + } + // preserve existing capabilities while we change users + if err := system.SetKeepCaps(); err != nil { + return fmt.Errorf("unable to set keep caps: %w", err) + } + if err := setupUser(config); err != nil { + return fmt.Errorf("unable to setup user: %w", err) + } + // Change working directory AFTER the user has been set up, if we haven't done it yet. + if doChdir { + if err := unix.Chdir(config.Cwd); err != nil { + return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err) + } + } + // Make sure our final working directory is inside the container. + if err := verifyCwd(); err != nil { + return err + } + if err := system.ClearKeepCaps(); err != nil { + return fmt.Errorf("unable to clear keep caps: %w", err) + } + if err := w.ApplyCaps(); err != nil { + return fmt.Errorf("unable to apply caps: %w", err) + } + return nil +} + +// setupConsole sets up the console from inside the container, and sends the +// master pty fd to the config.Pipe (using cmsg). This is done to ensure that +// consoles are scoped to a container properly (see runc#814 and the many +// issues related to that). This has to be run *after* we've pivoted to the new +// rootfs (and the users' configuration is entirely set up). +func setupConsole(socket *os.File, config *initConfig, mount bool) error { + defer socket.Close() + // At this point, /dev/ptmx points to something that we would expect. We + // used to change the owner of the slave path, but since the /dev/pts mount + // can have gid=X set (at the users' option). 
So touching the owner of the + // slave PTY is not necessary, as the kernel will handle that for us. Note + // however, that setupUser (specifically fixStdioPermissions) *will* change + // the UID owner of the console to be the user the process will run as (so + // they can actually control their console). + + pty, peerPty, err := safeAllocPty() + if err != nil { + return err + } + // After we return from here, we don't need the console anymore. + defer pty.Close() + defer peerPty.Close() + + if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 { + err = pty.Resize(console.WinSize{ + Height: config.ConsoleHeight, + Width: config.ConsoleWidth, + }) + if err != nil { + return err + } + } + + // Mount the console inside our rootfs. + if mount { + if err := mountConsole(peerPty); err != nil { + return err + } + } + // While we can access console.master, using the API is a good idea. + if err := utils.SendRawFd(socket, pty.Name(), pty.Fd()); err != nil { + return err + } + runtime.KeepAlive(pty) + + // Now, dup over all the things. + return dupStdio(peerPty) +} + +// syncParentReady sends to the given pipe a JSON payload which indicates that +// the init is ready to Exec the child process. It then waits for the parent to +// indicate that it is cleared to Exec. +func syncParentReady(pipe *syncSocket) error { + // Tell parent. + if err := writeSync(pipe, procReady); err != nil { + return err + } + // Wait for parent to give the all-clear. + return readSync(pipe, procRun) +} + +// syncParentHooks sends to the given pipe a JSON payload which indicates that +// the parent should execute pre-start hooks. It then waits for the parent to +// indicate that it is cleared to resume. +func syncParentHooks(pipe *syncSocket) error { + // Tell parent. + if err := writeSync(pipe, procHooks); err != nil { + return err + } + // Wait for parent to give the all-clear. 
+ return readSync(pipe, procHooksDone) +} + +// syncParentSeccomp sends the fd associated with the seccomp file descriptor +// to the parent, and wait for the parent to do pidfd_getfd() to grab a copy. +func syncParentSeccomp(pipe *syncSocket, seccompFd int) error { + if seccompFd == -1 { + return nil + } + defer unix.Close(seccompFd) + + // Tell parent to grab our fd. + // + // Notably, we do not use writeSyncFile here because a container might have + // an SCMP_ACT_NOTIFY action on sendmsg(2) so we need to use the smallest + // possible number of system calls here because all of those syscalls + // cannot be used with SCMP_ACT_NOTIFY as a result (any syscall we use here + // before the parent gets the file descriptor would deadlock "runc init" if + // we allowed it for SCMP_ACT_NOTIFY). See seccomp.InitSeccomp() for more + // details. + if err := writeSyncArg(pipe, procSeccomp, seccompFd); err != nil { + return err + } + // Wait for parent to tell us they've grabbed the seccompfd. + return readSync(pipe, procSeccompDone) +} + +// setupUser changes the groups, gid, and uid for the user inside the container. +func setupUser(config *initConfig) error { + // Before we change to the container's user make sure that the processes + // STDIO is correctly owned by the user that we are switching to. + if err := fixStdioPermissions(config.UID); err != nil { + return err + } + + // We don't need to use /proc/thread-self here because setgroups is a + // per-userns file and thus is global to all threads in a thread-group. + // This lets us avoid having to do runtime.LockOSThread. + var setgroups []byte + setgroupsFile, err := pathrs.ProcSelfOpen("setgroups", unix.O_RDONLY) + if err == nil { + setgroups, err = io.ReadAll(setgroupsFile) + _ = setgroupsFile.Close() + } + if err != nil && !os.IsNotExist(err) { + return err + } + + // This isn't allowed in an unprivileged user namespace since Linux 3.19. 
+ // There's nothing we can do about /etc/group entries, so we silently + // ignore setting groups here (since the user didn't explicitly ask us to + // set the group). + allowSupGroups := !config.Config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny" + + if allowSupGroups { + if err := unix.Setgroups(config.AdditionalGroups); err != nil { + return &os.SyscallError{Syscall: "setgroups", Err: err} + } + } + + if err := unix.Setgid(config.GID); err != nil { + if err == unix.EINVAL { + return fmt.Errorf("cannot setgid to unmapped gid %d in user namespace", config.GID) + } + return err + } + if err := unix.Setuid(config.UID); err != nil { + if err == unix.EINVAL { + return fmt.Errorf("cannot setuid to unmapped uid %d in user namespace", config.UID) + } + return err + } + return nil +} + +// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified uid. +// The ownership needs to match because it is created outside of the container and needs to be +// localized. +func fixStdioPermissions(uid int) error { + for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { + var s unix.Stat_t + if err := unix.Fstat(int(file.Fd()), &s); err != nil { + return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} + } + + // Skip chown if: + // - uid is already the one we want, or + // - fd is opened to /dev/null. + if int(s.Uid) == uid || isDevNull(&s) { + continue + } + + // We only change the uid (as it is possible for the mount to + // prefer a different gid, and there's no reason for us to change it). + // The reason why we don't just leave the default uid=X mount setup is + // that users expect to be able to actually use their console. Without + // this code, you couldn't effectively run as a non-root user inside a + // container and also have a console set up. + if err := file.Chown(uid, int(s.Gid)); err != nil { + // If we've hit an EINVAL then s.Gid isn't mapped in the user + // namespace. 
If we've hit an EPERM then the inode's current owner + // is not mapped in our user namespace (in particular, + // privileged_wrt_inode_uidgid() has failed). Read-only + // /dev can result in EROFS error. In any case, it's + // better for us to just not touch the stdio rather + // than bail at this point. + + if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) || errors.Is(err, unix.EROFS) { + continue + } + return err + } + } + return nil +} + +// setupNetwork sets up and initializes any network interface inside the container. +func setupNetwork(config *initConfig) error { + for _, config := range config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + if err := strategy.initialize(config); err != nil { + return err + } + } + return nil +} + +func setupRoute(config *configs.Config) error { + for _, config := range config.Routes { + _, dst, err := net.ParseCIDR(config.Destination) + if err != nil { + return err + } + src := net.ParseIP(config.Source) + if src == nil { + return fmt.Errorf("Invalid source for route: %s", config.Source) + } + gw := net.ParseIP(config.Gateway) + if gw == nil { + return fmt.Errorf("Invalid gateway for route: %s", config.Gateway) + } + l, err := netlink.LinkByName(config.InterfaceName) + if err != nil { + return err + } + route := &netlink.Route{ + Scope: netlink.SCOPE_UNIVERSE, + Dst: dst, + Src: src, + Gw: gw, + LinkIndex: l.Attrs().Index, + } + if err := netlink.RouteAdd(route); err != nil { + return err + } + } + return nil +} + +func maybeClearRlimitNofileCache(limits []configs.Rlimit) { + for _, rlimit := range limits { + if rlimit.Type == syscall.RLIMIT_NOFILE { + system.ClearRlimitNofileCache(&syscall.Rlimit{ + Cur: rlimit.Soft, + Max: rlimit.Hard, + }) + return + } + } +} + +func setupRlimits(limits []configs.Rlimit, pid int) error { + for _, rlimit := range limits { + if err := unix.Prlimit(pid, rlimit.Type, &unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}, nil); err != nil { 
+ return fmt.Errorf("error setting rlimit type %v: %w", rlimit.Type, err) + } + } + return nil +} + +func setupScheduler(config *initConfig) error { + if config.Scheduler == nil { + return nil + } + attr, err := configs.ToSchedAttr(config.Scheduler) + if err != nil { + return err + } + if err := unix.SchedSetAttr(0, attr, 0); err != nil { + if errors.Is(err, unix.EPERM) && config.Config.Cgroups.CpusetCpus != "" { + return errors.New("process scheduler can't be used together with AllowedCPUs") + } + return fmt.Errorf("error setting scheduler: %w", err) + } + return nil +} + +func setupIOPriority(config *initConfig) error { + const ioprioWhoPgrp = 1 + + ioprio := config.IOPriority + if ioprio == nil { + return nil + } + class := 0 + switch ioprio.Class { + case specs.IOPRIO_CLASS_RT: + class = 1 + case specs.IOPRIO_CLASS_BE: + class = 2 + case specs.IOPRIO_CLASS_IDLE: + class = 3 + default: + return fmt.Errorf("invalid io priority class: %s", ioprio.Class) + } + + // Combine class and priority into a single value + // https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17 + iop := (class << 13) | ioprio.Priority + _, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, ioprioWhoPgrp, 0, uintptr(iop)) + if errno != 0 { + return fmt.Errorf("failed to set io priority: %w", errno) + } + return nil +} + +func setupPersonality(config *configs.Config) error { + return system.SetLinuxPersonality(config.Personality.Domain) +} + +// signalAllProcesses freezes then iterates over all the processes inside the +// manager's cgroups sending the signal s to them. +func signalAllProcesses(m cgroups.Manager, s unix.Signal) error { + if !m.Exists() { + return ErrCgroupNotExist + } + // Use cgroup.kill, if available. + if s == unix.SIGKILL { + if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid. + err := cgroups.WriteFile(p, "cgroup.kill", "1") + if err == nil || !errors.Is(err, os.ErrNotExist) { + return err + } + // Fallback to old implementation. 
+ } + } + + if err := m.Freeze(cgroups.Frozen); err != nil { + logrus.Warn(err) + } + pids, err := m.GetAllPids() + if err != nil { + if err := m.Freeze(cgroups.Thawed); err != nil { + logrus.Warn(err) + } + return err + } + for _, pid := range pids { + err := unix.Kill(pid, s) + if err != nil && err != unix.ESRCH { + logrus.Warnf("kill %d: %v", pid, err) + } + } + if err := m.Freeze(cgroups.Thawed); err != nil { + logrus.Warn(err) + } + + return nil +} + +// setupPidfd opens a process file descriptor of init process, and sends the +// file descriptor back to the socket. +func setupPidfd(socket *os.File, initType string) error { + defer socket.Close() + + pidFd, err := unix.PidfdOpen(os.Getpid(), 0) + if err != nil { + return fmt.Errorf("failed to pidfd_open: %w", err) + } + + if err := utils.SendRawFd(socket, initType, uintptr(pidFd)); err != nil { + unix.Close(pidFd) + return fmt.Errorf("failed to send pidfd on socket: %w", err) + } + return unix.Close(pidFd) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go new file mode 100644 index 0000000000..56cb956ae7 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go @@ -0,0 +1,23 @@ +package intelrdt + +var cmtEnabled bool + +// IsCMTEnabled checks if Intel RDT/CMT is enabled. 
+func IsCMTEnabled() bool { + featuresInit() + return cmtEnabled +} + +func getCMTNumaNodeStats(numaPath string) (*CMTNumaNodeStats, error) { + stats := &CMTNumaNodeStats{} + + if enabledMonFeatures.llcOccupancy { + llcOccupancy, err := getIntelRdtParamUint(numaPath, "llc_occupancy") + if err != nil { + return nil, err + } + stats.LLCOccupancy = llcOccupancy + } + + return stats, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go new file mode 100644 index 0000000000..7d4b4d38a6 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go @@ -0,0 +1,681 @@ +package intelrdt + +import ( + "bytes" + "errors" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/moby/sys/mountinfo" + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/configs" +) + +/* + * About Intel RDT features: + * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). + * Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are + * two sub-features of RDT. + * + * Cache Allocation Technology (CAT) provides a way for the software to restrict + * cache allocation to a defined 'subset' of L3 cache which may be overlapping + * with other 'subsets'. The different subsets are identified by class of + * service (CLOS) and each CLOS has a capacity bitmask (CBM). + * + * Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle + * over memory bandwidth for the software. A user controls the resource by + * indicating the percentage of maximum memory bandwidth or memory bandwidth + * limit in MBps unit if MBA Software Controller is enabled. 
+ * + * More details about Intel RDT CAT and MBA can be found in the section 17.18 + * of Intel Software Developer Manual: + * https://software.intel.com/en-us/articles/intel-sdm + * + * About Intel RDT kernel interface: + * In Linux 4.10 kernel or newer, the interface is defined and exposed via + * "resource control" filesystem, which is a "cgroup-like" interface. + * + * Comparing with cgroups, it has similar process management lifecycle and + * interfaces in a container. But unlike cgroups' hierarchy, it has single level + * filesystem layout. + * + * CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via + * "resource control" filesystem. + * + * Intel RDT "resource control" filesystem hierarchy: + * mount -t resctrl resctrl /sys/fs/resctrl + * tree /sys/fs/resctrl + * /sys/fs/resctrl/ + * |-- info + * | |-- L3 + * | | |-- cbm_mask + * | | |-- min_cbm_bits + * | | |-- num_closids + * | |-- L3_MON + * | | |-- max_threshold_occupancy + * | | |-- mon_features + * | | |-- num_rmids + * | |-- MB + * | |-- bandwidth_gran + * | |-- delay_linear + * | |-- min_bandwidth + * | |-- num_closids + * |-- ... + * |-- schemata + * |-- tasks + * |-- + * |-- ... + * |-- schemata + * |-- tasks + * + * For runc, we can make use of `tasks` and `schemata` configuration for L3 + * cache and memory bandwidth resources constraints. + * + * The file `tasks` has a list of tasks that belongs to this group (e.g., + * " group). Tasks can be added to a group by writing the task ID + * to the "tasks" file (which will automatically remove them from the previous + * group to which they belonged). New tasks created by fork(2) and clone(2) are + * added to the same group as their parent. + * + * The file `schemata` has a list of all the resources available to this group. + * Each resource (L3 cache, memory bandwidth) has its own line and format. 
+ * + * L3 cache schema: + * It has allocation bitmasks/values for L3 cache on each socket, which + * contains L3 cache id and capacity bitmask (CBM). + * Format: "L3:=;=;..." + * For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" + * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + * + * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can + * be set is less than the max bit. The max bits in the CBM is varied among + * supported Intel CPU models. Kernel will check if it is valid when writing. + * e.g., default value 0xfffff in root indicates the max bits of CBM is 20 + * bits, which mapping to entire L3 cache capacity. Some valid CBM values to + * set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + * + * Memory bandwidth schema: + * It has allocation values for memory bandwidth on each socket, which contains + * L3 cache id and memory bandwidth. + * Format: "MB:=bandwidth0;=bandwidth1;..." + * For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" + * + * The minimum bandwidth percentage value for each CPU model is predefined and + * can be looked up through "info/MB/min_bandwidth". The bandwidth granularity + * that is allocated is also dependent on the CPU model and can be looked up at + * "info/MB/bandwidth_gran". The available bandwidth control steps are: + * min_bw + N * bw_gran. Intermediate values are rounded to the next control + * step available on the hardware. + * + * If MBA Software Controller is enabled through mount option "-o mba_MBps": + * mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl + * We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit + * instead of "percentages". 
The kernel underneath would use a software feedback + * mechanism or a "Software Controller" which reads the actual bandwidth using + * MBM counters and adjust the memory bandwidth percentages to ensure: + * "actual memory bandwidth < user specified memory bandwidth". + * + * For example, on a two-socket machine, the schema line could be + * "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 + * and 7000 MBps memory bandwidth limit on socket 1. + * + * For more information about Intel RDT kernel interface: + * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt + * + * An example for runc: + * Consider a two-socket machine with two L3 caches where the default CBM is + * 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% + * with a memory bandwidth granularity of 10%. + * + * Tasks inside the container only have access to the "upper" 7/11 of L3 cache + * on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a + * maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. + * + * "linux": { + * "intelRdt": { + * "l3CacheSchema": "L3:0=7f0;1=1f", + * "memBwSchema": "MB:0=20;1=70" + * } + * } + */ + +type Manager struct { + mu sync.Mutex + config *configs.Config + id string + path string +} + +// NewManager returns a new instance of Manager, or nil if the Intel RDT +// functionality is not specified in the config, available from hardware or +// enabled in the kernel. +func NewManager(config *configs.Config, id string, path string) *Manager { + if config.IntelRdt == nil { + return nil + } + if _, err := Root(); err != nil { + // Intel RDT is not available. + return nil + } + return newManager(config, id, path) +} + +// newManager is the same as NewManager, except it does not check if the feature +// is actually available. Used by unit tests that mock intelrdt paths. 
+func newManager(config *configs.Config, id string, path string) *Manager { + return &Manager{ + config: config, + id: id, + path: path, + } +} + +const ( + intelRdtTasks = "tasks" +) + +var ( + // The flag to indicate if Intel RDT/CAT is enabled + catEnabled bool + // The flag to indicate if Intel RDT/MBA is enabled + mbaEnabled bool + + // For Intel RDT initialization + initOnce sync.Once + + errNotFound = errors.New("Intel RDT not available") +) + +// Check if Intel RDT sub-features are enabled in featuresInit() +func featuresInit() { + initOnce.Do(func() { + // 1. Check if Intel RDT "resource control" filesystem is available. + // The user guarantees to mount the filesystem. + root, err := Root() + if err != nil { + return + } + + // 2. Check if Intel RDT sub-features are available in "resource + // control" filesystem. Intel RDT sub-features can be + // selectively disabled or enabled by kernel command line + // (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel + if _, err := os.Stat(filepath.Join(root, "info", "L3")); err == nil { + catEnabled = true + } + if _, err := os.Stat(filepath.Join(root, "info", "MB")); err == nil { + mbaEnabled = true + } + if _, err := os.Stat(filepath.Join(root, "info", "L3_MON")); err != nil { + return + } + enabledMonFeatures, err = getMonFeatures(root) + if err != nil { + return + } + if enabledMonFeatures.mbmTotalBytes || enabledMonFeatures.mbmLocalBytes { + mbmEnabled = true + } + if enabledMonFeatures.llcOccupancy { + cmtEnabled = true + } + }) +} + +// findIntelRdtMountpointDir returns the mount point of the Intel RDT "resource control" filesystem. 
+func findIntelRdtMountpointDir() (string, error) { + mi, err := mountinfo.GetMounts(func(m *mountinfo.Info) (bool, bool) { + // similar to mountinfo.FSTypeFilter but stops after the first match + if m.FSType == "resctrl" { + return false, true // don't skip, stop + } + return true, false // skip, keep going + }) + if err != nil { + return "", err + } + if len(mi) < 1 { + return "", errNotFound + } + + return mi[0].Mountpoint, nil +} + +// For Root() use only. +var ( + intelRdtRoot string + intelRdtRootErr error + rootOnce sync.Once +) + +// The kernel creates this (empty) directory if resctrl is supported by the +// hardware and kernel. The user is responsible for mounting the resctrl +// filesystem, and they could mount it somewhere else if they wanted to. +const defaultResctrlMountpoint = "/sys/fs/resctrl" + +// Root returns the Intel RDT "resource control" filesystem mount point. +func Root() (string, error) { + rootOnce.Do(func() { + // Does this system support resctrl? + var statfs unix.Statfs_t + if err := unix.Statfs(defaultResctrlMountpoint, &statfs); err != nil { + if errors.Is(err, unix.ENOENT) { + err = errNotFound + } + intelRdtRootErr = err + return + } + + // Has the resctrl fs been mounted to the default mount point? + if statfs.Type == unix.RDTGROUP_SUPER_MAGIC { + intelRdtRoot = defaultResctrlMountpoint + return + } + + // The resctrl fs could have been mounted somewhere nonstandard. + intelRdtRoot, intelRdtRootErr = findIntelRdtMountpointDir() + }) + + return intelRdtRoot, intelRdtRootErr +} + +// Gets a single uint64 value from the specified file. 
+func getIntelRdtParamUint(path, file string) (uint64, error) {
+	fileName := filepath.Join(path, file)
+	contents, err := os.ReadFile(fileName)
+	if err != nil {
+		return 0, err
+	}
+
+	res, err := fscommon.ParseUint(string(bytes.TrimSpace(contents)), 10, 64)
+	if err != nil {
+		return res, fmt.Errorf("unable to parse %q as a uint from file %q: %w", string(contents), fileName, err) // keep the cause in the chain
+	}
+	return res, nil
+}
+
+// getIntelRdtParamString returns the whitespace-trimmed contents of the specified file.
+func getIntelRdtParamString(path, file string) (string, error) {
+	contents, err := os.ReadFile(filepath.Join(path, file))
+	if err != nil {
+		return "", err
+	}
+
+	return string(bytes.TrimSpace(contents)), nil
+}
+
+func writeFile(dir, file, data string) error {
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", file)
+	}
+	if err := os.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil {
+		return newLastCmdError(fmt.Errorf("intelrdt: unable to write %v: %w", data, err))
+	}
+	return nil
+}
+
+// getL3CacheInfo reads the read-only L3 cache information from "info/L3" under Root().
+func getL3CacheInfo() (*L3CacheInfo, error) {
+	l3CacheInfo := &L3CacheInfo{}
+
+	rootPath, err := Root()
+	if err != nil {
+		return l3CacheInfo, err
+	}
+
+	path := filepath.Join(rootPath, "info", "L3")
+	cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
+	if err != nil {
+		return l3CacheInfo, err
+	}
+	minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
+	if err != nil {
+		return l3CacheInfo, err
+	}
+	numClosids, err := getIntelRdtParamUint(path, "num_closids")
+	if err != nil {
+		return l3CacheInfo, err
+	}
+
+	l3CacheInfo.CbmMask = cbmMask
+	l3CacheInfo.MinCbmBits = minCbmBits
+	l3CacheInfo.NumClosids = numClosids
+
+	return l3CacheInfo, nil
+}
+
+// getMemBwInfo reads the read-only memory bandwidth information from "info/MB" under Root().
+func getMemBwInfo() (*MemBwInfo, error) {
+	memBwInfo := &MemBwInfo{}
+
+	rootPath, err := Root()
+	if err != nil {
+		return memBwInfo, err
+	}
+
+	path := filepath.Join(rootPath, "info", "MB")
+	bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran")
+	if err != nil {
+		return memBwInfo, err
+	}
+	delayLinear, err := getIntelRdtParamUint(path, "delay_linear")
+	if err != nil {
+		return memBwInfo, err
+	}
+	minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth")
+	if err != nil {
+		return memBwInfo, err
+	}
+	numClosids, err := getIntelRdtParamUint(path, "num_closids")
+	if err != nil {
+		return memBwInfo, err
+	}
+
+	memBwInfo.BandwidthGran = bandwidthGran
+	memBwInfo.DelayLinear = delayLinear
+	memBwInfo.MinBandwidth = minBandwidth
+	memBwInfo.NumClosids = numClosids
+
+	return memBwInfo, nil
+}
+
+// getLastCmdStatus returns diagnostics for the last filesystem operation error, read from info/last_cmd_status.
+func getLastCmdStatus() (string, error) {
+	rootPath, err := Root()
+	if err != nil {
+		return "", err
+	}
+
+	path := filepath.Join(rootPath, "info")
+	lastCmdStatus, err := getIntelRdtParamString(path, "last_cmd_status")
+	if err != nil {
+		return "", err
+	}
+
+	return lastCmdStatus, nil
+}
+
+// WriteIntelRdtTasks writes the specified pid into the "tasks" file
+func WriteIntelRdtTasks(dir string, pid int) error {
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", intelRdtTasks)
+	}
+
+	// Don't attach any pid if -1 is specified as a pid
+	if pid != -1 {
+		if err := os.WriteFile(filepath.Join(dir, intelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil {
+			return newLastCmdError(fmt.Errorf("intelrdt: unable to add pid %d: %w", pid, err))
+		}
+	}
+	return nil
+}
+
+// IsCATEnabled checks if Intel RDT/CAT is enabled.
+func IsCATEnabled() bool {
+	featuresInit()
+	return catEnabled
+}
+
+// IsMBAEnabled checks if Intel RDT/MBA is enabled.
+func IsMBAEnabled() bool {
+	featuresInit()
+	return mbaEnabled
+}
+
+// getIntelRdtPath returns the path of the clos group in the "resource control" filesystem that the container belongs to.
+func (m *Manager) getIntelRdtPath() (string, error) {
+	rootPath, err := Root()
+	if err != nil {
+		return "", err
+	}
+
+	clos := m.id
+	if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID != "" {
+		clos = m.config.IntelRdt.ClosID
+	}
+
+	return filepath.Join(rootPath, clos), nil
+}
+
+// Apply applies Intel RDT configuration to the process with the specified pid.
+func (m *Manager) Apply(pid int) (err error) {
+	// If intelRdt is not specified in config, we do nothing
+	if m.config.IntelRdt == nil {
+		return nil
+	}
+
+	path, err := m.getIntelRdtPath()
+	if err != nil {
+		return err
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" {
+		// Check that the CLOS exists, i.e. it has been pre-configured to
+		// conform with the runtime spec
+		if _, err := os.Stat(path); err != nil {
+			return fmt.Errorf("clos dir not accessible (must be pre-created when l3CacheSchema and memBwSchema are empty): %w", err)
+		}
+	}
+
+	if err := os.MkdirAll(path, 0o755); err != nil {
+		return newLastCmdError(err)
+	}
+
+	if err := WriteIntelRdtTasks(path, pid); err != nil {
+		return err // WriteIntelRdtTasks already appends last_cmd_status
+	}
+
+	m.path = path
+	return nil
+}
+
+// Destroy destroys the Intel RDT container-specific container_id group.
+func (m *Manager) Destroy() error {
+	// Don't remove resctrl group if closid has been explicitly specified. The
+	// group is likely externally managed, i.e. by some other entity than us.
+	// There are probably other containers/tasks sharing the same group.
+	if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID == "" {
+		m.mu.Lock()
+		defer m.mu.Unlock()
+		if err := os.RemoveAll(m.GetPath()); err != nil {
+			return err
+		}
+		m.path = ""
+	}
+	return nil
+}
+
+// GetPath returns Intel RDT path to save in a state file and to be able to
+// restore the object later.
+func (m *Manager) GetPath() string {
+	if m.path == "" {
+		m.path, _ = m.getIntelRdtPath()
+	}
+	return m.path
+}
+
+// GetStats returns statistics for Intel RDT.
+func (m *Manager) GetStats() (*Stats, error) {
+	// If intelRdt is not specified in config
+	if m.config.IntelRdt == nil {
+		return nil, nil
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	stats := newStats()
+
+	rootPath, err := Root()
+	if err != nil {
+		return nil, err
+	}
+	// The read-only L3 cache and memory bandwidth schemata in root
+	tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
+	if err != nil {
+		return nil, err
+	}
+	schemaRootStrings := strings.Split(tmpRootStrings, "\n")
+
+	// The L3 cache and memory bandwidth schemata in container's clos group
+	containerPath := m.GetPath()
+	tmpStrings, err := getIntelRdtParamString(containerPath, "schemata")
+	if err != nil {
+		return nil, err
+	}
+	schemaStrings := strings.Split(tmpStrings, "\n")
+
+	if IsCATEnabled() {
+		// The read-only L3 cache information
+		l3CacheInfo, err := getL3CacheInfo()
+		if err != nil {
+			return nil, err
+		}
+		stats.L3CacheInfo = l3CacheInfo
+
+		// The read-only L3 cache schema in root
+		for _, schemaRoot := range schemaRootStrings {
+			if strings.Contains(schemaRoot, "L3") {
+				stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot)
+			}
+		}
+
+		// The L3 cache schema in container's clos group
+		for _, schema := range schemaStrings {
+			if strings.Contains(schema, "L3") {
+				stats.L3CacheSchema = strings.TrimSpace(schema)
+			}
+		}
+	}
+
+	if IsMBAEnabled() {
+		// The read-only memory bandwidth information
+		memBwInfo, err := getMemBwInfo()
+		if err != nil {
+			return nil, err
+		}
+		stats.MemBwInfo = memBwInfo
+
+		// The read-only memory bandwidth schema in root
+		for _, schemaRoot := range schemaRootStrings {
+			if strings.Contains(schemaRoot, "MB") {
+				stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot)
+			}
+		}
+
+		// The memory bandwidth schema in container's clos group
+		for _, schema := range schemaStrings {
+			if strings.Contains(schema, "MB") {
+				stats.MemBwSchema = strings.TrimSpace(schema)
+			}
+		}
+	}
+
+	if IsMBMEnabled() || IsCMTEnabled() {
+		err = getMonitoringStats(containerPath, stats)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return stats, nil
+}
+
+// Set Intel RDT "resource control" filesystem as configured.
+func (m *Manager) Set(container *configs.Config) error {
+	// About L3 cache schema:
+	// It has allocation bitmasks/values for L3 cache on each socket,
+	// which contains L3 cache id and capacity bitmask (CBM).
+	// 	Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+	// For example, on a two-socket machine, the schema line could be:
+	// 	L3:0=ff;1=c0
+	// which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM
+	// is 0xc0.
+	//
+	// The valid L3 cache CBM is a *contiguous bits set* and number of
+	// bits that can be set is less than the max bit. The max bits in the
+	// CBM is varied among supported Intel CPU models. Kernel will check
+	// if it is valid when writing. e.g., default value 0xfffff in root
+	// indicates the max bits of CBM is 20 bits, which maps to the entire
+	// L3 cache capacity. Some valid CBM values to set in a group:
+	// 0xf, 0xf0, 0x3ff, 0x1f00, etc.
+	//
+	//
+	// About memory bandwidth schema:
+	// It has allocation values for memory bandwidth on each socket, which
+	// contains L3 cache id and memory bandwidth.
+	// 	Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
+	// For example, on a two-socket machine, the schema line could be:
+	// 	"MB:0=20;1=70"
+	//
+	// The minimum bandwidth percentage value for each CPU model is
+	// predefined and can be looked up through "info/MB/min_bandwidth".
+	// The bandwidth granularity that is allocated is also dependent on
+	// the CPU model and can be looked up at "info/MB/bandwidth_gran".
+	// The available bandwidth control steps are: min_bw + N * bw_gran.
+	// Intermediate values are rounded to the next control step available
+	// on the hardware.
+	//
+	// If MBA Software Controller is enabled through mount option
+	// "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
+	// We could specify memory bandwidth in "MBps" (Mega Bytes per second)
+	// unit instead of "percentages". The kernel underneath would use a
+	// software feedback mechanism or a "Software Controller" which reads
+	// the actual bandwidth using MBM counters and adjust the memory
+	// bandwidth percentages to ensure:
+	// "actual memory bandwidth < user specified memory bandwidth".
+	//
+	// For example, on a two-socket machine, the schema line could be
+	// "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on
+	// socket 0 and 7000 MBps memory bandwidth limit on socket 1.
+	if container.IntelRdt != nil {
+		path := m.GetPath()
+		l3CacheSchema := container.IntelRdt.L3CacheSchema
+		memBwSchema := container.IntelRdt.MemBwSchema
+
+		// TODO: verify that l3CacheSchema and/or memBwSchema match the
+		// existing schemata if ClosID has been specified. This is more
+		// involved than reading the file and doing plain string comparison as
+		// the value written in does not necessarily match what gets read out
+		// (leading zeros, cache id ordering etc).
+
+		// Write a single joint schema string to schemata file
+		if l3CacheSchema != "" && memBwSchema != "" {
+			if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
+				return err
+			}
+		}
+
+		// Write only L3 cache schema string to schemata file
+		if l3CacheSchema != "" && memBwSchema == "" {
+			if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
+				return err
+			}
+		}
+
+		// Write only memory bandwidth schema string to schemata file
+		if l3CacheSchema == "" && memBwSchema != "" {
+			if err := writeFile(path, "schemata", memBwSchema); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+func newLastCmdError(err error) error {
+	status, err1 := getLastCmdStatus()
+	if err1 == nil {
+		return fmt.Errorf("%w, last_cmd_status: %s", err, status)
+	}
+	return err
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
new file mode 100644
index 0000000000..669168faf6
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
@@ -0,0 +1,31 @@
+package intelrdt
+
+// The flag to indicate if Intel RDT/MBM is enabled
+var mbmEnabled bool
+
+// IsMBMEnabled checks if Intel RDT/MBM is enabled.
+func IsMBMEnabled() bool {
+	featuresInit()
+	return mbmEnabled
+}
+
+func getMBMNumaNodeStats(numaPath string) (*MBMNumaNodeStats, error) {
+	stats := &MBMNumaNodeStats{}
+	if enabledMonFeatures.mbmTotalBytes {
+		mbmTotalBytes, err := getIntelRdtParamUint(numaPath, "mbm_total_bytes")
+		if err != nil {
+			return nil, err
+		}
+		stats.MBMTotalBytes = mbmTotalBytes
+	}
+
+	if enabledMonFeatures.mbmLocalBytes {
+		mbmLocalBytes, err := getIntelRdtParamUint(numaPath, "mbm_local_bytes")
+		if err != nil {
+			return nil, err
+		}
+		stats.MBMLocalBytes = mbmLocalBytes
+	}
+
+	return stats, nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
new file mode 100644
index 0000000000..82e0002efa
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
@@ -0,0 +1,83 @@
+package intelrdt
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"path/filepath"
+
+	"github.com/sirupsen/logrus"
+)
+
+var enabledMonFeatures monFeatures
+
+type monFeatures struct {
+	mbmTotalBytes bool
+	mbmLocalBytes bool
+	llcOccupancy  bool
+}
+
+func getMonFeatures(intelRdtRoot string) (monFeatures, error) {
+	file, err := os.Open(filepath.Join(intelRdtRoot, "info", "L3_MON", "mon_features"))
+	if err != nil {
+		return monFeatures{}, err
+	}
+	defer file.Close()
+	return parseMonFeatures(file)
+}
+
+func parseMonFeatures(reader io.Reader) (monFeatures, error) {
+	scanner := bufio.NewScanner(reader)
+
+	monFeatures := monFeatures{}
+
+	for scanner.Scan() {
+		switch feature := scanner.Text(); feature {
+		case "mbm_total_bytes":
+			monFeatures.mbmTotalBytes = true
+		case "mbm_local_bytes":
+			monFeatures.mbmLocalBytes = true
+		case "llc_occupancy":
+			monFeatures.llcOccupancy = true
+		default:
+			logrus.Warnf("Unsupported Intel RDT monitoring feature: %s", feature)
+		}
+	}
+
+	return monFeatures, scanner.Err()
+}
+
+func getMonitoringStats(containerPath string, stats *Stats) error {
+	numaFiles, err := os.ReadDir(filepath.Join(containerPath, "mon_data"))
+	if err != nil {
+		return err
+	}
+
+	var mbmStats []MBMNumaNodeStats
+	var cmtStats []CMTNumaNodeStats
+
+	for _, file := range numaFiles {
+		if file.IsDir() {
+			numaPath := filepath.Join(containerPath, "mon_data", file.Name())
+			if IsMBMEnabled() {
+				numaMBMStats, err := getMBMNumaNodeStats(numaPath)
+				if err != nil {
+					return err
+				}
+				mbmStats = append(mbmStats, *numaMBMStats)
+			}
+			if IsCMTEnabled() {
+				numaCMTStats, err := getCMTNumaNodeStats(numaPath)
+				if err != nil {
+					return err
+				}
+				cmtStats = append(cmtStats, *numaCMTStats)
+			}
+		}
+	}
+
+	stats.MBMStats = &mbmStats
+	stats.CMTStats = &cmtStats
+
+	return nil
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
new file mode 100644
index 0000000000..a5eb2541e8
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
@@ -0,0 +1,57 @@
+package intelrdt
+
+type L3CacheInfo struct {
+	CbmMask    string `json:"cbm_mask,omitempty"`
+	MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
+	NumClosids uint64 `json:"num_closids,omitempty"`
+}
+
+type MemBwInfo struct {
+	BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
+	DelayLinear   uint64 `json:"delay_linear,omitempty"`
+	MinBandwidth  uint64 `json:"min_bandwidth,omitempty"`
+	NumClosids    uint64 `json:"num_closids,omitempty"`
+}
+
+type MBMNumaNodeStats struct {
+	// The 'mbm_total_bytes' in 'container_id' group.
+	MBMTotalBytes uint64 `json:"mbm_total_bytes"`
+
+	// The 'mbm_local_bytes' in 'container_id' group.
+	MBMLocalBytes uint64 `json:"mbm_local_bytes"`
+}
+
+type CMTNumaNodeStats struct {
+	// The 'llc_occupancy' in 'container_id' group.
+	LLCOccupancy uint64 `json:"llc_occupancy"`
+}
+
+type Stats struct {
+	// The read-only L3 cache information
+	L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
+
+	// The read-only L3 cache schema in root
+	L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
+
+	// The L3 cache schema in 'container_id' group
+	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+
+	// The read-only memory bandwidth information
+	MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"`
+
+	// The read-only memory bandwidth schema in root
+	MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
+
+	// The memory bandwidth schema in 'container_id' group
+	MemBwSchema string `json:"mem_bw_schema,omitempty"`
+
+	// The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group
+	MBMStats *[]MBMNumaNodeStats `json:"mbm_stats,omitempty"`
+
+	// The cache monitoring technology statistics from NUMA nodes in 'container_id' group
+	CMTStats *[]CMTNumaNodeStats `json:"cmt_stats,omitempty"`
+}
+
+func newStats() *Stats {
+	return &Stats{}
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.c b/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.c
new file mode 100644
index 0000000000..fdb20aecad
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.c
@@ -0,0 +1,81 @@
+//go:build linux
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+/*
+ * All of the code here is run inside an async-signal-safe context, so we need
+ * to be careful to not call any functions that could cause issues. In theory,
+ * since we are a Go program, there are fewer restrictions; in practice, it's
+ * better to be safe than sorry.
+ *
+ * The only exception is exit, which we need to call to make sure we don't
+ * return into runc.
+ */
+
+void bail(int pipefd, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vdprintf(pipefd, fmt, args);
+	va_end(args);
+
+	exit(1);
+}
+
+int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd)
+{
+	char buffer[4096] = { 0 };
+
+	pid_t child = fork();
+	if (child != 0)
+		return child;
+	/* in child */
+
+	/* Join the target userns. */
+	int nsfd = open(userns_path, O_RDONLY);
+	if (nsfd < 0)
+		bail(errfd, "open userns path %s failed: %m", userns_path);
+
+	int err = setns(nsfd, CLONE_NEWUSER);
+	if (err < 0)
+		bail(errfd, "setns %s failed: %m", userns_path);
+
+	close(nsfd);
+
+	/* Pipe the requested file contents. */
+	int fd = open(path, O_RDONLY);
+	if (fd < 0)
+		bail(errfd, "open %s in userns %s failed: %m", path, userns_path);
+
+	int nread, ntotal = 0;
+	while ((nread = read(fd, buffer, sizeof(buffer))) != 0) {
+		if (nread < 0)
+			bail(errfd, "read bytes from %s failed (after %d total bytes read): %m", path, ntotal);
+		ntotal += nread;
+
+		int nwritten = 0;
+		while (nwritten < nread) {
+			int n = write(outfd, buffer + nwritten, nread - nwritten); /* resume after a short write */
+			if (n < 0)
+				bail(errfd, "write %d bytes from %s failed (after %d bytes written): %m",
+				     nread - nwritten, path, nwritten);
+			nwritten += n;
+		}
+		if (nread != nwritten)
+			bail(errfd, "mismatch for bytes read and written: %d read != %d written", nread, nwritten);
+	}
+
+	close(fd);
+	close(outfd);
+	close(errfd);
+
+	/* We must exit here, otherwise we would return into a forked runc. */
+	exit(0);
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.go
new file mode 100644
index 0000000000..7a8c2b023b
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/userns_maps_linux.go
@@ -0,0 +1,186 @@
+//go:build linux
+
+package userns
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"unsafe"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/sirupsen/logrus"
+)
+
+/*
+#include <stdlib.h>
+extern int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd);
+*/
+import "C"
+
+func parseIdmapData(data []byte) (ms []configs.IDMap, err error) {
+	scanner := bufio.NewScanner(bytes.NewReader(data))
+	for scanner.Scan() {
+		var m configs.IDMap
+		line := scanner.Text()
+		if _, err := fmt.Sscanf(line, "%d %d %d", &m.ContainerID, &m.HostID, &m.Size); err != nil {
+			return nil, fmt.Errorf("parsing id map failed: invalid format in line %q: %w", line, err)
+		}
+		ms = append(ms, m)
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("parsing id map failed: %w", err)
+	}
+	return ms, nil
+}
+
+// Do something equivalent to nsenter --user=<nsPath> cat <path>, but more
+// efficiently. Returns the contents of the requested file from within the user
+// namespace.
+func spawnUserNamespaceCat(nsPath string, path string) ([]byte, error) {
+	rdr, wtr, err := os.Pipe()
+	if err != nil {
+		return nil, fmt.Errorf("create pipe for userns spawn failed: %w", err)
+	}
+	defer rdr.Close()
+	defer wtr.Close()
+
+	errRdr, errWtr, err := os.Pipe()
+	if err != nil {
+		return nil, fmt.Errorf("create error pipe for userns spawn failed: %w", err)
+	}
+	defer errRdr.Close()
+	defer errWtr.Close()
+
+	cNsPath := C.CString(nsPath)
+	defer C.free(unsafe.Pointer(cNsPath))
+	cPath := C.CString(path)
+	defer C.free(unsafe.Pointer(cPath))
+
+	childPid := C.spawn_userns_cat(cNsPath, cPath, C.int(wtr.Fd()), C.int(errWtr.Fd()))
+
+	if childPid < 0 {
+		return nil, fmt.Errorf("failed to spawn fork for userns")
+	} else if childPid == 0 {
+		// this should never happen
+		panic("runc executing inside fork child -- unsafe state!")
+	}
+
+	// We are in the parent -- close the write end of the pipe before reading.
+	wtr.Close()
+	output, err := io.ReadAll(rdr)
+	rdr.Close()
+	if err != nil {
+		return nil, fmt.Errorf("reading from userns spawn failed: %w", err)
+	}
+
+	// Ditto for the error pipe.
+	errWtr.Close()
+	errOutput, err := io.ReadAll(errRdr)
+	errRdr.Close()
+	if err != nil {
+		return nil, fmt.Errorf("reading from userns spawn error pipe failed: %w", err)
+	}
+	errOutput = bytes.TrimSpace(errOutput)
+
+	// Clean up the child.
+	child, err := os.FindProcess(int(childPid))
+	if err != nil {
+		return nil, fmt.Errorf("could not find userns spawn process: %w", err)
+	}
+	state, err := child.Wait()
+	if err != nil {
+		return nil, fmt.Errorf("failed to wait for userns spawn process: %w", err)
+	}
+	if !state.Success() {
+		errStr := string(errOutput)
+		if errStr == "" {
+			errStr = fmt.Sprintf("unknown error (status code %d)", state.ExitCode())
+		}
+		return nil, fmt.Errorf("userns spawn: %s", errStr)
+	} else if len(errOutput) > 0 {
+		// We can just ignore weird output in the error pipe if the process
+		// didn't bail(), but for completeness output for debugging.
+		logrus.Debugf("userns spawn succeeded but unexpected error message found: %s", string(errOutput))
+	}
+	// The subprocess succeeded, return whatever it wrote to the pipe.
+	return output, nil
+}
+
+func GetUserNamespaceMappings(nsPath string) (uidMap, gidMap []configs.IDMap, err error) {
+	var (
+		pid         int
+		extra       rune
+		tryFastPath bool
+	)
+
+	// nsPath is usually of the form /proc/<pid>/ns/user, which means that we
+	// already have a pid that is part of the user namespace and thus we can
+	// just use the pid to read from /proc/<pid>/*id_map.
+	//
+	// Note that Sscanf doesn't consume the whole input, so we check for any
+	// trailing data with %c. That way, we can be sure the pattern matched
+	// /proc/$pid/ns/user _exactly_ iff n == 1.
+	if n, _ := fmt.Sscanf(nsPath, "/proc/%d/ns/user%c", &pid, &extra); n == 1 {
+		tryFastPath = pid > 0
+	}
+
+	for _, mapType := range []struct {
+		name  string
+		idMap *[]configs.IDMap
+	}{
+		{"uid_map", &uidMap},
+		{"gid_map", &gidMap},
+	} {
+		var mapData []byte
+
+		if tryFastPath {
+			path := fmt.Sprintf("/proc/%d/%s", pid, mapType.name)
+			data, err := os.ReadFile(path)
+			if err != nil {
+				// Do not error out here -- we need to try the slow path if the
+				// fast path failed.
+				logrus.Debugf("failed to use fast path to read %s from userns %s (error: %s), falling back to slow userns-join path", mapType.name, nsPath, err)
+			} else {
+				mapData = data
+			}
+		} else {
+			logrus.Debugf("cannot use fast path to read %s from userns %s, falling back to slow userns-join path", mapType.name, nsPath)
+		}
+
+		if mapData == nil {
+			// We have to actually join the namespace if we cannot take the
+			// fast path. The path is resolved with respect to the child
+			// process, so just use /proc/self.
+			data, err := spawnUserNamespaceCat(nsPath, "/proc/self/"+mapType.name)
+			if err != nil {
+				return nil, nil, err
+			}
+			mapData = data
+		}
+		idMap, err := parseIdmapData(mapData)
+		if err != nil {
+			return nil, nil, fmt.Errorf("failed to parse %s of userns %s: %w", mapType.name, nsPath, err)
+		}
+		*mapType.idMap = idMap
+	}
+
+	return uidMap, gidMap, nil
+}
+
+// IsSameMapping returns whether or not the two id mappings are the same. Note
+// that if the order of the mappings is different, or a mapping has been split,
+// the mappings will be considered different.
+func IsSameMapping(a, b []configs.IDMap) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for idx := range a {
+		if a[idx] != b[idx] {
+			return false
+		}
+	}
+	return true
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/usernsfd_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/usernsfd_linux.go
new file mode 100644
index 0000000000..00b5cd0a21
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/internal/userns/usernsfd_linux.go
@@ -0,0 +1,156 @@
+package userns
+
+import (
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+	"sync"
+	"syscall"
+
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type Mapping struct {
+	UIDMappings []configs.IDMap
+	GIDMappings []configs.IDMap
+}
+
+func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) {
+	for _, uid := range m.UIDMappings {
+		uids = append(uids, syscall.SysProcIDMap{
+			ContainerID: int(uid.ContainerID),
+			HostID:      int(uid.HostID),
+			Size:        int(uid.Size),
+		})
+	}
+	for _, gid := range m.GIDMappings {
+		gids = append(gids, syscall.SysProcIDMap{
+			ContainerID: int(gid.ContainerID),
+			HostID:      int(gid.HostID),
+			Size:        int(gid.Size),
+		})
+	}
+	return uids, gids
+}
+
+// id returns a unique identifier for this mapping, agnostic of the order of
+// the uid and gid mappings (because the order doesn't matter to the kernel).
+// The set of userns handles is indexed using this ID.
+func (m Mapping) id() string {
+	var uids, gids []string
+	for _, idmap := range m.UIDMappings {
+		uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
+	}
+	for _, idmap := range m.GIDMappings {
+		gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
+	}
+	// We don't care about the sort order -- just sort them.
+	sort.Strings(uids)
+	sort.Strings(gids)
+	return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",")
+}
+
+type Handles struct {
+	m    sync.Mutex
+	maps map[string]*os.File
+}
+
+// Release all resources associated with these Handles. All existing files
+// returned from Get() will continue to work even after calling Release(). The
+// same Handles can be re-used after calling Release().
+func (hs *Handles) Release() {
+	hs.m.Lock()
+	defer hs.m.Unlock()
+
+	// Close the files for good measure, though GC will do that for us anyway.
+	for _, file := range hs.maps {
+		_ = file.Close()
+	}
+	hs.maps = nil
+}
+
+func spawnProc(req Mapping) (*os.Process, error) {
+	// We need to spawn a subprocess with the requested mappings, which is
+	// unfortunately quite expensive. The "safe" way of doing this is natively
+	// with Go (and then spawning something like "sleep infinity"), but
+	// execve() is a waste of cycles because we just need some process to have
+	// the right mapping, we don't care what it's executing. The "unsafe"
+	// option of doing a clone() behind the back of Go is probably okay in
+	// theory as long as we just do kill(getpid(), SIGSTOP). However, if we
+	// tell Go to put the new process into PTRACE_TRACEME mode, we can avoid
+	// the exec and not have to faff around with the mappings.
+	//
+	// Note that Go's stdlib does not support newuidmap, but in the case of
+	// id-mapped mounts, it seems incredibly unlikely that the user will be
+	// requesting us to do a remapping as an unprivileged user with mappings
+	// they have privileges over.
+	logrus.Debugf("spawning dummy process for id-mapping %s", req.id())
+	uidMappings, gidMappings := req.toSys()
+	// We don't need to use /proc/thread-self here because the exe mm of a
+	// thread-group is guaranteed to be the same for all threads by definition.
+	// This lets us avoid having to do runtime.LockOSThread.
+	return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{
+		Sys: &syscall.SysProcAttr{
+			Cloneflags:                 unix.CLONE_NEWUSER,
+			UidMappings:                uidMappings,
+			GidMappings:                gidMappings,
+			GidMappingsEnableSetgroups: false,
+			// Put the process into PTRACE_TRACEME mode to allow us to get the
+			// userns without having a proper execve() target.
+			Ptrace: true,
+		},
+	})
+}
+
+func dupFile(f *os.File) (*os.File, error) {
+	newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
+	if err != nil {
+		return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
+	}
+	return os.NewFile(uintptr(newFd), f.Name()), nil
+}
+
+// Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested
+// mapping. The processes spawned to produce userns nsfds are cached, so if
+// equivalent user namespace mappings are requested, the same user namespace
+// will be returned. The caller is responsible for closing the returned file
+// descriptor.
+func (hs *Handles) Get(req Mapping) (file *os.File, err error) {
+	hs.m.Lock()
+	defer hs.m.Unlock()
+
+	if hs.maps == nil {
+		hs.maps = make(map[string]*os.File)
+	}
+
+	file, ok := hs.maps[req.id()]
+	if !ok {
+		proc, err := spawnProc(req)
+		if err != nil {
+			return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err)
+		}
+		// Make sure we kill and reap the helper process before returning. We
+		// ignore errors because there's not much we can do about them anyway.
+		defer func() {
+			_ = proc.Kill()
+			_, _ = proc.Wait()
+		}()
+
+		// Stash away a handle to the userns file. This is neater than keeping
+		// the process alive, because Go's GC can handle files much better than
+		// leaked processes, and having long-living useless processes seems
+		// less than ideal.
+		file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid))
+		if err != nil {
+			return nil, err
+		}
+		hs.maps[req.id()] = file
+	}
+	// Duplicate the file, to make sure the lifecycle of each *os.File we
+	// return is independent.
+	return dupFile(file)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
new file mode 100644
index 0000000000..f3a6c5343f
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
@@ -0,0 +1,45 @@
+package keys
+
+import (
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+type KeySerial uint32
+
+func JoinSessionKeyring(name string) (KeySerial, error) {
+	sessKeyID, err := unix.KeyctlJoinSessionKeyring(name)
+	if err != nil {
+		return 0, fmt.Errorf("unable to create session key: %w", err)
+	}
+	return KeySerial(sessKeyID), nil
+}
+
+// ModKeyringPerm modifies permissions on a keyring by reading the current permissions,
+// ANDing the bits with the given mask (clearing permissions) and setting
+// additional permission bits.
+func ModKeyringPerm(ringID KeySerial, mask, setbits uint32) error {
+	dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringID))
+	if err != nil {
+		return err
+	}
+
+	res := strings.Split(dest, ";")
+	if len(res) < 5 {
+		return errors.New("Destination buffer for key description is too small")
+	}
+
+	// parse permissions
+	perm64, err := strconv.ParseUint(res[3], 16, 32)
+	if err != nil {
+		return err
+	}
+
+	perm := (uint32(perm64) & mask) | setbits
+
+	return unix.KeyctlSetperm(int(ringID), perm)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go b/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
new file mode 100644
index 0000000000..95deb0d6ca
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go
@@ -0,0 +1,56 @@
+package logs
+
+import (
+	"bufio"
+	"encoding/json"
+	"io"
+
+	"github.com/sirupsen/logrus"
+)
+
+func ForwardLogs(logPipe io.ReadCloser) chan error {
+	done := make(chan error, 1)
+	s := bufio.NewScanner(logPipe)
+
+	logger := logrus.StandardLogger()
+	if logger.ReportCaller {
+		// Need a copy of the standard logger, but with ReportCaller
+		// turned off, as the logs are merely forwarded and their
+		// true source is not this file/line/function.
+		logNoCaller := *logrus.StandardLogger()
+		logNoCaller.ReportCaller = false
+		logger = &logNoCaller
+	}
+
+	go func() {
+		for s.Scan() {
+			processEntry(s.Bytes(), logger)
+		}
+		if err := logPipe.Close(); err != nil {
+			logrus.Errorf("error closing log source: %v", err)
+		}
+		// The only error we want to return is when reading from
+		// logPipe has failed.
+		done <- s.Err()
+		close(done)
+	}()
+
+	return done
+}
+
+func processEntry(text []byte, logger *logrus.Logger) {
+	if len(text) == 0 {
+		return
+	}
+
+	var jl struct {
+		Level logrus.Level `json:"level"`
+		Msg   string       `json:"msg"`
+	}
+	if err := json.Unmarshal(text, &jl); err != nil {
+		logrus.Errorf("failed to decode %q to json: %v", text, err)
+		return
+	}
+
+	logger.Log(jl.Level, jl.Msg)
+}
diff --git a/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
new file mode 100644
index 0000000000..2790f018d0
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
@@ -0,0 +1,97 @@
+package libcontainer
+
+import (
+	"fmt"
+	"math"
+
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+)
+
+// List of known message types we want to send to the bootstrap program.
+// The numbers are randomly chosen to not conflict with known netlink types.
+const (
+	InitMsg          uint16 = 62000
+	CloneFlagsAttr   uint16 = 27281
+	NsPathsAttr      uint16 = 27282
+	UidmapAttr       uint16 = 27283
+	GidmapAttr       uint16 = 27284
+	SetgroupAttr     uint16 = 27285
+	OomScoreAdjAttr  uint16 = 27286
+	RootlessEUIDAttr uint16 = 27287
+	UidmapPathAttr   uint16 = 27288
+	GidmapPathAttr   uint16 = 27289
+	TimeOffsetsAttr  uint16 = 27290
+)
+
+type Int32msg struct {
+	Type  uint16
+	Value uint32
+}
+
+// Serialize serializes the message.
+// Int32msg has the following representation +// | nlattr len | nlattr type | +// | uint32 value | +func (msg *Int32msg) Serialize() []byte { + buf := make([]byte, msg.Len()) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(msg.Len())) + native.PutUint16(buf[2:4], msg.Type) + native.PutUint32(buf[4:8], msg.Value) + return buf +} + +func (msg *Int32msg) Len() int { + return unix.NLA_HDRLEN + 4 +} + +// Bytemsg has the following representation +// | nlattr len | nlattr type | +// | value | pad | +type Bytemsg struct { + Type uint16 + Value []byte +} + +func (msg *Bytemsg) Serialize() []byte { + l := msg.Len() + if l > math.MaxUint16 { + // We cannot return nil nor an error here, so we panic with + // a specific type instead, which is handled via recover in + // bootstrapData. + panic(netlinkError{fmt.Errorf("netlink: cannot serialize bytemsg of length %d (larger than UINT16_MAX)", l)}) + } + buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1)) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(l)) + native.PutUint16(buf[2:4], msg.Type) + copy(buf[4:], msg.Value) + return buf +} + +func (msg *Bytemsg) Len() int { + return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated +} + +type Boolmsg struct { + Type uint16 + Value bool +} + +func (msg *Boolmsg) Serialize() []byte { + buf := make([]byte, msg.Len()) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(msg.Len())) + native.PutUint16(buf[2:4], msg.Type) + if msg.Value { + native.PutUint32(buf[4:8], uint32(1)) + } else { + native.PutUint32(buf[4:8], uint32(0)) + } + return buf +} + +func (msg *Boolmsg) Len() int { + return unix.NLA_HDRLEN + 4 // alignment +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go new file mode 100644 index 0000000000..9d4b5dcef5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/mount_linux.go @@ -0,0 +1,339 
@@ +package libcontainer + +import ( + "errors" + "fmt" + "io/fs" + "os" + "strconv" + "strings" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/internal/userns" + "github.com/opencontainers/runc/libcontainer/utils" +) + +// mountSourceType indicates what type of file descriptor is being returned. It +// is used to tell rootfs_linux.go whether or not to use move_mount(2) to +// install the mount. +type mountSourceType string + +const ( + // An open_tree(2)-style file descriptor that needs to be installed using + // move_mount(2) to install. + mountSourceOpenTree mountSourceType = "open_tree" + // A plain file descriptor that can be mounted through /proc/thread-self/fd. + mountSourcePlain mountSourceType = "plain-open" +) + +type mountSource struct { + Type mountSourceType `json:"type"` + file *os.File `json:"-"` +} + +// mountError holds an error from a failed mount or unmount operation. +type mountError struct { + op string + source string + srcFile *mountSource + target string + dstFd string + flags uintptr + data string + err error +} + +// int32plus is a collection of int types with >=32 bits. +type int32plus interface { + int | uint | int32 | uint32 | int64 | uint64 | uintptr +} + +// stringifyMountFlags converts mount(2) flags to a string that you can use in +// error messages. +func stringifyMountFlags[Int int32plus](flags Int) string { + flagNames := []struct { + name string + bits Int + }{ + {"MS_RDONLY", unix.MS_RDONLY}, + {"MS_NOSUID", unix.MS_NOSUID}, + {"MS_NODEV", unix.MS_NODEV}, + {"MS_NOEXEC", unix.MS_NOEXEC}, + {"MS_SYNCHRONOUS", unix.MS_SYNCHRONOUS}, + {"MS_REMOUNT", unix.MS_REMOUNT}, + {"MS_MANDLOCK", unix.MS_MANDLOCK}, + {"MS_DIRSYNC", unix.MS_DIRSYNC}, + {"MS_NOSYMFOLLOW", unix.MS_NOSYMFOLLOW}, + // No (1 << 9) flag. 
+ {"MS_NOATIME", unix.MS_NOATIME}, + {"MS_NODIRATIME", unix.MS_NODIRATIME}, + {"MS_BIND", unix.MS_BIND}, + {"MS_MOVE", unix.MS_MOVE}, + {"MS_REC", unix.MS_REC}, + // MS_VERBOSE was deprecated and swapped to MS_SILENT. + {"MS_SILENT", unix.MS_SILENT}, + {"MS_POSIXACL", unix.MS_POSIXACL}, + {"MS_UNBINDABLE", unix.MS_UNBINDABLE}, + {"MS_PRIVATE", unix.MS_PRIVATE}, + {"MS_SLAVE", unix.MS_SLAVE}, + {"MS_SHARED", unix.MS_SHARED}, + {"MS_RELATIME", unix.MS_RELATIME}, + // MS_KERNMOUNT (1 << 22) is internal to the kernel. + {"MS_I_VERSION", unix.MS_I_VERSION}, + {"MS_STRICTATIME", unix.MS_STRICTATIME}, + {"MS_LAZYTIME", unix.MS_LAZYTIME}, + } + var ( + flagSet []string + seenBits Int + ) + for _, flag := range flagNames { + if flags&flag.bits == flag.bits { + seenBits |= flag.bits + flagSet = append(flagSet, flag.name) + } + } + // If there were any remaining flags specified we don't know the name of, + // just add them in an 0x... format. + if remaining := flags &^ seenBits; remaining != 0 { + flagSet = append(flagSet, "0x"+strconv.FormatUint(uint64(remaining), 16)) + } + return strings.Join(flagSet, "|") +} + +// Error provides a string error representation. +func (e *mountError) Error() string { + out := e.op + " " + + if e.source != "" { + out += "src=" + e.source + ", " + if e.srcFile != nil { + out += "srcType=" + string(e.srcFile.Type) + ", " + out += "srcFd=" + strconv.Itoa(int(e.srcFile.file.Fd())) + ", " + } + } + out += "dst=" + e.target + if e.dstFd != "" { + out += ", dstFd=" + e.dstFd + } + + if e.flags != uintptr(0) { + out += ", flags=" + stringifyMountFlags(e.flags) + } + if e.data != "" { + out += ", data=" + e.data + } + + out += ": " + e.err.Error() + return out +} + +// Unwrap returns the underlying error. +// This is a convention used by Go 1.13+ standard library. +func (e *mountError) Unwrap() error { + return e.err +} + +// mount is a simple unix.Mount wrapper, returning an error with more context +// in case it failed. 
+func mount(source, target, fstype string, flags uintptr, data string) error { + return mountViaFds(source, nil, target, "", fstype, flags, data) +} + +// mountViaFds is a unix.Mount wrapper which uses srcFile instead of source, +// and dstFd instead of target, unless those are empty. +// +// If srcFile is non-nil and flags does not contain MS_REMOUNT, mountViaFds +// will mount it according to the mountSourceType of the file descriptor. +// +// The dstFd argument, if non-empty, is expected to be in the form of a path to +// an opened file descriptor on procfs (i.e. "/proc/thread-self/fd/NN"). +// +// If a file descriptor is used instead of a source or a target path, the +// corresponding path is only used to add context to an error in case the mount +// operation has failed. +func mountViaFds(source string, srcFile *mountSource, target, dstFd, fstype string, flags uintptr, data string) error { + // MS_REMOUNT and srcFile don't make sense together. + if srcFile != nil && flags&unix.MS_REMOUNT != 0 { + logrus.Debugf("mount source passed along with MS_REMOUNT -- ignoring srcFile") + srcFile = nil + } + dst := target + if dstFd != "" { + dst = dstFd + } + src := source + isMoveMount := srcFile != nil && srcFile.Type == mountSourceOpenTree + if srcFile != nil { + // If we're going to use the /proc/thread-self/... path for classic + // mount(2), we need to get a safe handle to /proc/thread-self. This + // isn't needed for move_mount(2) because in that case the path is just + // a dummy string used for error info. 
+ srcFileFd := srcFile.file.Fd() + if isMoveMount { + src = "/proc/self/fd/" + strconv.Itoa(int(srcFileFd)) + } else { + var closer utils.ProcThreadSelfCloser + src, closer = utils.ProcThreadSelfFd(srcFileFd) + defer closer() + } + } + + var op string + var err error + if isMoveMount { + op = "move_mount" + err = unix.MoveMount(int(srcFile.file.Fd()), "", + unix.AT_FDCWD, dstFd, + unix.MOVE_MOUNT_F_EMPTY_PATH|unix.MOVE_MOUNT_T_SYMLINKS) + } else { + op = "mount" + err = unix.Mount(src, dst, fstype, flags, data) + } + if err != nil { + return &mountError{ + op: op, + source: source, + srcFile: srcFile, + target: target, + dstFd: dstFd, + flags: flags, + data: data, + err: err, + } + } + return nil +} + +// unmount is a simple unix.Unmount wrapper. +func unmount(target string, flags int) error { + err := unix.Unmount(target, flags) + if err != nil { + return &mountError{ + op: "unmount", + target: target, + flags: uintptr(flags), + err: err, + } + } + return nil +} + +// syscallMode returns the syscall-specific mode bits from Go's portable mode bits. +// Copy from https://cs.opensource.google/go/go/+/refs/tags/go1.20.7:src/os/file_posix.go;l=61-75 +func syscallMode(i fs.FileMode) (o uint32) { + o |= uint32(i.Perm()) + if i&fs.ModeSetuid != 0 { + o |= unix.S_ISUID + } + if i&fs.ModeSetgid != 0 { + o |= unix.S_ISGID + } + if i&fs.ModeSticky != 0 { + o |= unix.S_ISVTX + } + // No mapping for Go's ModeTemporary (plan9 only). + return o +} + +// mountFd creates a "mount source fd" (either through open_tree(2) or just +// open(O_PATH)) based on the provided configuration. This function must be +// called from within the container's mount namespace. +// +// In the case of idmapped mount configurations, the returned mount source will +// be an open_tree(2) file with MOUNT_ATTR_IDMAP applied. For other +// bind-mounts, it will be an O_PATH. 
If the type of mount cannot be handled, +// the returned mountSource will be nil, indicating that the container init +// process will need to do an old-fashioned mount(2) themselves. +// +// This helper is only intended to be used by goCreateMountSources. +func mountFd(nsHandles *userns.Handles, m *configs.Mount) (*mountSource, error) { + if !m.IsBind() { + return nil, errors.New("new mount api: only bind-mounts are supported") + } + if nsHandles == nil { + nsHandles = new(userns.Handles) + defer nsHandles.Release() + } + + var mountFile *os.File + var sourceType mountSourceType + + // Ideally, we would use OPEN_TREE_CLONE for everything, because we can + // be sure that the file descriptor cannot be used to escape outside of + // the mount root. Unfortunately, OPEN_TREE_CLONE is far more expensive + // than open(2) because it requires doing mounts inside a new anonymous + // mount namespace. So we use open(2) for standard bind-mounts, and + // OPEN_TREE_CLONE when we need to set mount attributes here. + // + // While passing open(2)'d paths from the host rootfs isn't exactly the + // safest thing in the world, the files will not survive across + // execve(2) and "runc init" is non-dumpable so it should not be + // possible for a malicious container process to gain access to the + // file descriptors. We also don't do any of this for "runc exec", + // lessening the risk even further. + if m.IsIDMapped() { + flags := uint(unix.OPEN_TREE_CLONE | unix.OPEN_TREE_CLOEXEC) + if m.Flags&unix.MS_REC == unix.MS_REC { + flags |= unix.AT_RECURSIVE + } + fd, err := unix.OpenTree(unix.AT_FDCWD, m.Source, flags) + if err != nil { + return nil, &os.PathError{Op: "open_tree(OPEN_TREE_CLONE)", Path: m.Source, Err: err} + } + mountFile = os.NewFile(uintptr(fd), m.Source) + sourceType = mountSourceOpenTree + + // Configure the id mapping. 
+ var usernsFile *os.File + if m.IDMapping.UserNSPath == "" { + usernsFile, err = nsHandles.Get(userns.Mapping{ + UIDMappings: m.IDMapping.UIDMappings, + GIDMappings: m.IDMapping.GIDMappings, + }) + if err != nil { + return nil, fmt.Errorf("failed to create userns for %s id-mapping: %w", m.Source, err) + } + } else { + usernsFile, err = os.Open(m.IDMapping.UserNSPath) + if err != nil { + return nil, fmt.Errorf("failed to open existing userns for %s id-mapping: %w", m.Source, err) + } + } + defer usernsFile.Close() + + setAttrFlags := uint(unix.AT_EMPTY_PATH) + // If the mount has "ridmap" set, we apply the configuration + // recursively. This allows you to create "rbind" mounts where only + // the top-level mount has an idmapping. I'm not sure why you'd + // want that, but still... + if m.IDMapping.Recursive { + setAttrFlags |= unix.AT_RECURSIVE + } + if err := unix.MountSetattr(int(mountFile.Fd()), "", setAttrFlags, &unix.MountAttr{ + Attr_set: unix.MOUNT_ATTR_IDMAP, + Userns_fd: uint64(usernsFile.Fd()), + }); err != nil { + extraMsg := "" + if err == unix.EINVAL { + extraMsg = " (maybe the filesystem used doesn't support idmap mounts on this kernel?)" + } + + return nil, fmt.Errorf("failed to set MOUNT_ATTR_IDMAP on %s: %w%s", m.Source, err, extraMsg) + } + } else { + var err error + mountFile, err = os.OpenFile(m.Source, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } + sourceType = mountSourcePlain + } + return &mountSource{ + Type: sourceType, + file: mountFile, + }, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go new file mode 100644 index 0000000000..8915548b3b --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go @@ -0,0 +1,100 @@ +package libcontainer + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/configs" + 
"github.com/opencontainers/runc/types" + "github.com/vishvananda/netlink" +) + +var strategies = map[string]networkStrategy{ + "loopback": &loopback{}, +} + +// networkStrategy represents a specific network configuration for +// a container's networking stack +type networkStrategy interface { + create(*network, int) error + initialize(*network) error + detach(*configs.Network) error + attach(*configs.Network) error +} + +// getStrategy returns the specific network strategy for the +// provided type. +func getStrategy(tpe string) (networkStrategy, error) { + s, exists := strategies[tpe] + if !exists { + return nil, fmt.Errorf("unknown strategy type %q", tpe) + } + return s, nil +} + +// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. +func getNetworkInterfaceStats(interfaceName string) (*types.NetworkInterface, error) { + out := &types.NetworkInterface{Name: interfaceName} + // This can happen if the network runtime information is missing - possible if the + // container was created by an old version of libcontainer. + if interfaceName == "" { + return out, nil + } + type netStatsPair struct { + // Where to write the output. + Out *uint64 + // The network stats file to read. + File string + } + // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. 
+ netStats := []netStatsPair{ + {Out: &out.RxBytes, File: "tx_bytes"}, + {Out: &out.RxPackets, File: "tx_packets"}, + {Out: &out.RxErrors, File: "tx_errors"}, + {Out: &out.RxDropped, File: "tx_dropped"}, + + {Out: &out.TxBytes, File: "rx_bytes"}, + {Out: &out.TxPackets, File: "rx_packets"}, + {Out: &out.TxErrors, File: "rx_errors"}, + {Out: &out.TxDropped, File: "rx_dropped"}, + } + for _, netStat := range netStats { + data, err := readSysfsNetworkStats(interfaceName, netStat.File) + if err != nil { + return nil, err + } + *(netStat.Out) = data + } + return out, nil +} + +// Reads the specified statistics available under /sys/class/net//statistics +func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { + data, err := os.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) + if err != nil { + return 0, err + } + return strconv.ParseUint(string(bytes.TrimSpace(data)), 10, 64) +} + +// loopback is a network strategy that provides a basic loopback device +type loopback struct{} + +func (l *loopback) create(n *network, nspid int) error { + return nil +} + +func (l *loopback) initialize(config *network) error { + return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}}) +} + +func (l *loopback) attach(n *configs.Network) (err error) { + return nil +} + +func (l *loopback) detach(n *configs.Network) (err error) { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go new file mode 100644 index 0000000000..a8762842e8 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go @@ -0,0 +1,84 @@ +package libcontainer + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "golang.org/x/sys/unix" +) + +type PressureLevel uint + +const ( + LowPressure PressureLevel = iota + MediumPressure + CriticalPressure +) + +func registerMemoryEvent(cgDir string, evName 
string, arg string) (<-chan struct{}, error) { + evFile, err := os.Open(filepath.Join(cgDir, evName)) + if err != nil { + return nil, err + } + fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC) + if err != nil { + evFile.Close() + return nil, err + } + + eventfd := os.NewFile(uintptr(fd), "eventfd") + + eventControlPath := filepath.Join(cgDir, "cgroup.event_control") + data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg) + if err := os.WriteFile(eventControlPath, []byte(data), 0o700); err != nil { + eventfd.Close() + evFile.Close() + return nil, err + } + ch := make(chan struct{}) + go func() { + defer func() { + eventfd.Close() + evFile.Close() + close(ch) + }() + buf := make([]byte, 8) + for { + if _, err := eventfd.Read(buf); err != nil { + return + } + // When a cgroup is destroyed, an event is sent to eventfd. + // So if the control path is gone, return instead of notifying. + if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { + return + } + ch <- struct{}{} + } + }() + return ch, nil +} + +// notifyOnOOM returns channel on which you can expect event about OOM, +// if process died without OOM this channel will be closed. 
+func notifyOnOOM(dir string) (<-chan struct{}, error) { + if dir == "" { + return nil, errors.New("memory controller missing") + } + + return registerMemoryEvent(dir, "memory.oom_control", "") +} + +func notifyMemoryPressure(dir string, level PressureLevel) (<-chan struct{}, error) { + if dir == "" { + return nil, errors.New("memory controller missing") + } + + if level > CriticalPressure { + return nil, fmt.Errorf("invalid pressure level %d", level) + } + + levelStr := []string{"low", "medium", "critical"}[level] + return registerMemoryEvent(dir, "memory.pressure_level", levelStr) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/notify_v2_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/notify_v2_linux.go new file mode 100644 index 0000000000..751f4c91db --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/notify_v2_linux.go @@ -0,0 +1,85 @@ +package libcontainer + +import ( + "fmt" + "os" + "path/filepath" + "unsafe" + + "github.com/opencontainers/cgroups/fscommon" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) { + fd, err := unix.InotifyInit() + if err != nil { + return nil, fmt.Errorf("unable to init inotify: %w", err) + } + // watching oom kill + evFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, evName), unix.IN_MODIFY) + if err != nil { + unix.Close(fd) + return nil, fmt.Errorf("unable to add inotify watch: %w", err) + } + // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited + cgFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, cgEvName), unix.IN_MODIFY) + if err != nil { + unix.Close(fd) + return nil, fmt.Errorf("unable to add inotify watch: %w", err) + } + ch := make(chan struct{}) + go func() { + var ( + buffer [unix.SizeofInotifyEvent + unix.PathMax + 1]byte + offset uint32 + ) + defer func() { + unix.Close(fd) + close(ch) + }() + + 
for { + n, err := unix.Read(fd, buffer[:]) + if err == unix.EINTR { //nolint:errorlint // unix errors are bare + continue + } + if err != nil { + err = os.NewSyscallError("read", err) + logrus.Warnf("unable to read event data from inotify, got error: %v", err) + return + } + if n < unix.SizeofInotifyEvent { + logrus.Warnf("we should read at least %d bytes from inotify, but got %d bytes.", unix.SizeofInotifyEvent, n) + return + } + offset = 0 + for offset <= uint32(n-unix.SizeofInotifyEvent) { + rawEvent := (*unix.InotifyEvent)(unsafe.Pointer(&buffer[offset])) + offset += unix.SizeofInotifyEvent + rawEvent.Len + if rawEvent.Mask&unix.IN_MODIFY != unix.IN_MODIFY { + continue + } + switch int(rawEvent.Wd) { + case evFd: + oom, err := fscommon.GetValueByKey(cgDir, evName, "oom_kill") + if err != nil || oom > 0 { + ch <- struct{}{} + } + case cgFd: + pids, err := fscommon.GetValueByKey(cgDir, cgEvName, "populated") + if err != nil || pids == 0 { + return + } + } + } + } + }() + return ch, nil +} + +// notifyOnOOMV2 returns channel on which you can expect event about OOM, +// if process died without OOM this channel will be closed. +func notifyOnOOMV2(path string) (<-chan struct{}, error) { + return registerMemoryEventV2(path, "memory.events", "cgroup.events") +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/process.go b/vendor/github.com/opencontainers/runc/libcontainer/process.go new file mode 100644 index 0000000000..7fca1febce --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/process.go @@ -0,0 +1,169 @@ +package libcontainer + +import ( + "errors" + "io" + "math" + "os" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var errInvalidProcess = errors.New("invalid process") + +type processOperations interface { + wait() (*os.ProcessState, error) + signal(sig os.Signal) error + pid() int +} + +// Process defines the configuration and IO for a process inside a container. 
+// +// Note that some Process properties are also present in container configuration +// ([configs.Config]). In all such cases, Process properties take precedence +// over container configuration ones. +type Process struct { + // The command to be run followed by any arguments. + Args []string + + // Env specifies the environment variables for the process. + Env []string + + // UID and GID of the executing process running inside the container + // local to the container's user and group configuration. + UID, GID int + + // AdditionalGroups specifies the gids that should be added to supplementary groups + // in addition to those that the user belongs to. + AdditionalGroups []int + + // Cwd will change the process's current working directory inside the container's rootfs. + Cwd string + + // Stdin is a reader which provides the standard input stream. + Stdin io.Reader + + // Stdout is a writer which receives the standard output stream. + Stdout io.Writer + + // Stderr is a writer which receives the standard error stream. + Stderr io.Writer + + // ExtraFiles specifies additional open files to be inherited by the process. + ExtraFiles []*os.File + + // Open handles to cloned binaries -- see exeseal.CloneSelfExe for more details. + clonedExes []*os.File + + // Initial size for the console. + ConsoleWidth uint16 + ConsoleHeight uint16 + + // Capabilities specify the capabilities to keep when executing the process. + // All capabilities not specified will be dropped from the processes capability mask. + // + // If not nil, takes precedence over container's [configs.Config.Capabilities]. + Capabilities *configs.Capabilities + + // AppArmorProfile specifies the profile to apply to the process and is + // changed at the time the process is executed. + // + // If not empty, takes precedence over container's [configs.Config.AppArmorProfile]. + AppArmorProfile string + + // Label specifies the label to apply to the process. It is commonly used by selinux. 
+ // + // If not empty, takes precedence over container's [configs.Config.ProcessLabel]. + Label string + + // NoNewPrivileges controls whether processes can gain additional privileges. + // + // If not nil, takes precedence over container's [configs.Config.NoNewPrivileges]. + NoNewPrivileges *bool + + // Rlimits specifies the resource limits, such as max open files, to set for the process. + // If unset, the process will inherit rlimits from the parent process. + // + // If not empty, takes precedence over container's [configs.Config.Rlimit]. + Rlimits []configs.Rlimit + + // ConsoleSocket provides the masterfd console. + ConsoleSocket *os.File + + // PidfdSocket provides process file descriptor of it own. + PidfdSocket *os.File + + // Init specifies whether the process is the first process in the container. + Init bool + + ops processOperations + + // LogLevel is a string containing a numeric representation of the current + // log level (i.e. "4", but never "info"). It is passed on to runc init as + // _LIBCONTAINER_LOGLEVEL environment variable. + LogLevel string + + // SubCgroupPaths specifies sub-cgroups to run the process in. + // Map keys are controller names, map values are paths (relative to + // container's top-level cgroup). + // + // If empty, the default top-level container's cgroup is used. + // + // For cgroup v2, the only key allowed is "". + SubCgroupPaths map[string]string + + // Scheduler represents the scheduling attributes for a process. + // + // If not empty, takes precedence over container's [configs.Config.Scheduler]. + Scheduler *configs.Scheduler + + // IOPriority is a process I/O priority. + // + // If not empty, takes precedence over container's [configs.Config.IOPriority]. + IOPriority *configs.IOPriority + + CPUAffinity *configs.CPUAffinity +} + +// Wait waits for the process to exit. 
+// Wait releases any resources associated with the Process +func (p Process) Wait() (*os.ProcessState, error) { + if p.ops == nil { + return nil, errInvalidProcess + } + return p.ops.wait() +} + +// Pid returns the process ID +func (p Process) Pid() (int, error) { + // math.MinInt32 is returned here, because it's invalid value + // for the kill() system call. + if p.ops == nil { + return math.MinInt32, errInvalidProcess + } + return p.ops.pid(), nil +} + +// Signal sends a signal to the Process. +func (p Process) Signal(sig os.Signal) error { + if p.ops == nil { + return errInvalidProcess + } + return p.ops.signal(sig) +} + +// closeClonedExes cleans up any existing cloned binaries associated with the +// Process. +func (p *Process) closeClonedExes() { + for _, exe := range p.clonedExes { + _ = exe.Close() + } + p.clonedExes = nil +} + +// IO holds the process's STDIO +type IO struct { + Stdin io.WriteCloser + Stdout io.ReadCloser + Stderr io.ReadCloser +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go new file mode 100644 index 0000000000..cc50f278ea --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go @@ -0,0 +1,1008 @@ +package libcontainer + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "os" + "os/exec" + "path/filepath" + "runtime" + "strconv" + "sync" + "time" + + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fs2" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/libcontainer/internal/userns" + "github.com/opencontainers/runc/libcontainer/logs" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" +) + +type 
parentProcess interface { + // pid returns the pid for the running process. + pid() int + + // start starts the process execution. + start() error + + // send a SIGKILL to the process and wait for the exit. + terminate() error + + // wait waits on the process returning the process state. + wait() (*os.ProcessState, error) + + // startTime returns the process start time. + startTime() (uint64, error) + signal(os.Signal) error + externalDescriptors() []string + setExternalDescriptors(fds []string) + forwardChildLogs() chan error +} + +type processComm struct { + // Used to send initial configuration to "runc init" and for "runc init" to + // indicate that it is ready. + initSockParent *os.File + initSockChild *os.File + // Used for control messages between parent and "runc init". + syncSockParent *syncSocket + syncSockChild *syncSocket + // Used for log forwarding from "runc init" to the parent. + logPipeParent *os.File + logPipeChild *os.File +} + +func newProcessComm() (*processComm, error) { + var ( + comm processComm + err error + ) + comm.initSockParent, comm.initSockChild, err = utils.NewSockPair("init") + if err != nil { + return nil, fmt.Errorf("unable to create init pipe: %w", err) + } + comm.syncSockParent, comm.syncSockChild, err = newSyncSockpair("sync") + if err != nil { + return nil, fmt.Errorf("unable to create sync pipe: %w", err) + } + comm.logPipeParent, comm.logPipeChild, err = os.Pipe() + if err != nil { + return nil, fmt.Errorf("unable to create log pipe: %w", err) + } + return &comm, nil +} + +func (c *processComm) closeChild() { + _ = c.initSockChild.Close() + _ = c.syncSockChild.Close() + _ = c.logPipeChild.Close() +} + +func (c *processComm) closeParent() { + _ = c.initSockParent.Close() + _ = c.syncSockParent.Close() + // c.logPipeParent is kept alive for ForwardLogs +} + +type containerProcess struct { + cmd *exec.Cmd + comm *processComm + config *initConfig + manager cgroups.Manager + fds []string + process *Process + bootstrapData 
io.Reader + container *Container +} + +func (p *containerProcess) pid() int { + return p.cmd.Process.Pid +} + +func (p *containerProcess) startTime() (uint64, error) { + stat, err := system.Stat(p.pid()) + return stat.StartTime, err +} + +func (p *containerProcess) signal(sig os.Signal) error { + s, ok := sig.(unix.Signal) + if !ok { + return errors.New("os: unsupported signal type") + } + return unix.Kill(p.pid(), s) +} + +func (p *containerProcess) externalDescriptors() []string { + return p.fds +} + +func (p *containerProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func (p *containerProcess) forwardChildLogs() chan error { + return logs.ForwardLogs(p.comm.logPipeParent) +} + +// terminate sends a SIGKILL to the forked process for the setns routine then waits to +// avoid the process becoming a zombie. +func (p *containerProcess) terminate() error { + if p.cmd.Process == nil { + return nil + } + err := p.cmd.Process.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *containerProcess) wait() (*os.ProcessState, error) { //nolint:unparam + err := p.cmd.Wait() + + // Return actual ProcessState even on Wait error + return p.cmd.ProcessState, err +} + +type setnsProcess struct { + containerProcess + cgroupPaths map[string]string + rootlessCgroups bool + intelRdtPath string + initProcessPid int +} + +// tryResetCPUAffinity tries to reset the CPU affinity of the process +// identified by pid to include all possible CPUs (notwithstanding cgroup +// cpuset restrictions and isolated CPUs). +func tryResetCPUAffinity(pid int) { + // When resetting the CPU affinity, we want to match the configured cgroup + // cpuset (or the default set of all CPUs, if no cpuset is configured) + // rather than some more restrictive affinity we were spawned in (such as + // one that may have been inherited from systemd). 
The cpuset cgroup used + // to reconfigure the cpumask automatically for joining processes, but + // kcommit da019032819a ("sched: Enforce user requested affinity") changed + // this behaviour in Linux 6.2. + // + // Parsing cpuset.cpus.effective is quite inefficient (and looking at + // things like /proc/stat would be wrong for most nested containers), but + // luckily sched_setaffinity(2) will implicitly: + // + // * Clamp the cpumask so that it matches the current number of CPUs on + // the system. + // * Mask out any CPUs that are not a member of the target task's + // configured cgroup cpuset. + // + // So we can just pass a very large array of set cpumask bits and the + // kernel will silently convert that to the correct value very cheaply. + + // Ideally, we would just set the array to 0xFF...FF. Unfortunately, the + // size depends on the architecture. It is also a private newtype, so we + // can't use (^0) or generics since those require us to be able to name the + // type. However, we can just underflow the zero value instead. + // TODO: Once is merged, switch to that. + cpuset := unix.CPUSet{} + for i := range cpuset { + cpuset[i]-- // underflow to 0xFF..FF + } + if err := unix.SchedSetaffinity(pid, &cpuset); err != nil { + logrus.WithError( + os.NewSyscallError("sched_setaffinity", err), + ).Warnf("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity", pid) + } +} + +// Starts setns process with specified initial CPU affinity. +func (p *setnsProcess) startWithCPUAffinity() error { + aff := p.config.CPUAffinity + if aff == nil || aff.Initial == nil { + return p.cmd.Start() + } + errCh := make(chan error) + defer close(errCh) + + // Use a goroutine to dedicate an OS thread. + go func() { + runtime.LockOSThread() + // Command inherits the CPU affinity. 
+ if err := unix.SchedSetaffinity(unix.Gettid(), aff.Initial); err != nil { + errCh <- fmt.Errorf("error setting initial CPU affinity: %w", err) + return + } + + errCh <- p.cmd.Start() + // Deliberately omit runtime.UnlockOSThread here. + // https://pkg.go.dev/runtime#LockOSThread says: + // "If the calling goroutine exits without unlocking the + // thread, the thread will be terminated". + }() + + return <-errCh +} + +func (p *setnsProcess) setFinalCPUAffinity() error { + aff := p.config.CPUAffinity + // If there was no affinity configured at all, we want to reset + // the affinity to make sure we don't inherit an unexpected one. + if aff == nil || aff.Final == nil && aff.Initial == nil { + tryResetCPUAffinity(p.pid()) + return nil + } + if aff.Final == nil { + return nil + } + if err := unix.SchedSetaffinity(p.pid(), aff.Final); err != nil { + return fmt.Errorf("error setting final CPU affinity: %w", err) + } + return nil +} + +func (p *setnsProcess) start() (retErr error) { + defer p.comm.closeParent() + + // Get the "before" value of oom kill count. + oom, _ := p.manager.OOMKillCount() + err := p.startWithCPUAffinity() + // Close the child-side of the pipes (controlled by child). + p.comm.closeChild() + if err != nil { + return fmt.Errorf("error starting setns process: %w", err) + } + + defer func() { + if retErr != nil { + if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom { + // Someone in this cgroup was killed, this _might_ be us. 
+ retErr = fmt.Errorf("%w (possibly OOM-killed)", retErr) + } + err := ignoreTerminateErrors(p.terminate()) + if err != nil { + logrus.WithError(err).Warn("unable to terminate setnsProcess") + } + } + }() + + if p.bootstrapData != nil { + if _, err := io.Copy(p.comm.initSockParent, p.bootstrapData); err != nil { + return fmt.Errorf("error copying bootstrap data to pipe: %w", err) + } + } + if err := p.execSetns(); err != nil { + return fmt.Errorf("error executing setns process: %w", err) + } + for _, path := range p.cgroupPaths { + if err := cgroups.WriteCgroupProc(path, p.pid()); err != nil && !p.rootlessCgroups { + // On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY. + // https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643 + // Try to join the cgroup of InitProcessPid. + if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 { + initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid) + initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile) + if initCgErr == nil { + if initCgPath, ok := initCg[""]; ok { + initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath) + logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)", + p.pid(), p.cgroupPaths, err, initCg, initCgDirpath) + // NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container. + err = cgroups.WriteCgroupProc(initCgDirpath, p.pid()) + } + } + } + if err != nil { + return fmt.Errorf("error adding pid %d to cgroups: %w", p.pid(), err) + } + } + } + // Set final CPU affinity right after the process is moved into container's cgroup. 
+ if err := p.setFinalCPUAffinity(); err != nil { + return err + } + if p.intelRdtPath != "" { + // if Intel RDT "resource control" filesystem path exists + _, err := os.Stat(p.intelRdtPath) + if err == nil { + if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil { + return fmt.Errorf("error adding pid %d to Intel RDT: %w", p.pid(), err) + } + } + } + + if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil { + return fmt.Errorf("error writing config to pipe: %w", err) + } + + var seenProcReady bool + ierr := parseSync(p.comm.syncSockParent, func(sync *syncT) error { + switch sync.Type { + case procReady: + seenProcReady = true + // Set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return fmt.Errorf("error setting rlimits for ready process: %w", err) + } + + // Sync with child. + if err := writeSync(p.comm.syncSockParent, procRun); err != nil { + return err + } + case procHooks: + // This shouldn't happen. + panic("unexpected procHooks in setns") + case procMountPlease: + // This shouldn't happen. + panic("unexpected procMountPlease in setns") + case procSeccomp: + if p.config.Config.Seccomp.ListenerPath == "" { + return errors.New("seccomp listenerPath is not set") + } + if sync.Arg == nil { + return fmt.Errorf("sync %q is missing an argument", sync.Type) + } + var srcFd int + if err := json.Unmarshal(*sync.Arg, &srcFd); err != nil { + return fmt.Errorf("sync %q passed invalid fd arg: %w", sync.Type, err) + } + seccompFd, err := pidGetFd(p.pid(), srcFd) + if err != nil { + return fmt.Errorf("sync %q get fd %d from child failed: %w", sync.Type, srcFd, err) + } + defer seccompFd.Close() + // We have a copy, the child can keep working. 
We don't need to + // wait for the seccomp notify listener to get the fd before we + // permit the child to continue because the child will happily wait + // for the listener if it hits SCMP_ACT_NOTIFY. + if err := writeSync(p.comm.syncSockParent, procSeccompDone); err != nil { + return err + } + + bundle, annotations := utils.Annotations(p.config.Config.Labels) + containerProcessState := &specs.ContainerProcessState{ + Version: specs.Version, + Fds: []string{specs.SeccompFdName}, + Pid: p.cmd.Process.Pid, + Metadata: p.config.Config.Seccomp.ListenerMetadata, + State: specs.State{ + Version: specs.Version, + ID: p.config.ContainerID, + Status: specs.StateRunning, + Pid: p.initProcessPid, + Bundle: bundle, + Annotations: annotations, + }, + } + if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath, + containerProcessState, seccompFd); err != nil { + return err + } + default: + return errors.New("invalid JSON payload from child") + } + return nil + }) + + if err := p.comm.syncSockParent.Shutdown(unix.SHUT_WR); err != nil && ierr == nil { + return err + } + if !seenProcReady && ierr == nil { + ierr = errors.New("procReady not received") + } + // Must be done after Shutdown so the child will exit and we can wait for it. + if ierr != nil { + _, _ = p.wait() + return ierr + } + return nil +} + +// execSetns runs the process that executes C code to perform the setns calls +// because setns support requires the C process to fork off a child and perform the setns +// before the go runtime boots, we wait on the process to die and receive the child's pid +// over the provided pipe. 
+func (p *setnsProcess) execSetns() error { + status, err := p.cmd.Process.Wait() + if err != nil { + _ = p.cmd.Wait() + return fmt.Errorf("error waiting on setns process to finish: %w", err) + } + if !status.Success() { + _ = p.cmd.Wait() + return &exec.ExitError{ProcessState: status} + } + var pid *pid + if err := json.NewDecoder(p.comm.initSockParent).Decode(&pid); err != nil { + _ = p.cmd.Wait() + return fmt.Errorf("error reading pid from init pipe: %w", err) + } + + // Clean up the zombie parent process + // On Unix systems FindProcess always succeeds. + firstChildProcess, _ := os.FindProcess(pid.PidFirstChild) + + // Ignore the error in case the child has already been reaped for any reason + _, _ = firstChildProcess.Wait() + + process, err := os.FindProcess(pid.Pid) + if err != nil { + return err + } + p.cmd.Process = process + p.process.ops = p + return nil +} + +type initProcess struct { + containerProcess + intelRdtManager *intelrdt.Manager +} + +// getChildPid receives the final child's pid over the provided pipe. +func (p *initProcess) getChildPid() (int, error) { + var pid pid + if err := json.NewDecoder(p.comm.initSockParent).Decode(&pid); err != nil { + _ = p.cmd.Wait() + return -1, err + } + + // Clean up the zombie parent process + // On Unix systems FindProcess always succeeds. 
+ firstChildProcess, _ := os.FindProcess(pid.PidFirstChild) + + // Ignore the error in case the child has already been reaped for any reason + _, _ = firstChildProcess.Wait() + + return pid.Pid, nil +} + +func (p *initProcess) waitForChildExit(childPid int) error { + status, err := p.cmd.Process.Wait() + if err != nil { + _ = p.cmd.Wait() + return err + } + if !status.Success() { + _ = p.cmd.Wait() + return &exec.ExitError{ProcessState: status} + } + + process, err := os.FindProcess(childPid) + if err != nil { + return err + } + p.cmd.Process = process + p.process.ops = p + return nil +} + +type mountSourceRequestFn func(*configs.Mount) (*mountSource, error) + +// goCreateMountSources spawns a goroutine which creates open_tree(2)-style +// mountfds based on the requested configs.Mount configuration. The returned +// requestFn and cancelFn are used to interact with the goroutine. +// +// The caller of the returned mountSourceRequestFn is responsible for closing +// the returned file. +func (p *initProcess) goCreateMountSources(ctx context.Context) (mountSourceRequestFn, context.CancelFunc, error) { + type response struct { + src *mountSource + err error + } + + errCh := make(chan error, 1) + requestCh := make(chan *configs.Mount) + responseCh := make(chan response) + + ctx, cancelFn := context.WithTimeout(ctx, 1*time.Minute) + go func() { + // We lock this thread because we need to setns(2) here. There is no + // UnlockOSThread() here, to ensure that the Go runtime will kill this + // thread once this goroutine returns (ensuring no other goroutines run + // in this context). + runtime.LockOSThread() + + // Detach from the shared fs of the rest of the Go process in order to + // be able to CLONE_NEWNS. + if err := unix.Unshare(unix.CLONE_FS); err != nil { + err = os.NewSyscallError("unshare(CLONE_FS)", err) + errCh <- fmt.Errorf("mount source thread: %w", err) + return + } + + // Attach to the container's mount namespace. 
+ nsFd, err := os.Open(fmt.Sprintf("/proc/%d/ns/mnt", p.pid())) + if err != nil { + errCh <- fmt.Errorf("mount source thread: open container mntns: %w", err) + return + } + defer nsFd.Close() + if err := unix.Setns(int(nsFd.Fd()), unix.CLONE_NEWNS); err != nil { + err = os.NewSyscallError("setns", err) + errCh <- fmt.Errorf("mount source thread: join container mntns: %w", err) + return + } + + // No errors during setup! + close(errCh) + logrus.Debugf("mount source thread: successfully running in container mntns") + + nsHandles := new(userns.Handles) + defer nsHandles.Release() + loop: + for { + select { + case m, ok := <-requestCh: + if !ok { + break loop + } + src, err := mountFd(nsHandles, m) + logrus.Debugf("mount source thread: handling request for %q: %v %v", m.Source, src, err) + responseCh <- response{ + src: src, + err: err, + } + case <-ctx.Done(): + break loop + } + } + logrus.Debugf("mount source thread: closing thread: %v", ctx.Err()) + close(responseCh) + }() + + // Check for setup errors. + err := <-errCh + if err != nil { + cancelFn() + return nil, nil, err + } + + // TODO: Switch to context.AfterFunc when we switch to Go 1.21. 
+ var requestChCloseOnce sync.Once + requestFn := func(m *configs.Mount) (*mountSource, error) { + var err error + select { + case requestCh <- m: + select { + case resp, ok := <-responseCh: + if ok { + return resp.src, resp.err + } + case <-ctx.Done(): + err = fmt.Errorf("receive mount source context cancelled: %w", ctx.Err()) + } + case <-ctx.Done(): + err = fmt.Errorf("send mount request cancelled: %w", ctx.Err()) + } + requestChCloseOnce.Do(func() { close(requestCh) }) + return nil, err + } + return requestFn, cancelFn, nil +} + +func (p *initProcess) start() (retErr error) { + defer p.comm.closeParent() + err := p.cmd.Start() + p.process.ops = p + // close the child-side of the pipes (controlled by child) + p.comm.closeChild() + if err != nil { + p.process.ops = nil + return fmt.Errorf("unable to start init: %w", err) + } + + // If the runc-create process is terminated due to receiving SIGKILL signal, + // it may lead to the runc-init process leaking due + // to issues like cgroup freezing, + // and it cannot be cleaned up by runc delete/stop + // because the container lacks a state.json file. + // This typically occurs when higher-level + // container runtimes terminate the runc create process due to context cancellation or timeout. + // If the runc-create process terminates due to SIGKILL before + // reaching this line of code, we won't encounter the cgroup freezing issue. + _, err = p.container.updateState(nil) + if err != nil { + return fmt.Errorf("unable to store init state before creating cgroup: %w", err) + } + + defer func() { + if retErr != nil { + // Find out if init is killed by the kernel's OOM killer. + // Get the count before killing init as otherwise cgroup + // might be removed by systemd. + oom, err := p.manager.OOMKillCount() + if err != nil { + logrus.WithError(err).Warn("unable to get oom kill count") + } else if oom > 0 { + // Does not matter what the particular error was, + // its cause is most probably OOM, so report that. 
+ const oomError = "container init was OOM-killed (memory limit too low?)" + + if logrus.GetLevel() >= logrus.DebugLevel { + // Only show the original error if debug is set, + // as it is not generally very useful. + retErr = fmt.Errorf(oomError+": %w", retErr) + } else { + retErr = errors.New(oomError) + } + } + + // Terminate the process to ensure we can remove cgroups. + if err := ignoreTerminateErrors(p.terminate()); err != nil { + logrus.WithError(err).Warn("unable to terminate initProcess") + } + + _ = p.manager.Destroy() + if p.intelRdtManager != nil { + _ = p.intelRdtManager.Destroy() + } + } + }() + + // Do this before syncing with child so that no children can escape the + // cgroup. We don't need to worry about not doing this and not being root + // because we'd be using the rootless cgroup manager in that case. + if err := p.manager.Apply(p.pid()); err != nil { + if errors.Is(err, cgroups.ErrRootless) { + // ErrRootless is to be ignored except when + // the container doesn't have private pidns. + if !p.config.Config.Namespaces.IsPrivate(configs.NEWPID) { + // TODO: make this an error in runc 1.3. + logrus.Warn("Creating a rootless container with no cgroup and no private pid namespace. " + + "Such configuration is strongly discouraged (as it is impossible to properly kill all container's processes) " + + "and will result in an error in a future runc version.") + } + } else { + return fmt.Errorf("unable to apply cgroup configuration: %w", err) + } + } + // Reset the CPU affinity after cgroups are configured to make sure it + // matches any configured cpuset. 
+ tryResetCPUAffinity(p.pid()) + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Apply(p.pid()); err != nil { + return fmt.Errorf("unable to apply Intel RDT configuration: %w", err) + } + } + if _, err := io.Copy(p.comm.initSockParent, p.bootstrapData); err != nil { + return fmt.Errorf("can't copy bootstrap data to pipe: %w", err) + } + + childPid, err := p.getChildPid() + if err != nil { + return fmt.Errorf("can't get final child's PID from pipe: %w", err) + } + + // Save the standard descriptor names before the container process + // can potentially move them (e.g., via dup2()). If we don't do this now, + // we won't know at checkpoint time which file descriptor to look up. + fds, err := getPipeFds(childPid) + if err != nil { + return fmt.Errorf("error getting pipe fds for pid %d: %w", childPid, err) + } + p.setExternalDescriptors(fds) + + // Wait for our first child to exit + if err := p.waitForChildExit(childPid); err != nil { + return fmt.Errorf("error waiting for our first child to exit: %w", err) + } + + // Spin up a goroutine to handle remapping mount requests by runc init. + // There is no point doing this for rootless containers because they cannot + // configure MOUNT_ATTR_IDMAP, nor do OPEN_TREE_CLONE. We could just + // service plain-open requests for plain bind-mounts but there's no need + // (rootless containers will never have permission issues on a source mount + // that the parent process can help with -- they are the same user). 
+ var mountRequest mountSourceRequestFn + if !p.container.config.RootlessEUID { + request, cancel, err := p.goCreateMountSources(context.Background()) + if err != nil { + return fmt.Errorf("error spawning mount remapping thread: %w", err) + } + defer cancel() + mountRequest = request + } + + if err := p.createNetworkInterfaces(); err != nil { + return fmt.Errorf("error creating network interfaces: %w", err) + } + + // initConfig.SpecState is only needed to run hooks that are executed + // inside a container, i.e. CreateContainer and StartContainer. + if p.config.Config.HasHook(configs.CreateContainer, configs.StartContainer) { + p.config.SpecState, err = p.container.currentOCIState() + if err != nil { + return fmt.Errorf("error getting current state: %w", err) + } + } + + if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil { + return fmt.Errorf("error sending config to init process: %w", err) + } + + var seenProcReady bool + ierr := parseSync(p.comm.syncSockParent, func(sync *syncT) error { + switch sync.Type { + case procMountPlease: + if mountRequest == nil { + return fmt.Errorf("cannot fulfil mount requests as a rootless user") + } + var m *configs.Mount + if sync.Arg == nil { + return fmt.Errorf("sync %q is missing an argument", sync.Type) + } + if err := json.Unmarshal(*sync.Arg, &m); err != nil { + return fmt.Errorf("sync %q passed invalid mount arg: %w", sync.Type, err) + } + mnt, err := mountRequest(m) + if err != nil { + return fmt.Errorf("failed to fulfil mount request: %w", err) + } + defer mnt.file.Close() + + arg, err := json.Marshal(mnt) + if err != nil { + return fmt.Errorf("sync %q failed to marshal mountSource: %w", sync.Type, err) + } + argMsg := json.RawMessage(arg) + if err := doWriteSync(p.comm.syncSockParent, syncT{ + Type: procMountFd, + Arg: &argMsg, + File: mnt.file, + }); err != nil { + return err + } + case procSeccomp: + if p.config.Config.Seccomp.ListenerPath == "" { + return errors.New("seccomp listenerPath is not 
set") + } + var srcFd int + if sync.Arg == nil { + return fmt.Errorf("sync %q is missing an argument", sync.Type) + } + if err := json.Unmarshal(*sync.Arg, &srcFd); err != nil { + return fmt.Errorf("sync %q passed invalid fd arg: %w", sync.Type, err) + } + seccompFd, err := pidGetFd(p.pid(), srcFd) + if err != nil { + return fmt.Errorf("sync %q get fd %d from child failed: %w", sync.Type, srcFd, err) + } + defer seccompFd.Close() + // We have a copy, the child can keep working. We don't need to + // wait for the seccomp notify listener to get the fd before we + // permit the child to continue because the child will happily wait + // for the listener if it hits SCMP_ACT_NOTIFY. + if err := writeSync(p.comm.syncSockParent, procSeccompDone); err != nil { + return err + } + + s, err := p.container.currentOCIState() + if err != nil { + return err + } + + // initProcessStartTime hasn't been set yet. + s.Pid = p.cmd.Process.Pid + s.Status = specs.StateCreating + containerProcessState := &specs.ContainerProcessState{ + Version: specs.Version, + Fds: []string{specs.SeccompFdName}, + Pid: s.Pid, + Metadata: p.config.Config.Seccomp.ListenerMetadata, + State: *s, + } + if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath, + containerProcessState, seccompFd); err != nil { + return err + } + case procReady: + seenProcReady = true + // Set rlimits, this has to be done here because we lose permissions + // to raise the limits once we enter a user-namespace + if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { + return fmt.Errorf("error setting rlimits for ready process: %w", err) + } + + // generate a timestamp indicating when the container was started + p.container.created = time.Now().UTC() + p.container.state = &createdState{ + c: p.container, + } + + // NOTE: If the procRun state has been synced and the + // runc-create process has been killed for some reason, + // the runc-init[2:stage] process will be leaky. 
And + // the runc command also fails to parse root directory + // because the container doesn't have state.json. + // + // In order to cleanup the runc-init[2:stage] by + // runc-delete/stop, we should store the status before + // procRun sync. + state, uerr := p.container.updateState(p) + if uerr != nil { + return fmt.Errorf("unable to store init state: %w", uerr) + } + p.container.initProcessStartTime = state.InitProcessStartTime + + // Sync with child. + if err := writeSync(p.comm.syncSockParent, procRun); err != nil { + return err + } + case procHooks: + // Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions. + if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil { + return fmt.Errorf("error setting cgroup config for procHooks process: %w", err) + } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Set(p.config.Config); err != nil { + return fmt.Errorf("error setting Intel RDT config for procHooks process: %w", err) + } + } + if p.config.Config.HasHook(configs.Prestart, configs.CreateRuntime) { + s, err := p.container.currentOCIState() + if err != nil { + return err + } + // initProcessStartTime hasn't been set yet. + s.Pid = p.cmd.Process.Pid + s.Status = specs.StateCreating + hooks := p.config.Config.Hooks + + if err := hooks.Run(configs.Prestart, s); err != nil { + return err + } + if err := hooks.Run(configs.CreateRuntime, s); err != nil { + return err + } + } + // Sync with child. 
+ if err := writeSync(p.comm.syncSockParent, procHooksDone); err != nil { + return err + } + default: + return errors.New("invalid JSON payload from child") + } + return nil + }) + + if err := p.comm.syncSockParent.Shutdown(unix.SHUT_WR); err != nil && ierr == nil { + return err + } + if !seenProcReady && ierr == nil { + ierr = errors.New("procReady not received") + } + if ierr != nil { + return fmt.Errorf("error during container init: %w", ierr) + } + return nil +} + +func (p *initProcess) createNetworkInterfaces() error { + for _, config := range p.config.Config.Networks { + strategy, err := getStrategy(config.Type) + if err != nil { + return err + } + n := &network{ + Network: *config, + } + if err := strategy.create(n, p.pid()); err != nil { + return err + } + p.config.Networks = append(p.config.Networks, n) + } + return nil +} + +func pidGetFd(pid, srcFd int) (*os.File, error) { + pidFd, err := unix.PidfdOpen(pid, 0) + if err != nil { + return nil, os.NewSyscallError("pidfd_open", err) + } + defer unix.Close(pidFd) + fd, err := unix.PidfdGetfd(pidFd, srcFd, 0) + if err != nil { + return nil, os.NewSyscallError("pidfd_getfd", err) + } + return os.NewFile(uintptr(fd), "[pidfd_getfd]"), nil +} + +func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, file *os.File) error { + conn, err := net.Dial("unix", listenerPath) + if err != nil { + return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err) + } + defer conn.Close() + + socket, err := conn.(*net.UnixConn).File() + if err != nil { + return fmt.Errorf("cannot get seccomp socket: %w", err) + } + defer socket.Close() + + b, err := json.Marshal(state) + if err != nil { + return fmt.Errorf("cannot marshall seccomp state: %w", err) + } + + if err := utils.SendRawFd(socket, string(b), file.Fd()); err != nil { + return fmt.Errorf("cannot send seccomp fd to %s: %w", listenerPath, err) + } + runtime.KeepAlive(file) + return nil +} + +func 
getPipeFds(pid int) ([]string, error) { + fds := make([]string, 3) + + dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") + for i := 0; i < 3; i++ { + // XXX: This breaks if the path is not a valid symlink (which can + // happen in certain particularly unlucky mount namespace setups). + f := filepath.Join(dirPath, strconv.Itoa(i)) + target, err := os.Readlink(f) + if err != nil { + // Ignore permission errors, for rootless containers and other + // non-dumpable processes. if we can't get the fd for a particular + // file, there's not much we can do. + if os.IsPermission(err) { + continue + } + return fds, err + } + fds[i] = target + } + return fds, nil +} + +// InitializeIO creates pipes for use with the process's stdio and returns the +// opposite side for each. Do not use this if you want to have a pseudoterminal +// set up for you by libcontainer (TODO: fix that too). +// TODO: This is mostly unnecessary, and should be handled by clients. +func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { + var fds []uintptr + i = &IO{} + // cleanup in case of an error + defer func() { + if err != nil { + for _, fd := range fds { + _ = unix.Close(int(fd)) + } + } + }() + // STDIN + r, w, err := os.Pipe() + if err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stdin, i.Stdin = r, w + // STDOUT + if r, w, err = os.Pipe(); err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stdout, i.Stdout = w, r + // STDERR + if r, w, err = os.Pipe(); err != nil { + return nil, err + } + fds = append(fds, r.Fd(), w.Fd()) + p.Stderr, i.Stderr = w, r + // change ownership of the pipes in case we are in a user namespace + for _, fd := range fds { + if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil { + return nil, &os.PathError{Op: "fchown", Path: "fd " + strconv.Itoa(int(fd)), Err: err} + } + } + return i, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go 
b/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go new file mode 100644 index 0000000000..2e81cdf68f --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go @@ -0,0 +1,128 @@ +package libcontainer + +import ( + "errors" + "os" + "os/exec" + + "github.com/opencontainers/runc/libcontainer/system" +) + +func newRestoredProcess(cmd *exec.Cmd, fds []string) (*restoredProcess, error) { + var err error + pid := cmd.Process.Pid + stat, err := system.Stat(pid) + if err != nil { + return nil, err + } + return &restoredProcess{ + cmd: cmd, + processStartTime: stat.StartTime, + fds: fds, + }, nil +} + +type restoredProcess struct { + cmd *exec.Cmd + processStartTime uint64 + fds []string +} + +func (p *restoredProcess) start() error { + return errors.New("restored process cannot be started") +} + +func (p *restoredProcess) pid() int { + return p.cmd.Process.Pid +} + +func (p *restoredProcess) terminate() error { + err := p.cmd.Process.Kill() + if _, werr := p.wait(); err == nil { + err = werr + } + return err +} + +func (p *restoredProcess) wait() (*os.ProcessState, error) { + // TODO: how do we wait on the actual process? + // maybe use --exec-cmd in criu + err := p.cmd.Wait() + if err != nil { + var exitErr *exec.ExitError + if !errors.As(err, &exitErr) { + return nil, err + } + } + st := p.cmd.ProcessState + return st, nil +} + +func (p *restoredProcess) startTime() (uint64, error) { + return p.processStartTime, nil +} + +func (p *restoredProcess) signal(s os.Signal) error { + return p.cmd.Process.Signal(s) +} + +func (p *restoredProcess) externalDescriptors() []string { + return p.fds +} + +func (p *restoredProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func (p *restoredProcess) forwardChildLogs() chan error { + return nil +} + +// nonChildProcess represents a process where the calling process is not +// the parent process. 
This process is created when Load loads a container +// from a persisted state. +type nonChildProcess struct { + processPid int + processStartTime uint64 + fds []string +} + +func (p *nonChildProcess) start() error { + return errors.New("restored process cannot be started") +} + +func (p *nonChildProcess) pid() int { + return p.processPid +} + +func (p *nonChildProcess) terminate() error { + return errors.New("restored process cannot be terminated") +} + +func (p *nonChildProcess) wait() (*os.ProcessState, error) { + return nil, errors.New("restored process cannot be waited on") +} + +func (p *nonChildProcess) startTime() (uint64, error) { + return p.processStartTime, nil +} + +func (p *nonChildProcess) signal(s os.Signal) error { + proc, err := os.FindProcess(p.processPid) + if err != nil { + return err + } + return proc.Signal(s) +} + +func (p *nonChildProcess) externalDescriptors() []string { + return p.fds +} + +func (p *nonChildProcess) setExternalDescriptors(newFds []string) { + p.fds = newFds +} + +func (p *nonChildProcess) forwardChildLogs() chan error { + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go new file mode 100644 index 0000000000..8fc5a901dc --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go @@ -0,0 +1,1483 @@ +package libcontainer + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "syscall" + "time" + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" + "github.com/moby/sys/mountinfo" + "github.com/moby/sys/userns" + "github.com/mrunalp/fileutils" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux/label" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/cgroups" + devices 
"github.com/opencontainers/cgroups/devices/config" + "github.com/opencontainers/cgroups/fs2" + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/internal/sys" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/utils" +) + +const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV + +// mountConfig contains mount data not specific to a mount point. +type mountConfig struct { + root string + label string + cgroup2Path string + rootlessCgroups bool + cgroupns bool +} + +// mountEntry contains mount data specific to a mount point. +type mountEntry struct { + *configs.Mount + srcFile *mountSource + dstFile *os.File +} + +// srcName is only meant for error messages, it returns a "friendly" name. +func (m mountEntry) srcName() string { + if m.srcFile != nil { + return m.srcFile.file.Name() + } + return m.Source +} + +func (m mountEntry) srcStat() (os.FileInfo, *syscall.Stat_t, error) { + var ( + st os.FileInfo + err error + ) + if m.srcFile != nil { + st, err = m.srcFile.file.Stat() + } else { + st, err = os.Stat(m.Source) + } + if err != nil { + return nil, nil, err + } + return st, st.Sys().(*syscall.Stat_t), nil +} + +func (m mountEntry) srcStatfs() (*unix.Statfs_t, error) { + var st unix.Statfs_t + if m.srcFile != nil { + if err := unix.Fstatfs(int(m.srcFile.file.Fd()), &st); err != nil { + return nil, os.NewSyscallError("fstatfs", err) + } + } else { + if err := unix.Statfs(m.Source, &st); err != nil { + return nil, &os.PathError{Op: "statfs", Path: m.Source, Err: err} + } + } + return &st, nil +} + +// needsSetupDev returns true if /dev needs to be set up. +func needsSetupDev(config *configs.Config) bool { + for _, m := range config.Mounts { + if m.Device == "bind" && utils.CleanPath(m.Destination) == "/dev" { + return false + } + } + return true +} + +// prepareRootfs sets up the devices, mount points, and filesystems for use +// inside a new mount namespace. 
It doesn't set anything as ro. You must call +// finalizeRootfs after this function to finish setting up the rootfs. +func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) { + config := iConfig.Config + if err := prepareRoot(config); err != nil { + return fmt.Errorf("error preparing rootfs: %w", err) + } + + mountConfig := &mountConfig{ + root: config.Rootfs, + label: config.MountLabel, + cgroup2Path: iConfig.Cgroup2Path, + rootlessCgroups: config.RootlessCgroups, + cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), + } + for _, m := range config.Mounts { + entry := mountEntry{Mount: m} + // Figure out whether we need to request runc to give us an + // open_tree(2)-style mountfd. For idmapped mounts, this is always + // necessary. For bind-mounts, this is only necessary if we cannot + // resolve the parent mount (this is only hit if you are running in a + // userns -- but for rootless the host-side thread can't help). + wantSourceFile := m.IsIDMapped() + if m.IsBind() && !config.RootlessEUID { + if _, err := os.Stat(m.Source); err != nil { + wantSourceFile = true + } + } + if wantSourceFile { + // Request a source file from the host. + if err := writeSyncArg(pipe, procMountPlease, m); err != nil { + return fmt.Errorf("failed to request mountfd for %q: %w", m.Source, err) + } + sync, err := readSyncFull(pipe, procMountFd) + if err != nil { + return fmt.Errorf("mountfd request for %q failed: %w", m.Source, err) + } + if sync.File == nil { + return fmt.Errorf("mountfd request for %q: response missing attached fd", m.Source) + } + defer sync.File.Close() + // Sanity-check to make sure we didn't get the wrong fd back. Note + // that while m.Source might contain symlinks, the (*os.File).Name + // is based on the path provided to os.OpenFile, not what it + // resolves to. So this should never happen. 
+ if sync.File.Name() != m.Source { + return fmt.Errorf("returned mountfd for %q doesn't match requested mount configuration: mountfd path is %q", m.Source, sync.File.Name()) + } + // Unmarshal the procMountFd argument (the file is sync.File). + var src *mountSource + if sync.Arg == nil { + return fmt.Errorf("sync %q is missing an argument", sync.Type) + } + if err := json.Unmarshal(*sync.Arg, &src); err != nil { + return fmt.Errorf("invalid mount fd response argument %q: %w", string(*sync.Arg), err) + } + if src == nil { + return fmt.Errorf("mountfd request for %q: no mount source info received", m.Source) + } + src.file = sync.File + entry.srcFile = src + } + if err := mountToRootfs(mountConfig, entry); err != nil { + return fmt.Errorf("error mounting %q to rootfs at %q: %w", m.Source, m.Destination, err) + } + } + + setupDev := needsSetupDev(config) + if setupDev { + if err := createDevices(config); err != nil { + return fmt.Errorf("error creating device nodes: %w", err) + } + if err := setupPtmx(config); err != nil { + return fmt.Errorf("error setting up ptmx: %w", err) + } + if err := setupDevSymlinks(config.Rootfs); err != nil { + return fmt.Errorf("error setting up /dev symlinks: %w", err) + } + } + + // Signal the parent to run the pre-start hooks. + // The hooks are run after the mounts are setup, but before we switch to the new + // root, so that the old root is still available in the hooks for any mount + // manipulations. + // Note that iConfig.Cwd is not guaranteed to exist here. + if err := syncParentHooks(pipe); err != nil { + return err + } + + // The reason these operations are done here rather than in finalizeRootfs + // is because the console-handling code gets quite sticky if we have to set + // up the console before doing the pivot_root(2). This is because the + // Console API has to also work with the ExecIn case, which means that the + // API must be able to deal with being inside as well as outside the + // container. 
It's just cleaner to do this here (at the expense of the + // operation not being perfectly split). + + if err := unix.Chdir(config.Rootfs); err != nil { + return &os.PathError{Op: "chdir", Path: config.Rootfs, Err: err} + } + + if s := iConfig.SpecState; s != nil { + s.Pid = unix.Getpid() + s.Status = specs.StateCreating + if err := iConfig.Config.Hooks.Run(configs.CreateContainer, s); err != nil { + return err + } + } + + if config.NoPivotRoot { + err = msMoveRoot(config.Rootfs) + } else if config.Namespaces.Contains(configs.NEWNS) { + err = pivotRoot(config.Rootfs) + } else { + err = chroot() + } + if err != nil { + return fmt.Errorf("error jailing process inside rootfs: %w", err) + } + + // Apply root mount propagation flags. + // This must be done after pivot_root/chroot because the mount propagation flag is applied + // to the current root ("/"), and not to the old rootfs before it becomes "/". Applying the + // flag in prepareRoot would affect the host mount namespace if the container's + // root mount is shared. + // MS_PRIVATE is skipped as rootfsParentMountPrivate() is already called. + if config.RootPropagation != 0 && config.RootPropagation&unix.MS_PRIVATE == 0 { + if err := mount("", "/", "", uintptr(config.RootPropagation), ""); err != nil { + return fmt.Errorf("unable to apply root propagation flags: %w", err) + } + } + + if setupDev { + if err := reOpenDevNull(); err != nil { + return fmt.Errorf("error reopening /dev/null inside container: %w", err) + } + } + + if cwd := iConfig.Cwd; cwd != "" { + // Note that spec.Process.Cwd can contain unclean value like "../../../../foo/bar...". + // However, we are safe to call MkDirAll directly because we are in the jail here. + if err := os.MkdirAll(cwd, 0o755); err != nil { + return err + } + } + + return nil +} + +// finalizeRootfs sets anything to ro if necessary. You must call +// prepareRootfs first. 
+func finalizeRootfs(config *configs.Config) (err error) { + // All tmpfs mounts and /dev were previously mounted as rw + // by mountPropagate. Remount them read-only as requested. + for _, m := range config.Mounts { + if m.Flags&unix.MS_RDONLY != unix.MS_RDONLY { + continue + } + if m.Device == "tmpfs" || utils.CleanPath(m.Destination) == "/dev" { + if err := remountReadonly(m); err != nil { + return err + } + } + } + + // set rootfs ( / ) as readonly + if config.Readonlyfs { + if err := setReadonly(); err != nil { + return fmt.Errorf("error setting rootfs as readonly: %w", err) + } + } + + if config.Umask != nil { + unix.Umask(int(*config.Umask)) + } else { + unix.Umask(0o022) + } + return nil +} + +// /tmp has to be mounted as private to allow MS_MOVE to work in all situations +func prepareTmp(topTmpDir string) (string, error) { + tmpdir, err := os.MkdirTemp(topTmpDir, "runctop") + if err != nil { + return "", err + } + if err := mount(tmpdir, tmpdir, "bind", unix.MS_BIND, ""); err != nil { + return "", err + } + if err := mount("", tmpdir, "", uintptr(unix.MS_PRIVATE), ""); err != nil { + return "", err + } + return tmpdir, nil +} + +func cleanupTmp(tmpdir string) { + _ = unix.Unmount(tmpdir, 0) + _ = os.RemoveAll(tmpdir) +} + +func mountCgroupV1(m mountEntry, c *mountConfig) error { + binds, err := getCgroupMounts(m.Mount) + if err != nil { + return err + } + var merged []string + for _, b := range binds { + ss := filepath.Base(b.Destination) + if strings.Contains(ss, ",") { + merged = append(merged, ss) + } + } + tmpfs := &configs.Mount{ + Source: "tmpfs", + Device: "tmpfs", + Destination: m.Destination, + Flags: defaultMountFlags, + Data: "mode=755", + PropagationFlags: m.PropagationFlags, + } + + if err := mountToRootfs(c, mountEntry{Mount: tmpfs}); err != nil { + return err + } + + for _, b := range binds { + if c.cgroupns { + // We just created the tmpfs, and so we can just use filepath.Join + // here (not to mention we want to make sure we create the 
path + // inside the tmpfs, so we don't want to resolve symlinks). + subsystemPath := filepath.Join(c.root, b.Destination) + subsystemName := filepath.Base(b.Destination) + if err := pathrs.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil { + return err + } + if err := utils.WithProcfd(c.root, b.Destination, func(dstFd string) error { + flags := defaultMountFlags + if m.Flags&unix.MS_RDONLY != 0 { + flags = flags | unix.MS_RDONLY + } + var ( + source = "cgroup" + data = subsystemName + ) + if data == "systemd" { + data = cgroups.CgroupNamePrefix + data + source = "systemd" + } + return mountViaFds(source, nil, b.Destination, dstFd, "cgroup", uintptr(flags), data) + }); err != nil { + return err + } + } else { + if err := mountToRootfs(c, mountEntry{Mount: b}); err != nil { + return err + } + } + } + for _, mc := range merged { + for _, ss := range strings.Split(mc, ",") { + // symlink(2) is very dumb, it will just shove the path into + // the link and doesn't do any checks or relative path + // conversion. Also, don't error out if the cgroup already exists. + if err := os.Symlink(mc, filepath.Join(c.root, m.Destination, ss)); err != nil && !os.IsExist(err) { + return err + } + } + } + return nil +} + +func mountCgroupV2(m mountEntry, c *mountConfig) error { + err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + return mountViaFds(m.Source, nil, m.Destination, dstFd, "cgroup2", uintptr(m.Flags), m.Data) + }) + if err == nil || (!errors.Is(err, unix.EPERM) && !errors.Is(err, unix.EBUSY)) { + return err + } + + // When we are in UserNS but CgroupNS is not unshared, we cannot mount + // cgroup2 (#2158), so fall back to bind mount. 
+ bindM := &configs.Mount{ + Device: "bind", + Source: fs2.UnifiedMountpoint, + Destination: m.Destination, + Flags: unix.MS_BIND | m.Flags, + PropagationFlags: m.PropagationFlags, + } + if c.cgroupns && c.cgroup2Path != "" { + // Emulate cgroupns by bind-mounting the container cgroup path + // rather than the whole /sys/fs/cgroup. + bindM.Source = c.cgroup2Path + } + // mountToRootfs() handles remounting for MS_RDONLY. + err = mountToRootfs(c, mountEntry{Mount: bindM}) + if c.rootlessCgroups && errors.Is(err, unix.ENOENT) { + // ENOENT (for `src = c.cgroup2Path`) happens when rootless runc is being executed + // outside the userns+mountns. + // + // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted + // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`). + err = utils.WithProcfdFile(m.dstFile, func(procfd string) error { + return maskPaths([]string{procfd}, c.label) + }) + } + return err +} + +func doTmpfsCopyUp(m mountEntry, mountLabel string) (Err error) { + // Set up a scratch dir for the tmpfs on the host. + tmpdir, err := prepareTmp("/tmp") + if err != nil { + return fmt.Errorf("tmpcopyup: failed to setup tmpdir: %w", err) + } + defer cleanupTmp(tmpdir) + tmpDir, err := os.MkdirTemp(tmpdir, "runctmpdir") + if err != nil { + return fmt.Errorf("tmpcopyup: failed to create tmpdir: %w", err) + } + defer os.RemoveAll(tmpDir) + + tmpDirFile, err := os.OpenFile(tmpDir, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("tmpcopyup: %w", err) + } + defer tmpDirFile.Close() + + // Configure the *host* tmpdir as if it's the container mount. We change + // m.dstFile since we are going to mount *on the host*. 
+ hostMount := mountEntry{ + Mount: m.Mount, + dstFile: tmpDirFile, + } + if err := hostMount.mountPropagate("/", mountLabel); err != nil { + return err + } + defer func() { + if Err != nil { + if err := unmount(tmpDir, unix.MNT_DETACH); err != nil { + logrus.Warnf("tmpcopyup: %v", err) + } + } + }() + + return utils.WithProcfdFile(m.dstFile, func(dstFd string) (Err error) { + // Copy the container data to the host tmpdir. We append "/" to force + // CopyDirectory to resolve the symlink rather than trying to copy the + // symlink itself. + if err := fileutils.CopyDirectory(dstFd+"/", tmpDir); err != nil { + return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, dstFd, tmpDir, err) + } + // Now move the mount into the container. + if err := mountViaFds(tmpDir, nil, m.Destination, dstFd, "", unix.MS_MOVE, ""); err != nil { + return fmt.Errorf("tmpcopyup: failed to move mount: %w", err) + } + return nil + }) +} + +const ( + // The atime "enum" flags (which are mutually exclusive). + mntAtimeEnumFlags = unix.MS_NOATIME | unix.MS_RELATIME | unix.MS_STRICTATIME + // All atime-related flags. + mntAtimeFlags = mntAtimeEnumFlags | unix.MS_NODIRATIME + // Flags which can be locked when inheriting mounts in a different userns. + // In the kernel, these are the mounts that are locked using MNT_LOCK_*. + mntLockFlags = unix.MS_RDONLY | unix.MS_NODEV | unix.MS_NOEXEC | + unix.MS_NOSUID | mntAtimeFlags +) + +func statfsToMountFlags(st unix.Statfs_t) int { + // From <linux/statfs.h>. + const ST_NOSYMFOLLOW = 0x2000 //nolint:revive + + var flags int + for _, f := range []struct { + st, ms int + }{ + // See calculate_f_flags() in fs/statfs.c.
+ {unix.ST_RDONLY, unix.MS_RDONLY}, + {unix.ST_NOSUID, unix.MS_NOSUID}, + {unix.ST_NODEV, unix.MS_NODEV}, + {unix.ST_NOEXEC, unix.MS_NOEXEC}, + {unix.ST_MANDLOCK, unix.MS_MANDLOCK}, + {unix.ST_SYNCHRONOUS, unix.MS_SYNCHRONOUS}, + {unix.ST_NOATIME, unix.MS_NOATIME}, + {unix.ST_NODIRATIME, unix.MS_NODIRATIME}, + {unix.ST_RELATIME, unix.MS_RELATIME}, + {ST_NOSYMFOLLOW, unix.MS_NOSYMFOLLOW}, + // There is no ST_STRICTATIME -- see below. + } { + if int(st.Flags)&f.st == f.st { + flags |= f.ms + } + } + // MS_STRICTATIME is a "fake" MS_* flag. It isn't stored in mnt->mnt_flags, + // and so it doesn't show up in statfs(2). If none of the other flags in + // atime enum are present, the mount is MS_STRICTATIME. + if flags&mntAtimeEnumFlags == 0 { + flags |= unix.MS_STRICTATIME + } + return flags +} + +var errRootfsToFile = errors.New("config tries to change rootfs to file") + +func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) { + unsafePath := utils.StripRoot(rootfs, m.Destination) + dstFile, err := pathrs.OpenInRoot(rootfs, unsafePath, unix.O_PATH) + defer func() { + if dstFile != nil && Err != nil { + _ = dstFile.Close() + } + }() + if err != nil { + if !errors.Is(err, unix.ENOENT) { + return fmt.Errorf("lookup mountpoint target: %w", err) + } + + // If the mountpoint doesn't already exist, we want to create a mountpoint + // that makes sense for the source. For file bind-mounts this is an empty + // file, for everything else it's a directory. + dstIsFile := false + if m.Device == "bind" { + fi, _, err := m.srcStat() + if err != nil { + // Error out if the source of a bind mount does not exist as we + // will be unable to bind anything to it. + return err + } + dstIsFile = !fi.IsDir() + } + + // In previous runc versions, we would tolerate nonsense paths with + // dangling symlinks as path components. 
pathrs-lite does not support + // this, so instead we have to emulate this behaviour by doing + // SecureJoin *purely to get a semi-reasonable path to use* and then we + // use pathrs-lite to operate on the path safely. + newUnsafePath, err := securejoin.SecureJoin(rootfs, unsafePath) + if err != nil { + return err + } + unsafePath = utils.StripRoot(rootfs, newUnsafePath) + + if dstIsFile { + dstFile, err = pathrs.CreateInRoot(rootfs, unsafePath, unix.O_CREAT|unix.O_EXCL|unix.O_NOFOLLOW, 0o644) + } else { + dstFile, err = pathrs.MkdirAllInRootOpen(rootfs, unsafePath, 0o755) + } + if err != nil { + return fmt.Errorf("make mountpoint %q: %w", m.Destination, err) + } + } + + if m.Device == "tmpfs" { + // If the original target exists, copy the mode for the tmpfs mount. + stat, err := dstFile.Stat() + if err != nil { + return fmt.Errorf("check tmpfs source mode: %w", err) + } + dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode())) + if m.Data != "" { + dt = dt + "," + m.Data + } + m.Data = dt + } + + dstFullPath, err := procfs.ProcSelfFdReadlink(dstFile) + if err != nil { + return fmt.Errorf("get mount destination real path: %w", err) + } + if !pathrs.IsLexicallyInRoot(rootfs, dstFullPath) { + return fmt.Errorf("mountpoint %q is outside of rootfs %q", dstFullPath, rootfs) + } + if relPath, err := filepath.Rel(rootfs, dstFullPath); err != nil { + return fmt.Errorf("get relative path of %q: %w", dstFullPath, err) + } else if relPath == "." { + return fmt.Errorf("mountpoint %q is on the top of rootfs %q", dstFullPath, rootfs) + } + // TODO: Make checkProcMount use dstFile directly to avoid the need to + // operate on paths here. + if err := checkProcMount(rootfs, dstFullPath, *m); err != nil { + return fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err) + } + // Update mountEntry. 
+ m.dstFile = dstFile + return nil +} + +func mountToRootfs(c *mountConfig, m mountEntry) error { + rootfs := c.root + + // procfs and sysfs are special because we need to ensure they are actually + // mounted on a specific path in a container without any funny business. + switch m.Device { + case "proc", "sysfs": + // If the destination already exists and is not a directory, we bail + // out. This is to avoid mounting through a symlink or similar -- which + // has been a "fun" attack scenario in the past. + // TODO: This won't be necessary once we switch to libpathrs and we can + // stop all of these symlink-exchange attacks. + dest := filepath.Clean(m.Destination) + if !pathrs.IsLexicallyInRoot(rootfs, dest) { + // Do not use securejoin as it resolves symlinks. + dest = filepath.Join(rootfs, dest) + } + if err := checkProcMount(rootfs, dest, m); err != nil { + return err + } + if fi, err := os.Lstat(dest); err != nil { + if !os.IsNotExist(err) { + return err + } + } else if !fi.IsDir() { + return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) + } + dstFile, err := pathrs.MkdirAllInRootOpen(rootfs, dest, 0o755) + if err != nil { + return err + } + defer dstFile.Close() + // "proc" and "sys" mounts need special handling (without resolving the + // destination) to avoid attacks. 
+ m.dstFile = dstFile + return m.mountPropagate(rootfs, "") + } + + mountLabel := c.label + if err := m.createOpenMountpoint(rootfs); err != nil { + return fmt.Errorf("create mountpoint for %s mount: %w", m.Destination, err) + } + defer func() { + if m.dstFile != nil { + _ = m.dstFile.Close() + m.dstFile = nil + } + }() + + switch m.Device { + case "mqueue": + if err := m.mountPropagate(rootfs, ""); err != nil { + return err + } + return utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + return label.SetFileLabel(dstFd, mountLabel) + }) + case "tmpfs": + var err error + if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP { + err = doTmpfsCopyUp(m, mountLabel) + } else { + err = m.mountPropagate(rootfs, mountLabel) + } + return err + case "bind": + // open_tree()-related shenanigans are all handled in mountViaFds. + if err := m.mountPropagate(rootfs, mountLabel); err != nil { + return err + } + + // The initial MS_BIND won't change the mount options, we need to do a + // separate MS_BIND|MS_REMOUNT to apply the mount options. We skip + // doing this if the user has not specified any mount flags at all + // (including cleared flags) -- in which case we just keep the original + // mount flags. + // + // Note that the fact we check whether any clearing flags are set is in + // contrast to mount(8)'s current behaviour, but is what users probably + // expect. See <https://github.com/util-linux/util-linux/issues/2433>. + if m.Flags & ^(unix.MS_BIND|unix.MS_REC|unix.MS_REMOUNT) != 0 || m.ClearedFlags != 0 { + if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + flags := m.Flags | unix.MS_BIND | unix.MS_REMOUNT + // The runtime-spec says we SHOULD map to the relevant mount(8) + // behaviour. However, it's not clear whether we want the + // "mount --bind -o ..." or "mount --bind -o remount,..." + // behaviour here -- both of which are somewhat broken[1].
+ // + // So, if the user has passed "remount" as a mount option, we + // implement the "mount --bind -o remount" behaviour, otherwise + // we implement the spiritual intent of the "mount --bind -o" + // behaviour, which should match what users expect. Maybe + // mount(8) will eventually implement this behaviour too.. + // + // [1]: https://github.com/util-linux/util-linux/issues/2433 + + // Initially, we emulate "mount --bind -o ..." where we set + // only the requested flags (clearing any existing flags). The + // only difference from mount(8) is that we do this + // unconditionally, regardless of whether any set-me mount + // options have been requested. + // + // TODO: We are not doing any special handling of the atime + // flags here, which means that the mount will inherit the old + // atime flags if the user didn't explicitly request a + // different set of flags. This also has the mount(8) bug where + // "nodiratime,norelatime" will result in a + // "nodiratime,relatime" mount. + mountErr := mountViaFds("", nil, m.Destination, dstFd, "", uintptr(flags), "") + if mountErr == nil { + return nil + } + + // If the mount failed, the mount may contain locked mount + // flags. In that case, we emulate "mount --bind -o + // remount,...", where we take the existing mount flags of the + // mount and apply the request flags (including clearing flags) + // on top. The main divergence we have from mount(8) here is + // that we handle atimes correctly to make sure we error out if + // we cannot fulfil the requested mount flags. 
+ + st, err := m.srcStatfs() + if err != nil { + return err + } + srcFlags := statfsToMountFlags(*st) + + logrus.Debugf( + "working around failure to set vfs flags on bind-mount %s: srcFlags=%s flagsSet=%s flagsClr=%s: %v", + m.Destination, stringifyMountFlags(srcFlags), + stringifyMountFlags(m.Flags), stringifyMountFlags(m.ClearedFlags), mountErr) + + // If the user explicitly request one of the locked flags *not* + // be set, we need to return an error to avoid producing mounts + // that don't match the user's request. + if cannotClearFlags := srcFlags & m.ClearedFlags & mntLockFlags; cannotClearFlags != 0 { + return fmt.Errorf("cannot clear locked flags %s: %w", stringifyMountFlags(cannotClearFlags), mountErr) + } + + // If an MS_*ATIME flag was requested, it must match the + // existing one. This handles two separate kernel bugs, and + // matches the logic of can_change_locked_flags() but without + // these bugs: + // + // * (2.6.30+) Since commit 613cbe3d4870 ("Don't set relatime + // when noatime is specified"), MS_RELATIME is ignored when + // MS_NOATIME is set. This means that us inheriting MS_NOATIME + // from a mount while requesting MS_RELATIME would *silently* + // produce an MS_NOATIME mount. + // + // * (2.6.30+) Since its introduction in commit d0adde574b84 + // ("Add a strictatime mount option"), MS_STRICTATIME has + // caused any passed MS_RELATIME and MS_NOATIME flags to be + // ignored which results in us *silently* producing + // MS_STRICTATIME mounts even if the user requested MS_RELATIME + // or MS_NOATIME. + if m.Flags&mntAtimeFlags != 0 && m.Flags&mntAtimeFlags != srcFlags&mntAtimeFlags { + return fmt.Errorf("cannot change locked atime flags %s: %w", stringifyMountFlags(srcFlags&mntAtimeFlags), mountErr) + } + + // Retry the mount with the existing lockable mount flags + // applied. 
+ flags |= srcFlags & mntLockFlags + mountErr = mountViaFds("", nil, m.Destination, dstFd, "", uintptr(flags), "") + if mountErr != nil { + mountErr = fmt.Errorf("remount with locked flags %s re-applied: %w", stringifyMountFlags(srcFlags&mntLockFlags), mountErr) + } + return mountErr + }); err != nil { + return fmt.Errorf("failed to set user-requested vfs flags on bind-mount: %w", err) + } + } + + if m.Relabel != "" { + if err := label.Validate(m.Relabel); err != nil { + return err + } + shared := label.IsShared(m.Relabel) + if err := label.Relabel(m.Source, mountLabel, shared); err != nil { + return err + } + } + return setRecAttr(m) + case "cgroup": + if cgroups.IsCgroup2UnifiedMode() { + return mountCgroupV2(m, c) + } + return mountCgroupV1(m, c) + default: + return m.mountPropagate(rootfs, mountLabel) + } +} + +func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { + mounts, err := cgroups.GetCgroupMounts(false) + if err != nil { + return nil, err + } + + // We don't need to use /proc/thread-self here because runc always runs + // with every thread in the same cgroup. This lets us avoid having to do + // runtime.LockOSThread. + cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup") + if err != nil { + return nil, err + } + + var binds []*configs.Mount + + for _, mm := range mounts { + dir, err := mm.GetOwnCgroup(cgroupPaths) + if err != nil { + return nil, err + } + relDir, err := filepath.Rel(mm.Root, dir) + if err != nil { + return nil, err + } + binds = append(binds, &configs.Mount{ + Device: "bind", + Source: filepath.Join(mm.Mountpoint, relDir), + Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)), + Flags: unix.MS_BIND | unix.MS_REC | m.Flags, + PropagationFlags: m.PropagationFlags, + }) + } + + return binds, nil +} + +// Taken from <include/linux/proc_ns.h>. If a file is on a filesystem of type +// PROC_SUPER_MAGIC, we're guaranteed that only the root of the superblock will +// have this inode number.
+const procRootIno = 1 + +// checkProcMount checks to ensure that the mount destination is not over the top of /proc. +// dest is required to be an abs path and have any symlinks resolved before calling this function. +// +// If m is nil, don't stat the filesystem. This is used for restore of a checkpoint. +func checkProcMount(rootfs, dest string, m mountEntry) error { + const procPath = "/proc" + path, err := filepath.Rel(filepath.Join(rootfs, procPath), dest) + if err != nil { + return err + } + // pass if the mount path is located outside of /proc + if strings.HasPrefix(path, "..") { + return nil + } + if path == "." { + // Only allow bind-mounts on top of /proc, and only if the source is a + // procfs mount. + if m.IsBind() { + fsSt, err := m.srcStatfs() + if err != nil { + return err + } + if fsSt.Type == unix.PROC_SUPER_MAGIC { + if _, uSt, err := m.srcStat(); err != nil { + return err + } else if uSt.Ino != procRootIno { + // We cannot error out in this case, because we've + // supported these kinds of mounts for a long time. + // However, we would expect users to bind-mount the root of + // a real procfs on top of /proc in the container. We might + // want to block this in the future. + logrus.Warnf("bind-mount %v (source %v) is of type procfs but is not the root of a procfs (inode %d). Future versions of runc might block this configuration -- please report an issue to <https://github.com/opencontainers/runc> if you see this warning.", dest, m.srcName(), uSt.Ino) + } + return nil + } + } else if m.Device == "proc" { + // Fresh procfs-type mounts are always safe to mount on top of /proc. + return nil + } + return fmt.Errorf("%q cannot be mounted because it is not of type proc", dest) + } + + // Here dest is definitely under /proc. Do not allow those, + // except for a few specific entries emulated by lxcfs.
+ validProcMounts := []string{ + "/proc/cpuinfo", + "/proc/diskstats", + "/proc/meminfo", + "/proc/stat", + "/proc/swaps", + "/proc/uptime", + "/proc/loadavg", + "/proc/slabinfo", + "/proc/sys/kernel/ns_last_pid", + "/proc/sys/crypto/fips_enabled", + } + for _, valid := range validProcMounts { + path, err := filepath.Rel(filepath.Join(rootfs, valid), dest) + if err != nil { + return err + } + if path == "." { + return nil + } + } + + return fmt.Errorf("%q cannot be mounted because it is inside /proc", dest) +} + +func setupDevSymlinks(rootfs string) error { + // In theory, these should be links to /proc/thread-self, but systems + // expect these to be /proc/self and this matches how most distributions + // work. + links := [][2]string{ + {"/proc/self/fd", "/dev/fd"}, + {"/proc/self/fd/0", "/dev/stdin"}, + {"/proc/self/fd/1", "/dev/stdout"}, + {"/proc/self/fd/2", "/dev/stderr"}, + } + // kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink + // in /dev if it exists in /proc. + if _, err := os.Stat("/proc/kcore"); err == nil { + links = append(links, [2]string{"/proc/kcore", "/dev/core"}) + } + for _, link := range links { + var ( + src = link[0] + dst = filepath.Join(rootfs, link[1]) + ) + if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { + return err + } + } + return nil +} + +// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs +// this method will make them point to `/dev/null` in this container's rootfs. This +// needs to be called after we chroot/pivot into the container's rootfs so that any +// symlinks are resolved locally. 
+func reOpenDevNull() error { + file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) + if err != nil { + return err + } + defer file.Close() + if err := verifyDevNull(file); err != nil { + return fmt.Errorf("can't reopen /dev/null: %w", err) + } + for fd := 0; fd < 3; fd++ { + var stat unix.Stat_t + if err := unix.Fstat(fd, &stat); err != nil { + return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(fd), Err: err} + } + if isDevNull(&stat) { + // Close and re-open the fd. + if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil { + return &os.PathError{ + Op: "dup3", + Path: "fd " + strconv.Itoa(int(file.Fd())), + Err: err, + } + } + } + } + return nil +} + +// Create the device nodes in the container. +func createDevices(config *configs.Config) error { + useBindMount := userns.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER) + for _, node := range config.Devices { + + // The /dev/ptmx device is setup by setupPtmx() + if utils.CleanPath(node.Path) == "/dev/ptmx" { + continue + } + + // containers running in a user namespace are not allowed to mknod + // devices so we can just bind mount it from the host. + if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil { + return err + } + } + return nil +} + +func bindMountDeviceNode(destDir *os.File, destName string, node *devices.Device) error { + dstFile, err := utils.Openat(destDir, destName, unix.O_CREAT|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0o000) + if err != nil { + return fmt.Errorf("create device inode %s: %w", node.Path, err) + } + defer dstFile.Close() + + return utils.WithProcfdFile(dstFile, func(dstFd string) error { + return mountViaFds(node.Path, nil, dstFile.Name(), dstFd, "bind", unix.MS_BIND, "") + }) +} + +// Creates the device node in the rootfs of the container. +func createDeviceNode(rootfs string, node *devices.Device, bind bool) error { + if node.Path == "" { + // The node only exists for cgroup reasons, ignore it here. 
+ return nil + } + destPath, err := securejoin.SecureJoin(rootfs, node.Path) + if err != nil { + return err + } + if destPath == rootfs { + return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile) + } + destDirPath, destName := filepath.Split(destPath) + destDir, err := pathrs.MkdirAllInRootOpen(rootfs, destDirPath, 0o755) + if err != nil { + return fmt.Errorf("mkdir parent of device inode %q: %w", node.Path, err) + } + if bind { + return bindMountDeviceNode(destDir, destName, node) + } + if err := mknodDevice(destDir, destName, node); err != nil { + if errors.Is(err, os.ErrExist) { + return nil + } else if errors.Is(err, os.ErrPermission) { + return bindMountDeviceNode(destDir, destName, node) + } + return err + } + return nil +} + +func mknodDevice(destDir *os.File, destName string, node *devices.Device) error { + fileMode := node.FileMode + switch node.Type { + case devices.BlockDevice: + fileMode |= unix.S_IFBLK + case devices.CharDevice: + fileMode |= unix.S_IFCHR + case devices.FifoDevice: + fileMode |= unix.S_IFIFO + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } + dev, err := node.Mkdev() + if err != nil { + return err + } + if err := unix.Mknodat(int(destDir.Fd()), destName, uint32(fileMode), int(dev)); err != nil { + return &os.PathError{Op: "mknodat", Path: filepath.Join(destDir.Name(), destName), Err: err} + } + + // Get a handle and verify that it matches the expected inode type and + // major:minor before we operate on it. 
+ devFile, err := utils.Openat(destDir, destName, unix.O_NOFOLLOW|unix.O_PATH, 0) + if err != nil { + return fmt.Errorf("open new %c device inode %s: %w", node.Type, node.Path, err) + } + defer devFile.Close() + + if err := sys.VerifyInode(devFile, func(stat *unix.Stat_t, _ *unix.Statfs_t) error { + if stat.Mode&unix.S_IFMT != uint32(fileMode)&unix.S_IFMT { + return fmt.Errorf("new %c device inode %s has incorrect ftype: %#x doesn't match expected %#v", + node.Type, node.Path, + stat.Mode&unix.S_IFMT, fileMode&unix.S_IFMT) + } + if stat.Rdev != dev { + return fmt.Errorf("new %c device inode %s has incorrect major:minor: %d:%d doesn't match expected %d:%d", + node.Type, node.Path, + unix.Major(stat.Rdev), unix.Minor(stat.Rdev), + unix.Major(dev), unix.Minor(dev)) + } + return nil + }); err != nil { + return err + } + + // Ensure permission bits (can be different because of umask). + if err := sys.FchmodFile(devFile, uint32(fileMode)); err != nil { + return fmt.Errorf("update new %c device inode %s file mode: %w", node.Type, node.Path, err) + } + if err := sys.FchownFile(devFile, int(node.Uid), int(node.Gid)); err != nil { + return fmt.Errorf("update new %c device inode %s owner: %w", node.Type, node.Path, err) + } + runtime.KeepAlive(devFile) + return nil +} + +// rootfsParentMountPrivate ensures rootfs parent mount is private. +// This is needed for two reasons: +// - pivot_root() will fail if parent mount is shared; +// - when we bind mount rootfs, if its parent is not private, the new mount +// will propagate (leak!) to parent namespace and we don't want that. +func rootfsParentMountPrivate(path string) error { + var err error + // Assuming path is absolute and clean (this is checked in + // libcontainer/validate). Any error other than EINVAL means we failed, + // and EINVAL means this is not a mount point, so traverse up until we + // find one. 
+ for { + err = unix.Mount("", path, "", unix.MS_PRIVATE, "") + if err == nil { + return nil + } + if err != unix.EINVAL || path == "/" { //nolint:errorlint // unix errors are bare + break + } + path = filepath.Dir(path) + } + return &mountError{ + op: "remount-private", + target: path, + flags: unix.MS_PRIVATE, + err: err, + } +} + +func prepareRoot(config *configs.Config) error { + flag := unix.MS_SLAVE | unix.MS_REC + if config.RootPropagation != 0 { + flag = config.RootPropagation + } + if err := mount("", "/", "", uintptr(flag), ""); err != nil { + return err + } + + if err := rootfsParentMountPrivate(config.Rootfs); err != nil { + return err + } + + return mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "") +} + +func setReadonly() error { + flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY) + + err := mount("", "/", "", flags, "") + if err == nil { + return nil + } + var s unix.Statfs_t + if err := unix.Statfs("/", &s); err != nil { + return &os.PathError{Op: "statfs", Path: "/", Err: err} + } + flags |= uintptr(s.Flags) + return mount("", "/", "", flags, "") +} + +func setupPtmx(config *configs.Config) error { + ptmx := filepath.Join(config.Rootfs, "dev/ptmx") + if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { + return err + } + if err := os.Symlink("pts/ptmx", ptmx); err != nil { + return err + } + return nil +} + +// pivotRoot will call pivot_root such that rootfs becomes the new root +// filesystem, and everything else is cleaned up. +func pivotRoot(rootfs string) error { + // While the documentation may claim otherwise, pivot_root(".", ".") is + // actually valid. What this results in is / being the new root but + // /proc/self/cwd being the old root. Since we can play around with the cwd + // with pivot_root this allows us to pivot without creating directories in + // the rootfs. Shout-outs to the LXC developers for giving us this idea. 
+ + oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return &os.PathError{Op: "open", Path: "/", Err: err} + } + defer unix.Close(oldroot) //nolint: errcheck + + newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return &os.PathError{Op: "open", Path: rootfs, Err: err} + } + defer unix.Close(newroot) //nolint: errcheck + + // Change to the new root so that the pivot_root actually acts on it. + if err := unix.Fchdir(newroot); err != nil { + return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(newroot), Err: err} + } + + if err := unix.PivotRoot(".", "."); err != nil { + return &os.PathError{Op: "pivot_root", Path: ".", Err: err} + } + + // Currently our "." is oldroot (according to the current kernel code). + // However, purely for safety, we will fchdir(oldroot) since there isn't + // really any guarantee from the kernel what /proc/self/cwd will be after a + // pivot_root(2). + + if err := unix.Fchdir(oldroot); err != nil { + return &os.PathError{Op: "fchdir", Path: "fd " + strconv.Itoa(oldroot), Err: err} + } + + // Make oldroot rslave to make sure our unmounts don't propagate to the + // host (and thus bork the machine). We don't use rprivate because this is + // known to cause issues due to races where we still have a reference to a + // mount while a process in the host namespace are trying to operate on + // something they think has no mounts (devicemapper in particular). + if err := mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + return err + } + // Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. + if err := unmount(".", unix.MNT_DETACH); err != nil { + return err + } + + // Switch back to our shiny new root. 
+ if err := unix.Chdir("/"); err != nil { + return &os.PathError{Op: "chdir", Path: "/", Err: err} + } + return nil +} + +func msMoveRoot(rootfs string) error { + // Before we move the root and chroot we have to mask all "full" sysfs and + // procfs mounts which exist on the host. This is because while the kernel + // has protections against mounting procfs if it has masks, when using + // chroot(2) the *host* procfs mount is still reachable in the mount + // namespace and the kernel permits procfs mounts inside --no-pivot + // containers. + // + // Users shouldn't be using --no-pivot except in exceptional circumstances, + // but to avoid such a trivial security flaw we apply a best-effort + // protection here. The kernel only allows a mount of a pseudo-filesystem + // like procfs or sysfs if there is a *full* mount (the root of the + // filesystem is mounted) without any other locked mount points covering a + // subtree of the mount. + // + // So we try to unmount (or mount tmpfs on top of) any mountpoint which is + // a full mount of either sysfs or procfs (since those are the most + // concerning filesystems to us). + mountinfos, err := mountinfo.GetMounts(func(info *mountinfo.Info) (skip, stop bool) { + // Collect every sysfs and procfs filesystem, except for those which + // are non-full mounts or are inside the rootfs of the container. + if info.Root != "/" || + (info.FSType != "proc" && info.FSType != "sysfs") || + strings.HasPrefix(info.Mountpoint, rootfs) { + skip = true + } + return skip, stop + }) + if err != nil { + return err + } + for _, info := range mountinfos { + p := info.Mountpoint + // Be sure umount events are not propagated to the host. + if err := mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + if errors.Is(err, unix.ENOENT) { + // If the mountpoint doesn't exist that means that we've + // already blasted away some parent directory of the mountpoint + // and so we don't care about this error. 
+ continue + } + return err + } + if err := unmount(p, unix.MNT_DETACH); err != nil { + if !errors.Is(err, unix.EINVAL) && !errors.Is(err, unix.EPERM) { + return err + } else { + // If we have not privileges for umounting (e.g. rootless), then + // cover the path. + if err := mount("tmpfs", p, "tmpfs", 0, ""); err != nil { + return err + } + } + } + } + + // Move the rootfs on top of "/" in our mount namespace. + if err := mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil { + return err + } + return chroot() +} + +func chroot() error { + if err := unix.Chroot("."); err != nil { + return &os.PathError{Op: "chroot", Path: ".", Err: err} + } + if err := unix.Chdir("/"); err != nil { + return &os.PathError{Op: "chdir", Path: "/", Err: err} + } + return nil +} + +// readonlyPath will make a path read only. +func readonlyPath(path string) error { + if err := mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err + } + + var s unix.Statfs_t + if err := unix.Statfs(path, &s); err != nil { + return &os.PathError{Op: "statfs", Path: path, Err: err} + } + flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC) + + if err := mount(path, path, "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil { + return err + } + + return nil +} + +// remountReadonly will remount an existing mount point and ensure that it is read-only. +func remountReadonly(m *configs.Mount) error { + var ( + dest = m.Destination + flags = m.Flags + ) + for i := 0; i < 5; i++ { + // There is a special case in the kernel for + // MS_REMOUNT | MS_BIND, which allows us to change only the + // flags even as an unprivileged user (i.e. user namespace) + // assuming we don't drop any security related flags (nodev, + // nosuid, etc.). So, let's use that case so that we can do + // this re-mount without failing in a userns. 
+ flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY + if err := mount("", dest, "", uintptr(flags), ""); err != nil { + if errors.Is(err, unix.EBUSY) { + time.Sleep(100 * time.Millisecond) + continue + } + return err + } + return nil + } + return fmt.Errorf("unable to mount %s as readonly max retries reached", dest) +} + +func isDevNull(st *unix.Stat_t) bool { + return st.Mode&unix.S_IFMT == unix.S_IFCHR && st.Rdev == unix.Mkdev(1, 3) +} + +func verifyDevNull(f *os.File) error { + return sys.VerifyInode(f, func(st *unix.Stat_t, _ *unix.Statfs_t) error { + if !isDevNull(st) { + return errors.New("container's /dev/null is invalid") + } + return nil + }) +} + +// maskPaths masks the top of the specified paths inside a container to avoid +// security issues from processes reading information from non-namespace aware +// mounts ( proc/kcore ). +// For files, maskPath bind mounts /dev/null over the top of the specified path. +// For directories, maskPath mounts read-only tmpfs over the top of the specified path. +func maskPaths(paths []string, mountLabel string) error { + devNull, err := os.OpenFile("/dev/null", unix.O_PATH, 0) + if err != nil { + return fmt.Errorf("can't mask paths: %w", err) + } + defer devNull.Close() + if err := verifyDevNull(devNull); err != nil { + return fmt.Errorf("can't mask paths: %w", err) + } + devNullSrc := &mountSource{Type: mountSourcePlain, file: devNull} + procSelfFd, closer := utils.ProcThreadSelf("fd/") + defer closer() + + for _, path := range paths { + // Open the target path; skip if it doesn't exist. + dstFh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + continue + } + return fmt.Errorf("can't mask path %q: %w", path, err) + } + st, err := dstFh.Stat() + if err != nil { + dstFh.Close() + return fmt.Errorf("can't mask path %q: %w", path, err) + } + var dstType string + if st.IsDir() { + // Destination is a directory: bind mount a ro tmpfs over it. 
+ dstType = "dir" + err = mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel)) + } else { + // Destination is a file: mount it to /dev/null. + dstType = "path" + dstFd := filepath.Join(procSelfFd, strconv.Itoa(int(dstFh.Fd()))) + err = mountViaFds("", devNullSrc, path, dstFd, "", unix.MS_BIND, "") + } + dstFh.Close() + if err != nil { + return fmt.Errorf("can't mask %s %q: %w", dstType, path, err) + } + } + + return nil +} + +func reopenAfterMount(rootfs string, f *os.File, flags int) (_ *os.File, Err error) { + fullPath, err := procfs.ProcSelfFdReadlink(f) + if err != nil { + return nil, fmt.Errorf("get full path: %w", err) + } + if !pathrs.IsLexicallyInRoot(rootfs, fullPath) { + return nil, fmt.Errorf("mountpoint %q is outside of rootfs %q", fullPath, rootfs) + } + unsafePath := utils.StripRoot(rootfs, fullPath) + reopened, err := pathrs.OpenInRoot(rootfs, unsafePath, flags) + if err != nil { + return nil, fmt.Errorf("re-open mountpoint %q: %w", unsafePath, err) + } + defer func() { + if Err != nil { + _ = reopened.Close() + } + }() + + // NOTE: The best we can do here is confirm that the new mountpoint handle + // matches the original target handle, but an attacker could've swapped a + // different path to replace it. In the worst case this could result in us + // applying later vfsmount flags onto the wrong mount. + // + // This is far from ideal, but the only way of doing this in a race-free + // way is to switch the new mount API (move_mount(2) does not require this + // re-opening step, and thus no such races are possible). 
+ reopenedFullPath, err := procfs.ProcSelfFdReadlink(reopened) + if err != nil { + return nil, fmt.Errorf("check full path of re-opened mountpoint: %w", err) + } + if reopenedFullPath != fullPath { + return nil, fmt.Errorf("mountpoint %q was moved while re-opening", unsafePath) + } + return reopened, nil +} + +// Do the mount operation followed by additional mounts required to take care +// of propagation flags. This will always be scoped inside the container rootfs. +func (m *mountEntry) mountPropagate(rootfs string, mountLabel string) error { + var ( + data = label.FormatMountLabel(m.Data, mountLabel) + flags = m.Flags + ) + // Delay mounting the filesystem read-only if we need to do further + // operations on it. We need to set up files in "/dev", and other tmpfs + // mounts may need to be chmod-ed after mounting. These mounts will be + // remounted ro later in finalizeRootfs(), if necessary. + if m.Device == "tmpfs" || utils.CleanPath(m.Destination) == "/dev" { + flags &= ^unix.MS_RDONLY + } + + if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + return mountViaFds(m.Source, m.srcFile, m.Destination, dstFd, m.Device, uintptr(flags), data) + }); err != nil { + return err + } + + // We need to re-open the mountpoint after doing the mount, in order for us + // to operate on the new mount we just created. However, we cannot use + // pathrs.Reopen because we need to re-resolve from the parent directory to + // get a new handle to the top mount. + // + // TODO: Use move_mount(2) on newer kernels so that this is no longer + // necessary on modern systems. + newDstFile, err := reopenAfterMount(rootfs, m.dstFile, unix.O_PATH) + if err != nil { + return fmt.Errorf("reopen mountpoint after mount: %w", err) + } + _ = m.dstFile.Close() + m.dstFile = newDstFile + + // We have to apply mount propagation flags in a separate WithProcfd() call + // because the previous call invalidates the passed procfd -- the mount + // target needs to be re-opened. 
+ if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + for _, pflag := range m.PropagationFlags { + if err := mountViaFds("", nil, m.Destination, dstFd, "", uintptr(pflag), ""); err != nil { + return err + } + } + return nil + }); err != nil { + return fmt.Errorf("change mount propagation through procfd: %w", err) + } + return nil +} + +func setRecAttr(m mountEntry) error { + if m.RecAttr == nil { + return nil + } + return utils.WithProcfdFile(m.dstFile, func(procfd string) error { + return unix.MountSetattr(-1, procfd, unix.AT_RECURSIVE, m.RecAttr) + }) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go new file mode 100644 index 0000000000..3ca03ed8a3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go @@ -0,0 +1,150 @@ +package seccomp + +import ( + "fmt" + "sort" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" +) + +// flagTsync is recognized but ignored by runc, and it is not defined +// in the runtime-spec. +const flagTsync = "SECCOMP_FILTER_FLAG_TSYNC" + +var operators = map[string]configs.Operator{ + "SCMP_CMP_NE": configs.NotEqualTo, + "SCMP_CMP_LT": configs.LessThan, + "SCMP_CMP_LE": configs.LessThanOrEqualTo, + "SCMP_CMP_EQ": configs.EqualTo, + "SCMP_CMP_GE": configs.GreaterThanOrEqualTo, + "SCMP_CMP_GT": configs.GreaterThan, + "SCMP_CMP_MASKED_EQ": configs.MaskEqualTo, +} + +// KnownOperators returns the list of the known operations. +// Used by `runc features`. 
+func KnownOperators() []string { + var res []string + for k := range operators { + res = append(res, k) + } + sort.Strings(res) + return res +} + +var actions = map[string]configs.Action{ + "SCMP_ACT_KILL": configs.Kill, + "SCMP_ACT_ERRNO": configs.Errno, + "SCMP_ACT_TRAP": configs.Trap, + "SCMP_ACT_ALLOW": configs.Allow, + "SCMP_ACT_TRACE": configs.Trace, + "SCMP_ACT_LOG": configs.Log, + "SCMP_ACT_NOTIFY": configs.Notify, + "SCMP_ACT_KILL_THREAD": configs.KillThread, + "SCMP_ACT_KILL_PROCESS": configs.KillProcess, +} + +// KnownActions returns the list of the known actions. +// Used by `runc features`. +func KnownActions() []string { + var res []string + for k := range actions { + res = append(res, k) + } + sort.Strings(res) + return res +} + +var archs = map[string]string{ + "SCMP_ARCH_X86": "x86", + "SCMP_ARCH_X86_64": "amd64", + "SCMP_ARCH_X32": "x32", + "SCMP_ARCH_ARM": "arm", + "SCMP_ARCH_AARCH64": "arm64", + "SCMP_ARCH_MIPS": "mips", + "SCMP_ARCH_MIPS64": "mips64", + "SCMP_ARCH_MIPS64N32": "mips64n32", + "SCMP_ARCH_MIPSEL": "mipsel", + "SCMP_ARCH_MIPSEL64": "mipsel64", + "SCMP_ARCH_MIPSEL64N32": "mipsel64n32", + "SCMP_ARCH_PPC": "ppc", + "SCMP_ARCH_PPC64": "ppc64", + "SCMP_ARCH_PPC64LE": "ppc64le", + "SCMP_ARCH_RISCV64": "riscv64", + "SCMP_ARCH_S390": "s390", + "SCMP_ARCH_S390X": "s390x", +} + +// KnownArchs returns the list of the known archs. +// Used by `runc features`. +func KnownArchs() []string { + var res []string + for k := range archs { + res = append(res, k) + } + sort.Strings(res) + return res +} + +// ConvertStringToOperator converts a string into a Seccomp comparison operator. +// Comparison operators use the names they are assigned by Libseccomp's header. +// Attempting to convert a string that is not a valid operator results in an +// error. 
+func ConvertStringToOperator(in string) (configs.Operator, error) { + if op, ok := operators[in]; ok { + return op, nil + } + return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in) +} + +// ConvertStringToAction converts a string into a Seccomp rule match action. +// Actions use the names they are assigned in Libseccomp's header. +// Attempting to convert a string that is not a valid action results in an +// error. +func ConvertStringToAction(in string) (configs.Action, error) { + if act, ok := actions[in]; ok { + return act, nil + } + return 0, fmt.Errorf("string %s is not a valid action for seccomp", in) +} + +// ConvertStringToArch converts a string into a Seccomp comparison arch. +func ConvertStringToArch(in string) (string, error) { + if arch, ok := archs[in]; ok { + return arch, nil + } + return "", fmt.Errorf("string %s is not a valid arch for seccomp", in) +} + +// List of flags known to this version of runc. +var flags = []string{ + flagTsync, + string(specs.LinuxSeccompFlagSpecAllow), + string(specs.LinuxSeccompFlagLog), +} + +// KnownFlags returns the list of the known filter flags. +// Used by `runc features`. +func KnownFlags() []string { + return flags +} + +// SupportedFlags returns the list of the supported filter flags. +// This list may be a subset of one returned by KnownFlags due to +// some flags not supported by the current kernel and/or libseccomp. +// Used by `runc features`. 
+func SupportedFlags() []string { + if !Enabled { + return nil + } + + var res []string + for _, flag := range flags { + if FlagSupported(specs.LinuxSeccompFlag(flag)) == nil { + res = append(res, flag) + } + } + + return res +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go new file mode 100644 index 0000000000..14c03f2779 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go @@ -0,0 +1,735 @@ +//go:build cgo && seccomp + +package patchbpf + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "os" + "runtime" + "unsafe" + + libseccomp "github.com/seccomp/libseccomp-golang" + "github.com/sirupsen/logrus" + "golang.org/x/net/bpf" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +// #cgo pkg-config: libseccomp +/* +#include <errno.h> +#include <stdint.h> +#include <seccomp.h> +#include <linux/seccomp.h> + +const uint32_t C_ACT_ERRNO_ENOSYS = SCMP_ACT_ERRNO(ENOSYS); + +// Copied from <linux/seccomp.h>.
+ +#ifndef SECCOMP_SET_MODE_FILTER +# define SECCOMP_SET_MODE_FILTER 1 +#endif +const uintptr_t C_SET_MODE_FILTER = SECCOMP_SET_MODE_FILTER; + +#ifndef SECCOMP_FILTER_FLAG_LOG +# define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#endif +const uintptr_t C_FILTER_FLAG_LOG = SECCOMP_FILTER_FLAG_LOG; + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +# define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) +#endif +const uintptr_t C_FILTER_FLAG_SPEC_ALLOW = SECCOMP_FILTER_FLAG_SPEC_ALLOW; + +#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER +# define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#endif +const uintptr_t C_FILTER_FLAG_NEW_LISTENER = SECCOMP_FILTER_FLAG_NEW_LISTENER; + +#ifndef AUDIT_ARCH_RISCV64 +#ifndef EM_RISCV +#define EM_RISCV 243 +#endif +#define AUDIT_ARCH_RISCV64 (EM_RISCV|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#endif + +// We use the AUDIT_ARCH_* values because those are the ones used by the kernel +// and SCMP_ARCH_* sometimes has fake values (such as SCMP_ARCH_X32). But we +// use <seccomp.h> so we get libseccomp's fallback definitions of AUDIT_ARCH_*.
+ +const uint32_t C_AUDIT_ARCH_I386 = AUDIT_ARCH_I386; +const uint32_t C_AUDIT_ARCH_X86_64 = AUDIT_ARCH_X86_64; +const uint32_t C_AUDIT_ARCH_ARM = AUDIT_ARCH_ARM; +const uint32_t C_AUDIT_ARCH_AARCH64 = AUDIT_ARCH_AARCH64; +const uint32_t C_AUDIT_ARCH_MIPS = AUDIT_ARCH_MIPS; +const uint32_t C_AUDIT_ARCH_MIPS64 = AUDIT_ARCH_MIPS64; +const uint32_t C_AUDIT_ARCH_MIPS64N32 = AUDIT_ARCH_MIPS64N32; +const uint32_t C_AUDIT_ARCH_MIPSEL = AUDIT_ARCH_MIPSEL; +const uint32_t C_AUDIT_ARCH_MIPSEL64 = AUDIT_ARCH_MIPSEL64; +const uint32_t C_AUDIT_ARCH_MIPSEL64N32 = AUDIT_ARCH_MIPSEL64N32; +const uint32_t C_AUDIT_ARCH_PPC = AUDIT_ARCH_PPC; +const uint32_t C_AUDIT_ARCH_PPC64 = AUDIT_ARCH_PPC64; +const uint32_t C_AUDIT_ARCH_PPC64LE = AUDIT_ARCH_PPC64LE; +const uint32_t C_AUDIT_ARCH_S390 = AUDIT_ARCH_S390; +const uint32_t C_AUDIT_ARCH_S390X = AUDIT_ARCH_S390X; +const uint32_t C_AUDIT_ARCH_RISCV64 = AUDIT_ARCH_RISCV64; +*/ +import "C" + +var retErrnoEnosys = uint32(C.C_ACT_ERRNO_ENOSYS) + +// Assume sizeof(int) == 4 in the BPF program. +const bpfSizeofInt = 4 + +// This syscall is used for multiplexing "large" syscalls on s390(x). Unknown +// syscalls will end up with this syscall number, so we need to explicitly +// return -ENOSYS for this syscall on those architectures. +const s390xMultiplexSyscall libseccomp.ScmpSyscall = 0 + +func isAllowAction(action configs.Action) bool { + switch action { + // Trace is considered an "allow" action because a good tracer should + // support future syscalls (by handling -ENOSYS on its own), and giving + // -ENOSYS will be disruptive for emulation. + case configs.Allow, configs.Log, configs.Trace: + return true + default: + return false + } +} + +func parseProgram(rdr io.Reader) ([]bpf.RawInstruction, error) { + var program []bpf.RawInstruction + for { + // Read the next instruction. We have to use NativeEndian because + // seccomp_export_bpf outputs the program in *host* endian-ness. 
+ var insn unix.SockFilter + if err := binary.Read(rdr, binary.NativeEndian, &insn); err != nil { + if errors.Is(err, io.EOF) { + // Parsing complete. + break + } + if errors.Is(err, io.ErrUnexpectedEOF) { + // Parsing stopped mid-instruction. + return nil, fmt.Errorf("program parsing halted mid-instruction: %w", err) + } + // All other errors. + return nil, fmt.Errorf("error parsing instructions: %w", err) + } + program = append(program, bpf.RawInstruction{ + Op: insn.Code, + Jt: insn.Jt, + Jf: insn.Jf, + K: insn.K, + }) + } + return program, nil +} + +func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error) { + rdr, wtr, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("error creating scratch pipe: %w", err) + } + defer wtr.Close() + defer rdr.Close() + + readerBuffer := new(bytes.Buffer) + errChan := make(chan error, 1) + go func() { + _, err := io.Copy(readerBuffer, rdr) + errChan <- err + close(errChan) + }() + + if err := filter.ExportBPF(wtr); err != nil { + return nil, fmt.Errorf("error exporting BPF: %w", err) + } + // Close so that the reader actually gets EOF. + _ = wtr.Close() + + if copyErr := <-errChan; copyErr != nil { + return nil, fmt.Errorf("error reading from ExportBPF pipe: %w", copyErr) + } + + // Parse the instructions. + rawProgram, err := parseProgram(readerBuffer) + if err != nil { + return nil, fmt.Errorf("parsing generated BPF filter: %w", err) + } + program, ok := bpf.Disassemble(rawProgram) + if !ok { + return nil, errors.New("could not disassemble entire BPF filter") + } + return program, nil +} + +type linuxAuditArch uint32 + +const invalidArch linuxAuditArch = 0 + +func scmpArchToAuditArch(arch libseccomp.ScmpArch) (linuxAuditArch, error) { + switch arch { + case libseccomp.ArchNative: + // Convert to actual native architecture. 
+ arch, err := libseccomp.GetNativeArch() + if err != nil { + return invalidArch, fmt.Errorf("unable to get native arch: %w", err) + } + return scmpArchToAuditArch(arch) + case libseccomp.ArchX86: + return linuxAuditArch(C.C_AUDIT_ARCH_I386), nil + case libseccomp.ArchAMD64, libseccomp.ArchX32: + // NOTE: x32 is treated like x86_64 except all x32 syscalls have the + // 30th bit of the syscall number set to indicate that it's not a + // normal x86_64 syscall. + return linuxAuditArch(C.C_AUDIT_ARCH_X86_64), nil + case libseccomp.ArchARM: + return linuxAuditArch(C.C_AUDIT_ARCH_ARM), nil + case libseccomp.ArchARM64: + return linuxAuditArch(C.C_AUDIT_ARCH_AARCH64), nil + case libseccomp.ArchMIPS: + return linuxAuditArch(C.C_AUDIT_ARCH_MIPS), nil + case libseccomp.ArchMIPS64: + return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64), nil + case libseccomp.ArchMIPS64N32: + return linuxAuditArch(C.C_AUDIT_ARCH_MIPS64N32), nil + case libseccomp.ArchMIPSEL: + return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL), nil + case libseccomp.ArchMIPSEL64: + return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64), nil + case libseccomp.ArchMIPSEL64N32: + return linuxAuditArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil + case libseccomp.ArchPPC: + return linuxAuditArch(C.C_AUDIT_ARCH_PPC), nil + case libseccomp.ArchPPC64: + return linuxAuditArch(C.C_AUDIT_ARCH_PPC64), nil + case libseccomp.ArchPPC64LE: + return linuxAuditArch(C.C_AUDIT_ARCH_PPC64LE), nil + case libseccomp.ArchS390: + return linuxAuditArch(C.C_AUDIT_ARCH_S390), nil + case libseccomp.ArchS390X: + return linuxAuditArch(C.C_AUDIT_ARCH_S390X), nil + case libseccomp.ArchRISCV64: + return linuxAuditArch(C.C_AUDIT_ARCH_RISCV64), nil + default: + return invalidArch, fmt.Errorf("unknown architecture: %v", arch) + } +} + +type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall + +// Figure out largest syscall number referenced in the filter for each +// architecture. 
We will be generating code based on the native architecture +// representation, but SCMP_ARCH_X32 means we have to track cases where the +// same architecture has different largest syscalls based on the mode. +func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) { + scmpArchs := make(map[libseccomp.ScmpArch]struct{}) + for _, ociArch := range config.Architectures { + arch, err := libseccomp.GetArchFromString(ociArch) + if err != nil { + return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err) + } + scmpArchs[arch] = struct{}{} + } + // On architectures like ppc64le, Docker inexplicably doesn't include the + // native architecture in the architecture list which results in no + // architectures being present in the list at all (rendering the ENOSYS + // stub a no-op). So, always include the native architecture. + if nativeScmpArch, err := libseccomp.GetNativeArch(); err != nil { + return nil, fmt.Errorf("unable to get native arch: %w", err) + } else if _, ok := scmpArchs[nativeScmpArch]; !ok { + logrus.Debugf("seccomp: adding implied native architecture %v to config set", nativeScmpArch) + scmpArchs[nativeScmpArch] = struct{}{} + } + logrus.Debugf("seccomp: configured architecture set: %s", scmpArchs) + + // Only loop over architectures which are present in the filter. Any other + // architectures will get the libseccomp bad architecture action anyway. + lastSyscalls := make(lastSyscallMap) + for arch := range scmpArchs { + auditArch, err := scmpArchToAuditArch(arch) + if err != nil { + return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err) + } + + if _, ok := lastSyscalls[auditArch]; !ok { + lastSyscalls[auditArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{} + } + if _, ok := lastSyscalls[auditArch][arch]; ok { + // Because of ArchNative we may hit the same entry multiple times. + // Just skip it if we've seen this (linuxAuditArch, ScmpArch) + // combination before. 
+ continue + } + + // Find the largest syscall in the filter for this architecture. + var largestSyscall libseccomp.ScmpSyscall + for _, rule := range config.Syscalls { + sysno, err := libseccomp.GetSyscallFromNameByArch(rule.Name, arch) + if err != nil { + // Ignore unknown syscalls. + continue + } + if sysno > largestSyscall { + largestSyscall = sysno + } + } + if largestSyscall != 0 { + logrus.Debugf("seccomp: largest syscall number for arch %v is %v", arch, largestSyscall) + lastSyscalls[auditArch][arch] = largestSyscall + } else { + logrus.Warnf("could not find any syscalls for arch %v", arch) + delete(lastSyscalls[auditArch], arch) + } + } + return lastSyscalls, nil +} + +// FIXME FIXME FIXME +// +// This solution is less than ideal. In the future it would be great to have +// per-arch information about which syscalls were added in which kernel +// versions so we can create far more accurate filter rules (handling holes in +// the syscall table and determining -ENOSYS requirements based on kernel +// minimum version alone. +// +// This implementation can in principle cause issues with syscalls like +// close_range(2) which were added out-of-order in the syscall table between +// kernel releases. +func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error) { + // A jump-table for each linuxAuditArch used to generate the initial + // conditional jumps -- measured from the *END* of the program so they + // remain valid after prepending to the tail. + archJumpTable := map[linuxAuditArch]uint32{} + + // Generate our own -ENOSYS rules for each architecture. They have to be + // generated in reverse (prepended to the tail of the program) because the + // JumpIf jumps need to be computed from the end of the program. + programTail := []bpf.Instruction{ + // Fall-through rules jump into the filter. + bpf.Jump{Skip: 1}, + // Rules which jump to here get -ENOSYS. + bpf.RetConstant{Val: retErrnoEnosys}, + } + + // Generate the syscall -ENOSYS rules. 
+ for auditArch, maxSyscalls := range lastSyscalls { + // The number of instructions from the tail of this section which need + // to be jumped in order to reach the -ENOSYS return. If the section + // does not jump, it will fall through to the actual filter. + baseJumpEnosys := uint32(len(programTail) - 1) + baseJumpFilter := baseJumpEnosys + 1 + + // Add the load instruction for the syscall number -- we jump here + // directly from the arch code so we need to do it here. Sadly we can't + // share this code between architecture branches. + section := []bpf.Instruction{ + // load [0] (syscall number) + bpf.LoadAbsolute{Off: 0, Size: bpfSizeofInt}, + } + + switch len(maxSyscalls) { + case 0: + // No syscalls found for this arch -- skip it and move on. + continue + case 1: + // Get the only syscall and scmpArch in the map. + var ( + scmpArch libseccomp.ScmpArch + sysno libseccomp.ScmpSyscall + ) + for arch, no := range maxSyscalls { + sysno = no + scmpArch = arch + } + + switch scmpArch { + // Return -ENOSYS for setup(2) on s390(x). This syscall is used for + // multiplexing "large syscall number" syscalls, but if the syscall + // number is not known to the kernel then the syscall number is + // left unchanged (and because it is sysno=0, you'll end up with + // EPERM for syscalls the kernel doesn't know about). + // + // The actual setup(2) syscall is never used by userspace anymore + // (and hasn't existed for decades) outside of this multiplexing + // scheme so returning -ENOSYS is fine. + case libseccomp.ArchS390, libseccomp.ArchS390X: + section = append(section, []bpf.Instruction{ + // jne [setup=0],1 + bpf.JumpIf{ + Cond: bpf.JumpNotEqual, + Val: uint32(s390xMultiplexSyscall), + SkipTrue: 1, + }, + // ret [ENOSYS] + bpf.RetConstant{Val: retErrnoEnosys}, + }...) + } + + // The simplest case just boils down to a single jgt instruction, + // with special handling if baseJumpEnosys is larger than 255 (and + // thus a long jump is required). 
+ var sectionTail []bpf.Instruction + if baseJumpEnosys+1 <= 255 { + sectionTail = []bpf.Instruction{ + // jgt [syscall],[baseJumpEnosys+1] + bpf.JumpIf{ + Cond: bpf.JumpGreaterThan, + Val: uint32(sysno), + SkipTrue: uint8(baseJumpEnosys + 1), + }, + // ja [baseJumpFilter] + bpf.Jump{Skip: baseJumpFilter}, + } + } else { + sectionTail = []bpf.Instruction{ + // jle [syscall],1 + bpf.JumpIf{Cond: bpf.JumpLessOrEqual, Val: uint32(sysno), SkipTrue: 1}, + // ret [ENOSYS] + bpf.RetConstant{Val: retErrnoEnosys}, + // ja [baseJumpFilter] + bpf.Jump{Skip: baseJumpFilter}, + } + } + + // If we're on x86 we need to add a check for x32 and if we're in + // the wrong mode we jump over the section. + if uint32(auditArch) == uint32(C.C_AUDIT_ARCH_X86_64) { + // Generate a prefix to check the mode. + switch scmpArch { + case libseccomp.ArchAMD64: + sectionTail = append([]bpf.Instruction{ + // jset (1<<30),[len(tail)-1] + bpf.JumpIf{ + Cond: bpf.JumpBitsSet, + Val: 1 << 30, + SkipTrue: uint8(len(sectionTail) - 1), + }, + }, sectionTail...) + case libseccomp.ArchX32: + sectionTail = append([]bpf.Instruction{ + // jset (1<<30),0,[len(tail)-1] + bpf.JumpIf{ + Cond: bpf.JumpBitsNotSet, + Val: 1 << 30, + SkipTrue: uint8(len(sectionTail) - 1), + }, + }, sectionTail...) + default: + return nil, fmt.Errorf("unknown amd64 native architecture %#x", scmpArch) + } + } + + section = append(section, sectionTail...) + case 2: + // x32 and x86_64 are a unique case, we can't handle any others. 
+ if uint32(auditArch) != uint32(C.C_AUDIT_ARCH_X86_64) { + return nil, fmt.Errorf("unknown architecture overlap on native arch %#x", auditArch) + } + + x32sysno, ok := maxSyscalls[libseccomp.ArchX32] + if !ok { + return nil, fmt.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchX32, maxSyscalls) + } + x86sysno, ok := maxSyscalls[libseccomp.ArchAMD64] + if !ok { + return nil, fmt.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchAMD64, maxSyscalls) + } + + // The x32 ABI indicates that a syscall is being made by an x32 + // process by setting the 30th bit of the syscall number, but we + // need to do some special-casing depending on whether we need to + // do long jumps. + if baseJumpEnosys+2 <= 255 { + // For the simple case we want to have something like: + // jset (1<<30),1 + // jgt [x86 syscall],[baseJumpEnosys+2],1 + // jgt [x32 syscall],[baseJumpEnosys+1] + // ja [baseJumpFilter] + section = append(section, []bpf.Instruction{ + // jset (1<<30),1 + bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 1 << 30, SkipTrue: 1}, + // jgt [x86 syscall],[baseJumpEnosys+1],1 + bpf.JumpIf{ + Cond: bpf.JumpGreaterThan, + Val: uint32(x86sysno), + SkipTrue: uint8(baseJumpEnosys + 2), SkipFalse: 1, + }, + // jgt [x32 syscall],[baseJumpEnosys] + bpf.JumpIf{ + Cond: bpf.JumpGreaterThan, + Val: uint32(x32sysno), + SkipTrue: uint8(baseJumpEnosys + 1), + }, + // ja [baseJumpFilter] + bpf.Jump{Skip: baseJumpFilter}, + }...) 
+ } else { + // But if the [baseJumpEnosys+2] jump is larger than 255 we + // need to do a long jump like so: + // jset (1<<30),1 + // jgt [x86 syscall],1,2 + // jle [x32 syscall],1 + // ret [ENOSYS] + // ja [baseJumpFilter] + section = append(section, []bpf.Instruction{ + // jset (1<<30),1 + bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 1 << 30, SkipTrue: 1}, + // jgt [x86 syscall],1,2 + bpf.JumpIf{ + Cond: bpf.JumpGreaterThan, + Val: uint32(x86sysno), + SkipTrue: 1, SkipFalse: 2, + }, + // jle [x32 syscall],1 + bpf.JumpIf{ + Cond: bpf.JumpLessOrEqual, + Val: uint32(x32sysno), + SkipTrue: 1, + }, + // ret [ENOSYS] + bpf.RetConstant{Val: retErrnoEnosys}, + // ja [baseJumpFilter] + bpf.Jump{Skip: baseJumpFilter}, + }...) + } + default: + return nil, fmt.Errorf("invalid number of architecture overlaps: %v", len(maxSyscalls)) + } + + // Prepend this section to the tail. + programTail = append(section, programTail...) + + // Update jump table. + archJumpTable[auditArch] = uint32(len(programTail)) + } + + // Add a dummy "jump to filter" for any architecture we might miss below. + // Such architectures will probably get the BadArch action of the filter + // regardless. + programTail = append([]bpf.Instruction{ + // ja [end of stub and start of filter] + bpf.Jump{Skip: uint32(len(programTail))}, + }, programTail...) + + // Generate the jump rules for each architecture. This has to be done in + // reverse as well for the same reason as above. We add to programTail + // directly because the jumps are impacted by each architecture rule we add + // as well. + // + // TODO: Maybe we want to optimise to avoid long jumps here? So sort the + // architectures based on how large the jumps are going to be, or + // re-sort the candidate architectures each time to make sure that we + // pick the largest jump which is going to be smaller than 255. + for auditArch := range lastSyscalls { + // We jump forwards but the jump table is calculated from the *END*. 
+ jump := uint32(len(programTail)) - archJumpTable[auditArch] + + // Same routine as above -- this is a basic jeq check, complicated + // slightly if it turns out that we need to do a long jump. + if jump <= 255 { + programTail = append([]bpf.Instruction{ + // jeq [arch],[jump] + bpf.JumpIf{ + Cond: bpf.JumpEqual, + Val: uint32(auditArch), + SkipTrue: uint8(jump), + }, + }, programTail...) + } else { + programTail = append([]bpf.Instruction{ + // jne [arch],1 + bpf.JumpIf{ + Cond: bpf.JumpNotEqual, + Val: uint32(auditArch), + SkipTrue: 1, + }, + // ja [jump] + bpf.Jump{Skip: jump}, + }, programTail...) + } + } + + // Prepend the load instruction for the architecture. + programTail = append([]bpf.Instruction{ + // load [4] (architecture) + bpf.LoadAbsolute{Off: bpfSizeofInt, Size: bpfSizeofInt}, + }, programTail...) + + // And that's all folks! + return programTail, nil +} + +func assemble(program []bpf.Instruction) ([]unix.SockFilter, error) { + rawProgram, err := bpf.Assemble(program) + if err != nil { + return nil, fmt.Errorf("error assembling program: %w", err) + } + + // Convert to []unix.SockFilter for unix.SockFilter. + var filter []unix.SockFilter + for _, insn := range rawProgram { + filter = append(filter, unix.SockFilter{ + Code: insn.Op, + Jt: insn.Jt, + Jf: insn.Jf, + K: insn.K, + }) + } + return filter, nil +} + +func generatePatch(config *configs.Seccomp) ([]bpf.Instruction, error) { + // Patch the generated cBPF only when there is not a defaultErrnoRet set + // and it is different from ENOSYS + if config.DefaultErrnoRet != nil && *config.DefaultErrnoRet == uint(retErrnoEnosys) { + return nil, nil + } + // We only add the stub if the default action is not permissive. 
+ if isAllowAction(config.DefaultAction) { + logrus.Debugf("seccomp: skipping -ENOSYS stub filter generation") + return nil, nil + } + + lastSyscalls, err := findLastSyscalls(config) + if err != nil { + return nil, fmt.Errorf("error finding last syscalls for -ENOSYS stub: %w", err) + } + stubProgram, err := generateEnosysStub(lastSyscalls) + if err != nil { + return nil, fmt.Errorf("error generating -ENOSYS stub: %w", err) + } + return stubProgram, nil +} + +func enosysPatchFilter(config *configs.Seccomp, filter *libseccomp.ScmpFilter) ([]unix.SockFilter, error) { + program, err := disassembleFilter(filter) + if err != nil { + return nil, fmt.Errorf("error disassembling original filter: %w", err) + } + + patch, err := generatePatch(config) + if err != nil { + return nil, fmt.Errorf("error generating patch for filter: %w", err) + } + fullProgram := append(patch, program...) + + logrus.Debugf("seccomp: prepending -ENOSYS stub filter to user filter...") + for idx, insn := range patch { + logrus.Debugf(" [%4.1d] %s", idx, insn) + } + logrus.Debugf(" [....] --- original filter ---") + + fprog, err := assemble(fullProgram) + if err != nil { + return nil, fmt.Errorf("error assembling modified filter: %w", err) + } + return fprog, nil +} + +func filterFlags(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) { + // Ignore the error since pre-2.4 libseccomp is treated as API level 0. 
+ apiLevel, _ := libseccomp.GetAPI() + + noNewPrivs, err = filter.GetNoNewPrivsBit() + if err != nil { + return 0, false, fmt.Errorf("unable to fetch no_new_privs filter bit: %w", err) + } + + if apiLevel >= 3 { + if logBit, err := filter.GetLogBit(); err != nil { + return 0, false, fmt.Errorf("unable to fetch SECCOMP_FILTER_FLAG_LOG bit: %w", err) + } else if logBit { + flags |= uint(C.C_FILTER_FLAG_LOG) + } + } + if apiLevel >= 4 { + if ssb, err := filter.GetSSB(); err != nil { + return 0, false, fmt.Errorf("unable to fetch SECCOMP_FILTER_FLAG_SPEC_ALLOW bit: %w", err) + } else if ssb { + flags |= uint(C.C_FILTER_FLAG_SPEC_ALLOW) + } + } + // XXX: add newly supported filter flags above this line. + + for _, call := range config.Syscalls { + if call.Action == configs.Notify { + flags |= uint(C.C_FILTER_FLAG_NEW_LISTENER) + break + } + } + + return flags, noNewPrivs, err +} + +func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (fd int, err error) { + // This debug output is validated in tests/integration/seccomp.bats + // by the SECCOMP_FILTER_FLAG_* test. + logrus.Debugf("seccomp filter flags: %d", flags) + fprog := unix.SockFprog{ + Len: uint16(len(filter)), + Filter: &filter[0], + } + fd = -1 // only return a valid fd when C_FILTER_FLAG_NEW_LISTENER is set + // If no seccomp flags were requested we can use the old-school prctl(2). + if flags == 0 { + err = unix.Prctl(unix.PR_SET_SECCOMP, + unix.SECCOMP_MODE_FILTER, + uintptr(unsafe.Pointer(&fprog)), 0, 0) + } else { + fdptr, _, errno := unix.RawSyscall(unix.SYS_SECCOMP, + uintptr(C.C_SET_MODE_FILTER), + uintptr(flags), uintptr(unsafe.Pointer(&fprog))) + if errno != 0 { + err = errno + } + if flags&uint(C.C_FILTER_FLAG_NEW_LISTENER) != 0 { + fd = int(fdptr) + } + } + runtime.KeepAlive(filter) + runtime.KeepAlive(fprog) + return fd, err +} + +// PatchAndLoad takes a seccomp configuration and a libseccomp filter which has +// been pre-configured with the set of rules in the seccomp config. 
It then +// patches said filter to handle -ENOSYS in a much nicer manner than the +// default libseccomp default action behaviour, and loads the patched filter +// into the kernel for the current process. +func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (int, error) { + // Generate a patched filter. + fprog, err := enosysPatchFilter(config, filter) + if err != nil { + return -1, fmt.Errorf("error patching filter: %w", err) + } + + // Get the set of libseccomp flags set. + seccompFlags, noNewPrivs, err := filterFlags(config, filter) + if err != nil { + return -1, fmt.Errorf("unable to fetch seccomp filter flags: %w", err) + } + + // Set no_new_privs if it was requested, though in runc we handle + // no_new_privs separately so warn if we hit this path. + if noNewPrivs { + logrus.Warnf("potentially misconfigured filter -- setting no_new_privs in seccomp path") + if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { + return -1, fmt.Errorf("error enabling no_new_privs bit: %w", err) + } + } + + // Finally, load the filter. 
+ fd, err := sysSeccompSetFilter(seccompFlags, fprog) + if err != nil { + return -1, fmt.Errorf("error loading seccomp filter: %w", err) + } + + return fd, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go new file mode 100644 index 0000000000..2812ca4612 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go @@ -0,0 +1,3 @@ +//go:build !linux || !cgo || !seccomp + +package patchbpf diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go new file mode 100644 index 0000000000..e399972aa5 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go @@ -0,0 +1,350 @@ +//go:build cgo && seccomp + +package seccomp + +import ( + "errors" + "fmt" + + libseccomp "github.com/seccomp/libseccomp-golang" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/seccomp/patchbpf" + "github.com/opencontainers/runtime-spec/specs-go" +) + +var ( + actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM)) + actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) +) + +const ( + // Linux system calls can have at most 6 arguments + syscallMaxArguments int = 6 +) + +// InitSeccomp installs the seccomp filters to be used in the container as +// specified in config. +// Returns the seccomp file descriptor if any of the filters include a +// SCMP_ACT_NOTIFY action, otherwise returns -1. 
+func InitSeccomp(config *configs.Seccomp) (int, error) { + if config == nil { + return -1, errors.New("cannot initialize Seccomp - nil config passed") + } + + defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet) + if err != nil { + return -1, errors.New("error initializing seccomp - invalid default action") + } + + // Ignore the error since pre-2.4 libseccomp is treated as API level 0. + apiLevel, _ := libseccomp.GetAPI() + for _, call := range config.Syscalls { + if call.Action == configs.Notify { + if apiLevel < 6 { + return -1, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel) + } + + // We can't allow the write syscall to notify to the seccomp agent. + // After InitSeccomp() is called, we need to syncParentSeccomp() to write the seccomp fd plain + // number, so the parent sends it to the seccomp agent. If we use SCMP_ACT_NOTIFY on write, we + // never can write the seccomp fd to the parent and therefore the seccomp agent never receives + // the seccomp fd and runc is hang during initialization. + // + // Note that read()/close(), that are also used in syncParentSeccomp(), _can_ use SCMP_ACT_NOTIFY. + // Because we write the seccomp fd on the pipe to the parent, the parent is able to proceed and + // send the seccomp fd to the agent (it is another process and not subject to the seccomp + // filter). We will be blocked on read()/close() inside syncParentSeccomp() but if the seccomp + // agent allows those syscalls to proceed, initialization works just fine and the agent can + // handle future read()/close() syscalls as it wanted. + if call.Name == "write" { + return -1, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall") + } + } + } + + // See comment on why write is not allowed. The same reason applies, as this can mean handling write too. 
+ if defaultAction == libseccomp.ActNotify { + return -1, errors.New("SCMP_ACT_NOTIFY cannot be used as default action") + } + + filter, err := libseccomp.NewFilter(defaultAction) + if err != nil { + return -1, fmt.Errorf("error creating filter: %w", err) + } + + // Add extra architectures + for _, arch := range config.Architectures { + scmpArch, err := libseccomp.GetArchFromString(arch) + if err != nil { + return -1, fmt.Errorf("error validating Seccomp architecture: %w", err) + } + if err := filter.AddArch(scmpArch); err != nil { + return -1, fmt.Errorf("error adding architecture to seccomp filter: %w", err) + } + } + + // Add extra flags. + for _, flag := range config.Flags { + if err := setFlag(filter, flag); err != nil { + return -1, err + } + } + + // Enable libseccomp binary tree optimization for longer rulesets. + // + // The number below chosen semi-arbitrarily, considering the following: + // 1. libseccomp <= 2.5.4 misbehaves when binary tree optimization + // is enabled and there are 0 rules. + // 2. All known libseccomp versions (2.5.0 to 2.5.4) generate a binary + // tree with 4 syscalls per node. + if len(config.Syscalls) > 32 { + if err := filter.SetOptimize(2); err != nil { + // The error is not fatal and is probably means we have older libseccomp. 
+ logrus.Debugf("seccomp binary tree optimization not available: %v", err) + } + } + + // Unset no new privs bit + if err := filter.SetNoNewPrivsBit(false); err != nil { + return -1, fmt.Errorf("error setting no new privileges: %w", err) + } + + // Add a rule for each syscall + for _, call := range config.Syscalls { + if call == nil { + return -1, errors.New("encountered nil syscall while initializing Seccomp") + } + + if err := matchCall(filter, call, defaultAction); err != nil { + return -1, err + } + } + + seccompFd, err := patchbpf.PatchAndLoad(config, filter) + if err != nil { + return -1, fmt.Errorf("error loading seccomp filter into kernel: %w", err) + } + + return seccompFd, nil +} + +type unknownFlagError struct { + flag specs.LinuxSeccompFlag +} + +func (e *unknownFlagError) Error() string { + return "seccomp flag " + string(e.flag) + " is not known to runc" +} + +func setFlag(filter *libseccomp.ScmpFilter, flag specs.LinuxSeccompFlag) error { + switch flag { + case flagTsync: + // libseccomp-golang always use filterAttrTsync when + // possible so all goroutines will receive the same + // rules, so there is nothing to do. It does not make + // sense to apply the seccomp filter on only one + // thread; other threads will be terminated after exec + // anyway. + return nil + case specs.LinuxSeccompFlagLog: + if err := filter.SetLogBit(true); err != nil { + return fmt.Errorf("error adding log flag to seccomp filter: %w", err) + } + return nil + case specs.LinuxSeccompFlagSpecAllow: + if err := filter.SetSSB(true); err != nil { + return fmt.Errorf("error adding SSB flag to seccomp filter: %w", err) + } + return nil + } + // NOTE when adding more flags above, do not forget to also: + // - add new flags to `flags` slice in config.go; + // - add new flag values to flags_value() in tests/integration/seccomp.bats; + // - modify func filterFlags in patchbpf/ accordingly. 
+ + return &unknownFlagError{flag: flag} +} + +// FlagSupported checks if the flag is known to runc and supported by +// currently used libseccomp and kernel (i.e. it can be set). +func FlagSupported(flag specs.LinuxSeccompFlag) error { + filter := &libseccomp.ScmpFilter{} + err := setFlag(filter, flag) + + // For flags we don't know, setFlag returns unknownFlagError. + var uf *unknownFlagError + if errors.As(err, &uf) { + return err + } + // For flags that are known to runc and libseccomp-golang but can not + // be applied because either libseccomp or the kernel is too old, + // seccomp.VersionError is returned. + var verErr *libseccomp.VersionError + if errors.As(err, &verErr) { + // Not supported by libseccomp or the kernel. + return err + } + + // All other flags are known and supported. + return nil +} + +// Convert Libcontainer Action to Libseccomp ScmpAction +func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) { + switch act { + case configs.Kill, configs.KillThread: + return libseccomp.ActKillThread, nil + case configs.Errno: + if errnoRet != nil { + return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil + } + return actErrno, nil + case configs.Trap: + return libseccomp.ActTrap, nil + case configs.Allow: + return libseccomp.ActAllow, nil + case configs.Trace: + if errnoRet != nil { + return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil + } + return actTrace, nil + case configs.Log: + return libseccomp.ActLog, nil + case configs.Notify: + return libseccomp.ActNotify, nil + case configs.KillProcess: + return libseccomp.ActKillProcess, nil + default: + return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule") + } +} + +// Convert Libcontainer Operator to Libseccomp ScmpCompareOp +func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { + switch op { + case configs.EqualTo: + return libseccomp.CompareEqual, nil + case configs.NotEqualTo: + return libseccomp.CompareNotEqual, 
nil + case configs.GreaterThan: + return libseccomp.CompareGreater, nil + case configs.GreaterThanOrEqualTo: + return libseccomp.CompareGreaterEqual, nil + case configs.LessThan: + return libseccomp.CompareLess, nil + case configs.LessThanOrEqualTo: + return libseccomp.CompareLessOrEqual, nil + case configs.MaskEqualTo: + return libseccomp.CompareMaskedEqual, nil + default: + return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule") + } +} + +// Convert Libcontainer Arg to Libseccomp ScmpCondition +func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { + cond := libseccomp.ScmpCondition{} + + if arg == nil { + return cond, errors.New("cannot convert nil to syscall condition") + } + + op, err := getOperator(arg.Op) + if err != nil { + return cond, err + } + + return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) +} + +// Add a rule to match a single syscall +func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libseccomp.ScmpAction) error { + if call == nil || filter == nil { + return errors.New("cannot use nil as syscall to block") + } + + if len(call.Name) == 0 { + return errors.New("empty string is not a valid syscall") + } + + // Convert the call's action to the libseccomp equivalent + callAct, err := getAction(call.Action, call.ErrnoRet) + if err != nil { + return fmt.Errorf("action in seccomp profile is invalid: %w", err) + } + if callAct == defAct { + // This rule is redundant, silently skip it + // to avoid error from AddRule. + return nil + } + + // If we can't resolve the syscall, assume it is not supported + // by this kernel. Warn about it, don't error out. 
+ callNum, err := libseccomp.GetSyscallFromName(call.Name) + if err != nil { + logrus.Debugf("unknown seccomp syscall %q ignored", call.Name) + return nil + } + + // Unconditional match - just add the rule + if len(call.Args) == 0 { + if err := filter.AddRule(callNum, callAct); err != nil { + return fmt.Errorf("error adding seccomp filter rule for syscall %s: %w", call.Name, err) + } + } else { + // If two or more arguments have the same condition, + // Revert to old behavior, adding each condition as a separate rule + argCounts := make([]uint, syscallMaxArguments) + conditions := []libseccomp.ScmpCondition{} + + for _, cond := range call.Args { + newCond, err := getCondition(cond) + if err != nil { + return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %w", call.Name, err) + } + + argCounts[cond.Index] += 1 + + conditions = append(conditions, newCond) + } + + hasMultipleArgs := false + for _, count := range argCounts { + if count > 1 { + hasMultipleArgs = true + break + } + } + + if hasMultipleArgs { + // Revert to old behavior + // Add each condition attached to a separate rule + for _, cond := range conditions { + condArr := []libseccomp.ScmpCondition{cond} + + if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil { + return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err) + } + } + } else { + // No conditions share same argument + // Use new, proper behavior + if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil { + return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err) + } + } + } + + return nil +} + +// Version returns major, minor, and micro. +func Version() (uint, uint, uint) { + return libseccomp.GetLibraryVersion() +} + +// Enabled is true if seccomp support is compiled in. 
+const Enabled = true diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go new file mode 100644 index 0000000000..25713f2327 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go @@ -0,0 +1,33 @@ +//go:build !linux || !cgo || !seccomp + +package seccomp + +import ( + "errors" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" +) + +var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") + +// InitSeccomp does nothing because seccomp is not supported. +func InitSeccomp(config *configs.Seccomp) (int, error) { + if config != nil { + return -1, ErrSeccompNotEnabled + } + return -1, nil +} + +// FlagSupported tells if a provided seccomp flag is supported. +func FlagSupported(_ specs.LinuxSeccompFlag) error { + return ErrSeccompNotEnabled +} + +// Version returns major, minor, and micro. +func Version() (uint, uint, uint) { + return 0, 0, 0 +} + +// Enabled is true if seccomp support is compiled in. 
+const Enabled = false diff --git a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go new file mode 100644 index 0000000000..0a79f197e6 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go @@ -0,0 +1,158 @@ +package libcontainer + +import ( + "errors" + "fmt" + "os" + "os/exec" + + "github.com/opencontainers/selinux/go-selinux" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/libcontainer/apparmor" + "github.com/opencontainers/runc/libcontainer/keys" + "github.com/opencontainers/runc/libcontainer/seccomp" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" +) + +// linuxSetnsInit performs the container's initialization for running a new process +// inside an existing container. +type linuxSetnsInit struct { + pipe *syncSocket + consoleSocket *os.File + pidfdSocket *os.File + config *initConfig + logPipe *os.File +} + +func (l *linuxSetnsInit) getSessionRingName() string { + return "_ses." + l.config.ContainerID +} + +func (l *linuxSetnsInit) Init() error { + if !l.config.Config.NoNewKeyring { + if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil { + return err + } + defer selinux.SetKeyLabel("") //nolint: errcheck + // Do not inherit the parent's session keyring. + if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil { + // Same justification as in standart_init_linux.go as to why we + // don't bail on ENOSYS. + // + // TODO(cyphar): And we should have logging here too. 
+ if !errors.Is(err, unix.ENOSYS) { + return fmt.Errorf("unable to join session keyring: %w", err) + } + } + } + + if l.config.CreateConsole { + if err := setupConsole(l.consoleSocket, l.config, false); err != nil { + return err + } + if err := system.Setctty(); err != nil { + return err + } + } + if l.pidfdSocket != nil { + if err := setupPidfd(l.pidfdSocket, "setns"); err != nil { + return fmt.Errorf("failed to setup pidfd: %w", err) + } + } + if l.config.NoNewPrivileges { + if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { + return err + } + } + if l.config.Config.Umask != nil { + unix.Umask(int(*l.config.Config.Umask)) + } + + if err := setupScheduler(l.config); err != nil { + return err + } + + if err := setupIOPriority(l.config); err != nil { + return err + } + // Tell our parent that we're ready to exec. This must be done before the + // Seccomp rules have been applied, because we need to be able to read and + // write to a socket. + if err := syncParentReady(l.pipe); err != nil { + return fmt.Errorf("sync ready: %w", err) + } + + if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil { + return err + } + defer selinux.SetExecLabel("") //nolint: errcheck + // Without NoNewPrivileges seccomp is a privileged operation, so we need to + // do this before dropping capabilities; otherwise do it as late as possible + // just before execve so as few syscalls take place after it as possible. 
+ if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { + seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp) + if err != nil { + return err + } + if err := syncParentSeccomp(l.pipe, seccompFd); err != nil { + return err + } + } + if err := finalizeNamespace(l.config); err != nil { + return err + } + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + return err + } + if l.config.Config.Personality != nil { + if err := setupPersonality(l.config.Config); err != nil { + return err + } + } + // Check for the arg early to make sure it exists. + name, err := exec.LookPath(l.config.Args[0]) + if err != nil { + return err + } + // Set seccomp as close to execve as possible, so as few syscalls take + // place afterward (reducing the amount of syscalls that users need to + // enable in their seccomp profiles). + if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { + seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp) + if err != nil { + return fmt.Errorf("unable to init seccomp: %w", err) + } + if err := syncParentSeccomp(l.pipe, seccompFd); err != nil { + return err + } + } + + // Close the pipe to signal that we have completed our init. + // Please keep this because we don't want to get a pipe write error if + // there is an error from `execve` after all fds closed. + _ = l.pipe.Close() + + // Close the log pipe fd so the parent's ForwardLogs can exit. + logrus.Debugf("setns_init: about to exec") + if err := l.logPipe.Close(); err != nil { + return fmt.Errorf("close log pipe: %w", err) + } + + // Close all file descriptors we are not passing to the container. This is + // necessary because the execve target could use internal runc fds as the + // execve path, potentially giving access to binary files from the host + // (which can then be opened by container processes, leading to container + // escapes). 
Note that because this operation will close any open file + // descriptors that are referenced by (*os.File) handles from underneath + // the Go runtime, we must not do any file operations after this point + // (otherwise the (*os.File) finaliser could close the wrong file). See + // CVE-2024-21626 for more information as to why this protection is + // necessary. + if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { + return err + } + return system.Exec(name, l.config.Args, l.config.Env) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go new file mode 100644 index 0000000000..21516bd338 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go @@ -0,0 +1,298 @@ +package libcontainer + +import ( + "errors" + "fmt" + "os" + "os/exec" + + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/opencontainers/selinux/go-selinux" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/internal/sys" + "github.com/opencontainers/runc/libcontainer/apparmor" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/keys" + "github.com/opencontainers/runc/libcontainer/seccomp" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/opencontainers/runc/libcontainer/utils" +) + +type linuxStandardInit struct { + pipe *syncSocket + consoleSocket *os.File + pidfdSocket *os.File + parentPid int + fifoFile *os.File + logPipe *os.File + config *initConfig +} + +func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { + var newperms uint32 + + if l.config.Config.Namespaces.Contains(configs.NEWUSER) { + // With user ns we need 'other' search permissions. + newperms = 0x8 + } else { + // Without user ns we need 'UID' search permissions. 
+ newperms = 0x80000 + } + + // Create a unique per session container name that we can join in setns; + // However, other containers can also join it. + return "_ses." + l.config.ContainerID, 0xffffffff, newperms +} + +func (l *linuxStandardInit) Init() error { + if !l.config.Config.NoNewKeyring { + if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil { + return err + } + defer selinux.SetKeyLabel("") //nolint: errcheck + ringname, keepperms, newperms := l.getSessionRingParams() + + // Do not inherit the parent's session keyring. + if sessKeyId, err := keys.JoinSessionKeyring(ringname); err != nil { + // If keyrings aren't supported then it is likely we are on an + // older kernel (or inside an LXC container). While we could bail, + // the security feature we are using here is best-effort (it only + // really provides marginal protection since VFS credentials are + // the only significant protection of keyrings). + // + // TODO(cyphar): Log this so people know what's going on, once we + // have proper logging in 'runc init'. + if !errors.Is(err, unix.ENOSYS) { + return fmt.Errorf("unable to join session keyring: %w", err) + } + } else { + // Make session keyring searchable. If we've gotten this far we + // bail on any error -- we don't want to have a keyring with bad + // permissions. + if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { + return fmt.Errorf("unable to mod keyring permissions: %w", err) + } + } + } + + if err := setupNetwork(l.config); err != nil { + return err + } + if err := setupRoute(l.config.Config); err != nil { + return err + } + + // initialises the labeling system + selinux.GetEnabled() + + err := prepareRootfs(l.pipe, l.config) + if err != nil { + return err + } + + // Set up the console. This has to be done *before* we finalize the rootfs, + // but *after* we've given the user the chance to set up all of the mounts + // they wanted. 
+ if l.config.CreateConsole { + if err := setupConsole(l.consoleSocket, l.config, true); err != nil { + return err + } + if err := system.Setctty(); err != nil { + return &os.SyscallError{Syscall: "ioctl(setctty)", Err: err} + } + } + + if l.pidfdSocket != nil { + if err := setupPidfd(l.pidfdSocket, "standard"); err != nil { + return fmt.Errorf("failed to setup pidfd: %w", err) + } + } + + // Finish the rootfs setup. + if l.config.Config.Namespaces.Contains(configs.NEWNS) { + if err := finalizeRootfs(l.config.Config); err != nil { + return err + } + } + + if hostname := l.config.Config.Hostname; hostname != "" { + if err := unix.Sethostname([]byte(hostname)); err != nil { + return &os.SyscallError{Syscall: "sethostname", Err: err} + } + } + if domainname := l.config.Config.Domainname; domainname != "" { + if err := unix.Setdomainname([]byte(domainname)); err != nil { + return &os.SyscallError{Syscall: "setdomainname", Err: err} + } + } + if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { + return fmt.Errorf("unable to apply apparmor profile: %w", err) + } + + if err := sys.WriteSysctls(l.config.Config.Sysctl); err != nil { + return err + } + for _, path := range l.config.Config.ReadonlyPaths { + if err := readonlyPath(path); err != nil { + return fmt.Errorf("can't make %q read-only: %w", path, err) + } + } + + if err := maskPaths(l.config.Config.MaskPaths, l.config.Config.MountLabel); err != nil { + return err + } + pdeath, err := system.GetParentDeathSignal() + if err != nil { + return fmt.Errorf("can't get pdeath signal: %w", err) + } + if l.config.NoNewPrivileges { + if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { + return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err} + } + } + + if err := setupScheduler(l.config); err != nil { + return err + } + + if err := setupIOPriority(l.config); err != nil { + return err + } + + // Tell our parent that we're ready to exec. 
This must be done before the + // Seccomp rules have been applied, because we need to be able to read and + // write to a socket. + if err := syncParentReady(l.pipe); err != nil { + return fmt.Errorf("sync ready: %w", err) + } + if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil { + return fmt.Errorf("can't set process label: %w", err) + } + defer selinux.SetExecLabel("") //nolint: errcheck + // Without NoNewPrivileges seccomp is a privileged operation, so we need to + // do this before dropping capabilities; otherwise do it as late as possible + // just before execve so as few syscalls take place after it as possible. + if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { + seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp) + if err != nil { + return err + } + + if err := syncParentSeccomp(l.pipe, seccompFd); err != nil { + return err + } + } + if err := finalizeNamespace(l.config); err != nil { + return err + } + // finalizeNamespace can change user/group which clears the parent death + // signal, so we restore it here. + if err := pdeath.Restore(); err != nil { + return fmt.Errorf("can't restore pdeath signal: %w", err) + } + + // In case we have any StartContainer hooks to run, and they don't + // have environment configured explicitly, make sure they will be run + // with the same environment as container's init. + // + // NOTE the above described behavior is not part of runtime-spec, but + // rather a de facto historical thing we afraid to change. + if h := l.config.Config.Hooks[configs.StartContainer]; len(h) > 0 { + h.SetDefaultEnv(l.config.Env) + } + + // Compare the parent from the initial start of the init process and make + // sure that it did not change. if the parent changes that means it died + // and we were reparented to something else so we should just kill ourself + // and not cause problems for someone else. 
+ if unix.Getppid() != l.parentPid { + return unix.Kill(unix.Getpid(), unix.SIGKILL) + } + // Check for the arg before waiting to make sure it exists and it is + // returned as a create time error. + name, err := exec.LookPath(l.config.Args[0]) + if err != nil { + return err + } + + // Set seccomp as close to execve as possible, so as few syscalls take + // place afterward (reducing the amount of syscalls that users need to + // enable in their seccomp profiles). However, this needs to be done + // before closing the pipe since we need it to pass the seccompFd to + // the parent. + if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { + seccompFd, err := seccomp.InitSeccomp(l.config.Config.Seccomp) + if err != nil { + return fmt.Errorf("unable to init seccomp: %w", err) + } + + if err := syncParentSeccomp(l.pipe, seccompFd); err != nil { + return err + } + } + + // Set personality if specified. + if l.config.Config.Personality != nil { + if err := setupPersonality(l.config.Config); err != nil { + return err + } + } + + // Close the pipe to signal that we have completed our init. + logrus.Debugf("init: closing the pipe to signal completion") + _ = l.pipe.Close() + + // Close the log pipe fd so the parent's ForwardLogs can exit. + logrus.Debugf("init: about to wait on exec fifo") + if err := l.logPipe.Close(); err != nil { + return fmt.Errorf("close log pipe: %w", err) + } + + // Wait for the FIFO to be opened on the other side before exec-ing the + // user process. We open it through /proc/self/fd/$fd, because the fd that + // was given to us was an O_PATH fd to the fifo itself. Linux allows us to + // re-open an O_PATH fd through /proc. 
+ fifoFile, err := pathrs.Reopen(l.fifoFile, unix.O_WRONLY|unix.O_CLOEXEC) + if err != nil { + return fmt.Errorf("reopen exec fifo: %w", err) + } + defer fifoFile.Close() + if _, err := fifoFile.Write([]byte("0")); err != nil { + return &os.PathError{Op: "write exec fifo", Path: fifoFile.Name(), Err: err} + } + + // Close the O_PATH fifofd fd before exec because the kernel resets + // dumpable in the wrong order. This has been fixed in newer kernels, but + // we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels. + // N.B. the core issue itself (passing dirfds to the host filesystem) has + // since been resolved. + // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 + _ = fifoFile.Close() + _ = l.fifoFile.Close() + + if s := l.config.SpecState; s != nil { + s.Pid = unix.Getpid() + s.Status = specs.StateCreated + if err := l.config.Config.Hooks.Run(configs.StartContainer, s); err != nil { + return err + } + } + + // Close all file descriptors we are not passing to the container. This is + // necessary because the execve target could use internal runc fds as the + // execve path, potentially giving access to binary files from the host + // (which can then be opened by container processes, leading to container + // escapes). Note that because this operation will close any open file + // descriptors that are referenced by (*os.File) handles from underneath + // the Go runtime, we must not do any file operations after this point + // (otherwise the (*os.File) finaliser could close the wrong file). See + // CVE-2024-21626 for more information as to why this protection is + // necessary. 
+ if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { + return err + } + return system.Exec(name, l.config.Args, l.config.Env) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go new file mode 100644 index 0000000000..2b7b8b5bc3 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go @@ -0,0 +1,244 @@ +package libcontainer + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/opencontainers/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" +) + +func newStateTransitionError(from, to containerState) error { + return &stateTransitionError{ + From: from.status().String(), + To: to.status().String(), + } +} + +// stateTransitionError is returned when an invalid state transition happens from one +// state to another. +type stateTransitionError struct { + From string + To string +} + +func (s *stateTransitionError) Error() string { + return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To) +} + +type containerState interface { + transition(containerState) error + destroy() error + status() Status +} + +func destroy(c *Container) error { + // Usually, when a container init is gone, all other processes in its + // cgroup are killed by the kernel. This is not the case for a shared + // PID namespace container, which may have some processes left after + // its init is killed or exited. + // + // As the container without init process running is considered stopped, + // and destroy is supposed to remove all the container resources, we need + // to kill those processes here. 
+ if !c.config.Namespaces.IsPrivate(configs.NEWPID) { + // Likely to fail when c.config.RootlessCgroups is true + _ = signalAllProcesses(c.cgroupManager, unix.SIGKILL) + } + if err := c.cgroupManager.Destroy(); err != nil { + return fmt.Errorf("unable to remove container's cgroup: %w", err) + } + if c.intelRdtManager != nil { + if err := c.intelRdtManager.Destroy(); err != nil { + return fmt.Errorf("unable to remove container's IntelRDT group: %w", err) + } + } + if err := os.RemoveAll(c.stateDir); err != nil { + return fmt.Errorf("unable to remove container state dir: %w", err) + } + c.initProcess = nil + err := runPoststopHooks(c) + c.state = &stoppedState{c: c} + return err +} + +func runPoststopHooks(c *Container) error { + hooks := c.config.Hooks + if hooks == nil { + return nil + } + + s, err := c.currentOCIState() + if err != nil { + return err + } + s.Status = specs.StateStopped + + return hooks.Run(configs.Poststop, s) +} + +// stoppedState represents a container is a stopped/destroyed state. +type stoppedState struct { + c *Container +} + +func (b *stoppedState) status() Status { + return Stopped +} + +func (b *stoppedState) transition(s containerState) error { + switch s.(type) { + case *runningState, *restoredState: + b.c.state = s + return nil + case *stoppedState: + return nil + } + return newStateTransitionError(b, s) +} + +func (b *stoppedState) destroy() error { + return destroy(b.c) +} + +// runningState represents a container that is currently running. 
+type runningState struct { + c *Container +} + +func (r *runningState) status() Status { + return Running +} + +func (r *runningState) transition(s containerState) error { + switch s.(type) { + case *stoppedState: + if r.c.hasInit() { + return ErrRunning + } + r.c.state = s + return nil + case *pausedState: + r.c.state = s + return nil + case *runningState: + return nil + } + return newStateTransitionError(r, s) +} + +func (r *runningState) destroy() error { + if r.c.hasInit() { + return ErrRunning + } + return destroy(r.c) +} + +type createdState struct { + c *Container +} + +func (i *createdState) status() Status { + return Created +} + +func (i *createdState) transition(s containerState) error { + switch s.(type) { + case *runningState, *pausedState, *stoppedState: + i.c.state = s + return nil + case *createdState: + return nil + } + return newStateTransitionError(i, s) +} + +func (i *createdState) destroy() error { + _ = i.c.initProcess.signal(unix.SIGKILL) + return destroy(i.c) +} + +// pausedState represents a container that is currently pause. It cannot be destroyed in a +// paused state and must transition back to running first. +type pausedState struct { + c *Container +} + +func (p *pausedState) status() Status { + return Paused +} + +func (p *pausedState) transition(s containerState) error { + switch s.(type) { + case *runningState, *stoppedState: + p.c.state = s + return nil + case *pausedState: + return nil + } + return newStateTransitionError(p, s) +} + +func (p *pausedState) destroy() error { + if p.c.hasInit() { + return ErrPaused + } + if err := p.c.cgroupManager.Freeze(cgroups.Thawed); err != nil { + return err + } + return destroy(p.c) +} + +// restoredState is the same as the running state but also has associated checkpoint +// information that maybe need destroyed when the container is stopped and destroy is called. 
+type restoredState struct { + imageDir string + c *Container +} + +func (r *restoredState) status() Status { + return Running +} + +func (r *restoredState) transition(s containerState) error { + switch s.(type) { + case *stoppedState, *runningState: + return nil + } + return newStateTransitionError(r, s) +} + +func (r *restoredState) destroy() error { + if _, err := os.Stat(filepath.Join(r.c.stateDir, "checkpoint")); err != nil { + if !os.IsNotExist(err) { + return err + } + } + return destroy(r.c) +} + +// loadedState is used whenever a container is restored, loaded, or setting additional +// processes inside and it should not be destroyed when it is exiting. +type loadedState struct { + c *Container + s Status +} + +func (n *loadedState) status() Status { + return n.s +} + +func (n *loadedState) transition(s containerState) error { + n.c.state = s + return nil +} + +func (n *loadedState) destroy() error { + if err := n.c.refreshState(); err != nil { + return err + } + return n.c.state.destroy() +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go new file mode 100644 index 0000000000..e776aad435 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go @@ -0,0 +1,13 @@ +package libcontainer + +import ( + "github.com/opencontainers/cgroups" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/types" +) + +type Stats struct { + Interfaces []*types.NetworkInterface + CgroupStats *cgroups.Stats + IntelRdtStats *intelrdt.Stats +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/sync.go b/vendor/github.com/opencontainers/runc/libcontainer/sync.go new file mode 100644 index 0000000000..0a54a4b81e --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/sync.go @@ -0,0 +1,203 @@ +package libcontainer + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "os" + "strconv" + + 
"github.com/opencontainers/runc/libcontainer/utils" + + "github.com/sirupsen/logrus" +) + +type syncType string + +// Constants that are used for synchronisation between the parent and child +// during container setup. They come in pairs (with procError being a generic +// response which is followed by an &initError). +// +// [ child ] <-> [ parent ] +// +// procMountPlease --> [open(2) or open_tree(2) and configure mount] +// Arg: configs.Mount +// <-- procMountFd +// file: mountfd +// +// procSeccomp --> [forward fd to listenerPath] +// file: seccomp fd +// --- no return synchronisation +// +// procHooks --> [run hooks] +// <-- procHooksDone +// +// procReady --> [final setup] +// <-- procRun +// +// procSeccomp --> [grab seccomp fd with pidfd_getfd()] +// <-- procSeccompDone +const ( + procError syncType = "procError" + procReady syncType = "procReady" + procRun syncType = "procRun" + procHooks syncType = "procHooks" + procHooksDone syncType = "procHooksDone" + procMountPlease syncType = "procMountPlease" + procMountFd syncType = "procMountFd" + procSeccomp syncType = "procSeccomp" + procSeccompDone syncType = "procSeccompDone" +) + +type syncFlags int + +const ( + syncFlagHasFd syncFlags = (1 << iota) +) + +type syncT struct { + Type syncType `json:"type"` + Flags syncFlags `json:"flags"` + Arg *json.RawMessage `json:"arg,omitempty"` + File *os.File `json:"-"` // passed oob through SCM_RIGHTS +} + +func (s syncT) String() string { + str := "type:" + string(s.Type) + if s.Flags != 0 { + str += " flags:0b" + strconv.FormatInt(int64(s.Flags), 2) + } + if s.Arg != nil { + str += " arg:" + string(*s.Arg) + } + if s.File != nil { + str += " file:" + s.File.Name() + " (fd:" + strconv.Itoa(int(s.File.Fd())) + ")" + } + return str +} + +// initError is used to wrap errors for passing them via JSON, +// as encoding/json can't unmarshal into error type. 
+type initError struct { + Message string `json:"message,omitempty"` +} + +func (i initError) Error() string { + return i.Message +} + +func doWriteSync(pipe *syncSocket, sync syncT) error { + sync.Flags &= ^syncFlagHasFd + if sync.File != nil { + sync.Flags |= syncFlagHasFd + } + logrus.Debugf("writing sync %s", sync) + data, err := json.Marshal(sync) + if err != nil { + return fmt.Errorf("marshal sync %v: %w", sync.Type, err) + } + if _, err := pipe.WritePacket(data); err != nil { + return fmt.Errorf("writing sync %v: %w", sync.Type, err) + } + if sync.Flags&syncFlagHasFd != 0 { + logrus.Debugf("writing sync file %s", sync) + if err := utils.SendFile(pipe.File(), sync.File); err != nil { + return fmt.Errorf("sending file after sync %q: %w", sync.Type, err) + } + } + return nil +} + +func writeSync(pipe *syncSocket, sync syncType) error { + return doWriteSync(pipe, syncT{Type: sync}) +} + +func writeSyncArg(pipe *syncSocket, sync syncType, arg interface{}) error { + argJSON, err := json.Marshal(arg) + if err != nil { + return fmt.Errorf("writing sync %v: marshal argument failed: %w", sync, err) + } + argJSONMsg := json.RawMessage(argJSON) + return doWriteSync(pipe, syncT{Type: sync, Arg: &argJSONMsg}) +} + +func doReadSync(pipe *syncSocket) (syncT, error) { + var sync syncT + logrus.Debugf("reading sync") + packet, err := pipe.ReadPacket() + if err != nil { + if errors.Is(err, io.EOF) { + logrus.Debugf("sync pipe closed") + return sync, err + } + return sync, fmt.Errorf("reading from parent failed: %w", err) + } + if err := json.Unmarshal(packet, &sync); err != nil { + return sync, fmt.Errorf("unmarshal sync from parent failed: %w", err) + } + logrus.Debugf("read sync %s", sync) + if sync.Type == procError { + var ierr initError + if sync.Arg == nil { + return sync, errors.New("procError missing error payload") + } + if err := json.Unmarshal(*sync.Arg, &ierr); err != nil { + return sync, fmt.Errorf("unmarshal procError failed: %w", err) + } + return sync, &ierr + 
} + if sync.Flags&syncFlagHasFd != 0 { + logrus.Debugf("reading sync file %s", sync) + file, err := utils.RecvFile(pipe.File()) + if err != nil { + return sync, fmt.Errorf("receiving fd from sync %v failed: %w", sync.Type, err) + } + sync.File = file + } + return sync, nil +} + +func readSyncFull(pipe *syncSocket, expected syncType) (syncT, error) { + sync, err := doReadSync(pipe) + if err != nil { + return sync, err + } + if sync.Type != expected { + return sync, fmt.Errorf("unexpected synchronisation flag: got %q, expected %q", sync.Type, expected) + } + return sync, nil +} + +func readSync(pipe *syncSocket, expected syncType) error { + sync, err := readSyncFull(pipe, expected) + if err != nil { + return err + } + if sync.Arg != nil { + return fmt.Errorf("sync %v had unexpected argument passed: %q", expected, string(*sync.Arg)) + } + if sync.File != nil { + _ = sync.File.Close() + return fmt.Errorf("sync %v had unexpected file passed", sync.Type) + } + return nil +} + +// parseSync runs the given callback function on each syncT received from the +// child. It will return once io.EOF is returned from the given pipe. +func parseSync(pipe *syncSocket, fn func(*syncT) error) error { + for { + sync, err := doReadSync(pipe) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return err + } + if err := fn(&sync); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/sync_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/sync_unix.go new file mode 100644 index 0000000000..69c0228dbe --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/sync_unix.go @@ -0,0 +1,95 @@ +package libcontainer + +import ( + "fmt" + "io" + "os" + "sync/atomic" + + "golang.org/x/sys/unix" +) + +// syncSocket is a wrapper around a SOCK_SEQPACKET socket, providing +// packet-oriented methods. 
This is needed because SOCK_SEQPACKET does not +// allow for partial reads, but the Go stdlib treats it as a streamable source, +// which ends up making things like json.Decoder hang forever if the packet is +// bigger than the internal read buffer. +type syncSocket struct { + f *os.File + closed atomic.Bool +} + +func newSyncSocket(f *os.File) *syncSocket { + return &syncSocket{f: f} +} + +func (s *syncSocket) File() *os.File { + return s.f +} + +func (s *syncSocket) Close() error { + // Even with errors from Close(), we have to assume the pipe was closed. + s.closed.Store(true) + return s.f.Close() +} + +func (s *syncSocket) isClosed() bool { + return s.closed.Load() +} + +func (s *syncSocket) WritePacket(b []byte) (int, error) { + return s.f.Write(b) +} + +func (s *syncSocket) ReadPacket() ([]byte, error) { + var ( + size int + err error + ) + + for { + size, _, err = unix.Recvfrom(int(s.f.Fd()), nil, unix.MSG_TRUNC|unix.MSG_PEEK) + if err != unix.EINTR { //nolint:errorlint // unix errors are bare + break + } + } + + if err != nil { + return nil, fmt.Errorf("fetch packet length from socket: %w", os.NewSyscallError("recvfrom", err)) + } + // We will only get a zero size if the socket has been closed from the + // other end (otherwise recvfrom(2) will block until a packet is ready). In + // addition, SOCK_SEQPACKET is treated as a stream source by Go stdlib so + // returning io.EOF here is correct from that perspective too. 
+ if size == 0 { + return nil, io.EOF + } + buf := make([]byte, size) + n, err := s.f.Read(buf) + if err != nil { + return nil, err + } + if n != size { + return nil, fmt.Errorf("packet read too short: expected %d byte packet but only %d bytes read", size, n) + } + return buf, nil +} + +func (s *syncSocket) Shutdown(how int) error { + if err := unix.Shutdown(int(s.f.Fd()), how); err != nil { + return &os.PathError{Op: "shutdown", Path: s.f.Name() + " (sync pipe)", Err: err} + } + return nil +} + +// newSyncSockpair returns a new SOCK_SEQPACKET unix socket pair to be used for +// runc-init synchronisation. +func newSyncSockpair(name string) (parent, child *syncSocket, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + parentFile := os.NewFile(uintptr(fds[1]), name+"-p") + childFile := os.NewFile(uintptr(fds[0]), name+"-c") + return newSyncSocket(parentFile), newSyncSocket(childFile), nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go new file mode 100644 index 0000000000..5e558c4f99 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go @@ -0,0 +1,191 @@ +//go:build linux + +package system + +import ( + "fmt" + "io" + "os" + "unsafe" + + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +type ParentDeathSignal int + +func (p ParentDeathSignal) Restore() error { + if p == 0 { + return nil + } + current, err := GetParentDeathSignal() + if err != nil { + return err + } + if p == current { + return nil + } + return p.Set() +} + +func (p ParentDeathSignal) Set() error { + return SetParentDeathSignal(uintptr(p)) +} + +func Exec(cmd string, args []string, env []string) error { + for { + err := unix.Exec(cmd, args, env) + if err != unix.EINTR { + return &os.PathError{Op: "exec", Path: cmd, Err: err} + } + } +} + +func 
SetParentDeathSignal(sig uintptr) error { + if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { + return err + } + return nil +} + +func GetParentDeathSignal() (ParentDeathSignal, error) { + var sig int + if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil { + return -1, err + } + return ParentDeathSignal(sig), nil +} + +func SetKeepCaps() error { + if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil { + return err + } + + return nil +} + +func ClearKeepCaps() error { + if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil { + return err + } + + return nil +} + +func Setctty() error { + if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil { + return err + } + return nil +} + +// SetSubreaper sets the value i as the subreaper setting for the calling process +func SetSubreaper(i int) error { + return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) +} + +// GetSubreaper returns the subreaper setting for the calling process +func GetSubreaper() (int, error) { + var i uintptr + + if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { + return -1, err + } + + return int(i), nil +} + +func ExecutableMemfd(comment string, flags int) (*os.File, error) { + // Try to use MFD_EXEC first. On pre-6.3 kernels we get -EINVAL for this + // flag. On post-6.3 kernels, with vm.memfd_noexec=1 this ensures we get an + // executable memfd. For vm.memfd_noexec=2 this is a bit more complicated. + // The original vm.memfd_noexec=2 implementation incorrectly silently + // allowed MFD_EXEC[1] -- this should be fixed in 6.6. On 6.6 and newer + // kernels, we will get -EACCES if we try to use MFD_EXEC with + // vm.memfd_noexec=2 (for 6.3-6.5, -EINVAL was the intended return value). 
+ // + // The upshot is we only need to retry without MFD_EXEC on -EINVAL because + // it just so happens that passing MFD_EXEC bypasses vm.memfd_noexec=2 on + // kernels where -EINVAL is actually a security denial. + memfd, err := unix.MemfdCreate(comment, flags|unix.MFD_EXEC) + if err == unix.EINVAL { + memfd, err = unix.MemfdCreate(comment, flags) + } + if err != nil { + if err == unix.EACCES { + logrus.Info("memfd_create(MFD_EXEC) failed, possibly due to vm.memfd_noexec=2 -- falling back to less secure O_TMPFILE") + } + err := os.NewSyscallError("memfd_create", err) + return nil, fmt.Errorf("failed to create executable memfd: %w", err) + } + return os.NewFile(uintptr(memfd), "/memfd:"+comment), nil +} + +// Copy is like io.Copy except it uses sendfile(2) if the source and sink are +// both (*os.File) as an optimisation to make copies faster. +func Copy(dst io.Writer, src io.Reader) (copied int64, err error) { + dstFile, _ := dst.(*os.File) + srcFile, _ := src.(*os.File) + + if dstFile != nil && srcFile != nil { + fi, err := srcFile.Stat() + if err != nil { + goto fallback + } + size := fi.Size() + for size > 0 { + n, err := unix.Sendfile(int(dstFile.Fd()), int(srcFile.Fd()), nil, int(size)) + if n > 0 { + size -= int64(n) + copied += int64(n) + } + if err == unix.EINTR { + continue + } + if err != nil { + if copied == 0 { + // If we haven't copied anything so far, we can safely just + // fallback to io.Copy. We could always do the fallback but + // it's safer to error out in the case of a partial copy + // followed by an error (which should never happen). + goto fallback + } + return copied, fmt.Errorf("partial sendfile copy: %w", err) + } + } + return copied, nil + } + +fallback: + return io.Copy(dst, src) +} + +// SetLinuxPersonality sets the Linux execution personality. For more information see the personality syscall documentation. +// checkout getLinuxPersonalityFromStr() from libcontainer/specconv/spec_linux.go for type conversion. 
+func SetLinuxPersonality(personality int) error { + _, _, errno := unix.Syscall(unix.SYS_PERSONALITY, uintptr(personality), 0, 0) + if errno != 0 { + return &os.SyscallError{Syscall: "set_personality", Err: errno} + } + return nil +} + +// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER). +func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) { + // Make sure O_NOCTTY is always set -- otherwise runc might accidentally + // gain it as a controlling terminal. O_CLOEXEC also needs to be set to + // make sure we don't leak the handle either. + flags |= unix.O_NOCTTY | unix.O_CLOEXEC + + // There is no nice wrapper for this kind of ioctl in unix. + peerFd, _, errno := unix.Syscall( + unix.SYS_IOCTL, + ptyFd, + uintptr(unix.TIOCGPTPEER), + uintptr(flags), + ) + if errno != 0 { + return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno) + } + return os.NewFile(peerFd, unsafePeerPath), nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go new file mode 100644 index 0000000000..34850dd831 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go @@ -0,0 +1,137 @@ +package system + +import ( + "fmt" + "io" + "os" + "strconv" + "strings" + + "github.com/opencontainers/runc/internal/pathrs" +) + +// State is the status of a process. +type State rune + +const ( // Only values for Linux 3.14 and later are listed here + Dead State = 'X' + DiskSleep State = 'D' + Running State = 'R' + Sleeping State = 'S' + Stopped State = 'T' + TracingStop State = 't' + Zombie State = 'Z' + Parked State = 'P' + Idle State = 'I' +) + +// String forms of the state from proc(5)'s documentation for +// /proc/[pid]/status' "State" field. 
+func (s State) String() string { + switch s { + case Dead: + return "dead" + case DiskSleep: + return "disk sleep" + case Running: + return "running" + case Sleeping: + return "sleeping" + case Stopped: + return "stopped" + case TracingStop: + return "tracing stop" + case Zombie: + return "zombie" + case Parked: + return "parked" + case Idle: + return "idle" // kernel thread + default: + return fmt.Sprintf("unknown (%c)", s) + } +} + +// Stat_t represents the information from /proc/[pid]/stat, as +// described in proc(5) with names based on the /proc/[pid]/status +// fields. +type Stat_t struct { + // Name is the command run by the process. + Name string + + // State is the state of the process. + State State + + // StartTime is the number of clock ticks after system boot (since + // Linux 2.6). + StartTime uint64 +} + +// Stat returns a Stat_t instance for the specified process. +func Stat(pid int) (Stat_t, error) { + var stat Stat_t + + statFile, err := pathrs.ProcPidOpen(pid, "stat", os.O_RDONLY) + if err != nil { + return stat, err + } + defer statFile.Close() + + bytes, err := io.ReadAll(statFile) + if err != nil { + return stat, err + } + return parseStat(string(bytes)) +} + +func parseStat(data string) (stat Stat_t, err error) { + // Example: + // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + // The fields are space-separated, see full description in proc(5). + // + // We are only interested in: + // * field 2: process name. It is the only field enclosed into + // parenthesis, as it can contain spaces (and parenthesis) inside. + // * field 3: process state, a single character (%c) + // * field 22: process start time, a long unsigned integer (%llu). + + // 1. Look for the first '(' and the last ')' first, what's in between is Name. + // We expect at least 20 fields and a space after the last one. 
+ + const minAfterName = 20*2 + 1 // the min field is '0 '. + + first := strings.IndexByte(data, '(') + if first < 0 || first+minAfterName >= len(data) { + return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data) + } + + last := strings.LastIndexByte(data, ')') + if last <= first || last+minAfterName >= len(data) { + return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data) + } + + stat.Name = data[first+1 : last] + + // 2. Remove fields 1 and 2 and a space after. State is right after. + data = data[last+2:] + stat.State = State(data[0]) + + // 3. StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces. + skipSpaces := 22 - 3 + for first = 0; skipSpaces > 0 && first < len(data); first++ { + if data[first] == ' ' { + skipSpaces-- + } + } + // Now first points to StartTime; look for space right after. + i := strings.IndexByte(data[first:], ' ') + if i < 0 { + return stat, fmt.Errorf("invalid stat data (too short): %q", data) + } + stat.StartTime, err = strconv.ParseUint(data[first:first+i], 10, 64) + if err != nil { + return stat, fmt.Errorf("invalid stat data (bad start time): %w", err) + } + + return stat, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go new file mode 100644 index 0000000000..4595fa82aa --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/rlimit_linux.go @@ -0,0 +1,15 @@ +//go:build go1.23 + +package system + +import ( + "syscall" +) + +// ClearRlimitNofileCache clears go runtime's nofile rlimit cache. The argument +// is process RLIMIT_NOFILE values. Relies on go.dev/cl/588076. +func ClearRlimitNofileCache(lim *syscall.Rlimit) { + // Ignore the return values since we only need to clean the cache, + // the limit is going to be set via unix.Prlimit elsewhere. 
+ _ = syscall.Setrlimit(syscall.RLIMIT_NOFILE, lim) +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go new file mode 100644 index 0000000000..3aca5bdacc --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go @@ -0,0 +1,135 @@ +package utils + +/* + * Copyright 2016, 2017 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import ( + "fmt" + "os" + "runtime" + + "golang.org/x/sys/unix" +) + +// MaxNameLen is the maximum length of the name of a file descriptor being sent +// using SendFile. The name of the file handle returned by RecvFile will never be +// larger than this value. +const MaxNameLen = 4096 + +// oobSpace is the size of the oob slice required to store a single FD. Note +// that unix.UnixRights appears to make the assumption that fd is always int32, +// so sizeof(fd) = 4. +var oobSpace = unix.CmsgSpace(4) + +// RecvFile waits for a file descriptor to be sent over the given AF_UNIX +// socket. The file name of the remote file descriptor will be recreated +// locally (it is sent as non-auxiliary data in the same payload). 
+func RecvFile(socket *os.File) (_ *os.File, Err error) { + name := make([]byte, MaxNameLen) + oob := make([]byte, oobSpace) + + sockfd := socket.Fd() + var ( + n, oobn int + err error + ) + + for { + n, oobn, _, _, err = unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC) + if err != unix.EINTR { //nolint:errorlint // unix errors are bare + break + } + } + + if err != nil { + return nil, os.NewSyscallError("recvmsg", err) + } + if n >= MaxNameLen || oobn != oobSpace { + return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) + } + // Truncate. + name = name[:n] + oob = oob[:oobn] + + scms, err := unix.ParseSocketControlMessage(oob) + if err != nil { + return nil, err + } + + // We cannot control how many SCM_RIGHTS we receive, and upon receiving + // them all of the descriptors are installed in our fd table, so we need to + // parse all of the SCM_RIGHTS we received in order to close all of the + // descriptors on error. + var fds []int + defer func() { + for i, fd := range fds { + if i == 0 && Err == nil { + // Only close the first one on error. + continue + } + // Always close extra ones. + _ = unix.Close(fd) + } + }() + var lastErr error + for _, scm := range scms { + if scm.Header.Type == unix.SCM_RIGHTS { + scmFds, err := unix.ParseUnixRights(&scm) + if err != nil { + lastErr = err + } else { + fds = append(fds, scmFds...) + } + } + } + if lastErr != nil { + return nil, lastErr + } + + // We do this after collecting the fds to make sure we close them all when + // returning an error here. + if len(scms) != 1 { + return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) + } + if len(fds) != 1 { + return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) + } + return os.NewFile(uintptr(fds[0]), string(name)), nil +} + +// SendFile sends a file over the given AF_UNIX socket. 
file.Name() is also +// included so that if the other end uses RecvFile, the file will have the same +// name information. +func SendFile(socket *os.File, file *os.File) error { + name := file.Name() + if len(name) >= MaxNameLen { + return fmt.Errorf("sendfd: filename too long: %s", name) + } + err := SendRawFd(socket, name, file.Fd()) + runtime.KeepAlive(file) + return err +} + +// SendRawFd sends a specific file descriptor over the given AF_UNIX socket. +func SendRawFd(socket *os.File, msg string, fd uintptr) error { + oob := unix.UnixRights(int(fd)) + for { + err := unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0) + if err != unix.EINTR { //nolint:errorlint // unix errors are bare + return os.NewSyscallError("sendmsg", err) + } + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go new file mode 100644 index 0000000000..17259de980 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go @@ -0,0 +1,115 @@ +package utils + +import ( + "encoding/json" + "io" + "os" + "path/filepath" + "strings" + + "golang.org/x/sys/unix" +) + +const ( + exitSignalOffset = 128 +) + +// ExitStatus returns the correct exit status for a process based on if it +// was signaled or exited cleanly +func ExitStatus(status unix.WaitStatus) int { + if status.Signaled() { + return exitSignalOffset + int(status.Signal()) + } + return status.ExitStatus() +} + +// WriteJSON writes the provided struct v to w using standard json marshaling +// without a trailing newline. 
This is used instead of json.Encoder because +// there might be a problem in json decoder in some cases, see: +// https://github.com/docker/docker/issues/14203#issuecomment-174177790 +func WriteJSON(w io.Writer, v interface{}) error { + data, err := json.Marshal(v) + if err != nil { + return err + } + _, err = w.Write(data) + return err +} + +// CleanPath makes a path safe for use with filepath.Join. This is done by not +// only cleaning the path, but also (if the path is relative) adding a leading +// '/' and cleaning it (then removing the leading '/'). This ensures that a +// path resulting from prepending another path will always resolve to lexically +// be a subdirectory of the prefixed path. This is all done lexically, so paths +// that include symlinks won't be safe as a result of using CleanPath. +func CleanPath(path string) string { + // Deal with empty strings nicely. + if path == "" { + return "" + } + + // Ensure that all paths are cleaned (especially problematic ones like + // "/../../../../../" which can cause lots of issues). + + if filepath.IsAbs(path) { + return filepath.Clean(path) + } + + // If the path isn't absolute, we need to do more processing to fix paths + // such as "../../../..//some/path". We also shouldn't convert absolute + // paths to relative ones. + path = filepath.Clean(string(os.PathSeparator) + path) + // This can't fail, as (by definition) all paths are relative to root. + path, _ = filepath.Rel(string(os.PathSeparator), path) + + return path +} + +// StripRoot returns the passed path, stripping the root path if it was +// (lexicially) inside it. Note that both passed paths will always be treated +// as absolute, and the returned path will also always be absolute. In +// addition, the paths are cleaned before stripping the root. +func StripRoot(root, path string) string { + // Make the paths clean and absolute. 
+ root, path = CleanPath("/"+root), CleanPath("/"+path) + switch { + case path == root: + path = "/" + case root == "/": + // do nothing + default: + path = strings.TrimPrefix(path, root+"/") + } + return CleanPath("/" + path) +} + +// SearchLabels searches through a list of key=value pairs for a given key, +// returning its value, and the binary flag telling whether the key exist. +func SearchLabels(labels []string, key string) (string, bool) { + key += "=" + for _, s := range labels { + if val, ok := strings.CutPrefix(s, key); ok { + return val, true + } + } + return "", false +} + +// Annotations returns the bundle path and user defined annotations from the +// libcontainer state. We need to remove the bundle because that is a label +// added by libcontainer. +func Annotations(labels []string) (bundle string, userAnnotations map[string]string) { + userAnnotations = make(map[string]string) + for _, l := range labels { + name, value, ok := strings.Cut(l, "=") + if !ok { + continue + } + if name == "bundle" { + bundle = value + } else { + userAnnotations[name] = value + } + } + return bundle, userAnnotations +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go new file mode 100644 index 0000000000..7dbec54dc9 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go @@ -0,0 +1,277 @@ +//go:build !windows + +package utils + +import ( + "fmt" + "math" + "os" + "path/filepath" + "runtime" + "strconv" + "sync" + _ "unsafe" // for go:linkname + + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/opencontainers/runc/internal/pathrs" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +var ( + haveCloseRangeCloexecBool bool + haveCloseRangeCloexecOnce sync.Once +) + +func haveCloseRangeCloexec() bool { + haveCloseRangeCloexecOnce.Do(func() { + // Make sure we're not closing a random file descriptor. 
+ tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0) + if err != nil { + return + } + defer unix.Close(tmpFd) + + err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC) + // Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC). + // -ENOSYS and -EINVAL ultimately mean we don't have support, but any + // other potential error would imply that even the most basic close + // operation wouldn't work. + haveCloseRangeCloexecBool = err == nil + }) + return haveCloseRangeCloexecBool +} + +type fdFunc func(fd int) + +// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in +// the current process. +func fdRangeFrom(minFd int, fn fdFunc) error { + fdDir, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) + if err != nil { + return fmt.Errorf("get handle to /proc/thread-self/fd: %w", err) + } + defer closer() + defer fdDir.Close() + + fdList, err := fdDir.Readdirnames(-1) + if err != nil { + return err + } + for _, fdStr := range fdList { + fd, err := strconv.Atoi(fdStr) + // Ignore non-numeric file names. + if err != nil { + continue + } + // Ignore descriptors lower than our specified minimum. + if fd < minFd { + continue + } + // Ignore the file descriptor we used for readdir, as it will be closed + // when we return. + if uintptr(fd) == fdDir.Fd() { + continue + } + // Run the closure. + fn(fd) + } + return nil +} + +// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or +// equal to minFd in the current process. +func CloseExecFrom(minFd int) error { + // Use close_range(CLOSE_RANGE_CLOEXEC) if possible. + if haveCloseRangeCloexec() { + err := unix.CloseRange(uint(minFd), math.MaxInt32, unix.CLOSE_RANGE_CLOEXEC) + if err == nil { + return nil + } + + logrus.Debugf("close_range failed, closing range one at a time (error: %v)", err) + + // If close_range fails, we fall back to the standard loop. + } + // Otherwise, fall back to the standard loop. 
+ return fdRangeFrom(minFd, unix.CloseOnExec) +} + +//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor + +// In order to make sure we do not close the internal epoll descriptors the Go +// runtime uses, we need to ensure that we skip descriptors that match +// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing, +// unfortunately there's no other way to be sure we're only keeping the file +// descriptors the Go runtime needs. Hopefully nothing blows up doing this... +func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive + +// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the +// current process, except for those critical to Go's runtime (such as the +// netpoll management descriptors). +// +// NOTE: That this function is incredibly dangerous to use in most Go code, as +// closing file descriptors from underneath *os.File handles can lead to very +// bad behaviour (the closed file descriptor can be re-used and then any +// *os.File operations would apply to the wrong file). This function is only +// intended to be called from the last stage of runc init. +func UnsafeCloseFrom(minFd int) error { + // We cannot use close_range(2) even if it is available, because we must + // not close some file descriptors. + return fdRangeFrom(minFd, func(fd int) { + if runtime_IsPollDescriptor(uintptr(fd)) { + // These are the Go runtimes internal netpoll file descriptors. + // These file descriptors are operated on deep in the Go scheduler, + // and closing those files from underneath Go can result in panics. + // There is no issue with keeping them because they are not + // executable and are not useful to an attacker anyway. Also we + // don't have any choice. + return + } + // There's nothing we can do about errors from close(2), and the + // only likely error to be seen is EBADF which indicates the fd was + // already closed (in which case, we got what we wanted). 
+ _ = unix.Close(fd) + }) +} + +// NewSockPair returns a new SOCK_STREAM unix socket pair. +func NewSockPair(name string) (parent, child *os.File, err error) { + fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil +} + +// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) +// corresponding to the unsafePath resolved within the root. Before passing the +// fd, this path is verified to have been inside the root -- so operating on it +// through the passed fdpath should be safe. Do not access this path through +// the original path strings, and do not attempt to use the pathname outside of +// the passed closure (the file handle will be freed once the closure returns). +func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { + // Remove the root then forcefully resolve inside the root. + unsafePath = StripRoot(root, unsafePath) + fullPath, err := securejoin.SecureJoin(root, unsafePath) + if err != nil { + return fmt.Errorf("resolving path inside rootfs failed: %w", err) + } + + procSelfFd, closer := ProcThreadSelf("fd/") + defer closer() + + // Open the target path. + fh, err := os.OpenFile(fullPath, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("open o_path procfd: %w", err) + } + defer fh.Close() + + procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd()))) + // Double-check the path is the one we expected. 
+ if realpath, err := os.Readlink(procfd); err != nil { + return fmt.Errorf("procfd verification failed: %w", err) + } else if realpath != fullPath { + return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) + } + + return fn(procfd) +} + +// WithProcfdFile is a very minimal wrapper around [ProcThreadSelfFd], intended +// to make migrating from [WithProcfd] and [WithProcfdPath] usage easier. The +// caller is responsible for making sure that the provided file handle is +// actually safe to operate on. +func WithProcfdFile(file *os.File, fn func(procfd string) error) error { + fdpath, closer := ProcThreadSelfFd(file.Fd()) + defer closer() + + return fn(fdpath) +} + +type ProcThreadSelfCloser func() + +var ( + haveProcThreadSelf bool + haveProcThreadSelfOnce sync.Once +) + +// ProcThreadSelf returns a string that is equivalent to +// /proc/thread-self/, with a graceful fallback on older kernels where +// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin, +// meaning that the passed string needs to be trusted. The caller _must_ call +// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread) +// *only once* after it has finished using the returned path string. +func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) { + haveProcThreadSelfOnce.Do(func() { + if _, err := os.Stat("/proc/thread-self/"); err == nil { + haveProcThreadSelf = true + } else { + logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/", err) + } + }) + + // We need to lock our thread until the caller is done with the path string + // because any non-atomic operation on the path (such as opening a file, + // then reading it) could be interrupted by the Go runtime where the + // underlying thread is swapped out and the original thread is killed, + // resulting in pull-your-hair-out-hard-to-debug issues in the caller. 
In + // addition, the pre-3.17 fallback makes everything non-atomic because the + // same thing could happen between unix.Gettid() and the path operations. + // + // In theory, we don't need to lock in the atomic user case when using + // /proc/thread-self/, but it's better to be safe than sorry (and there are + // only one or two truly atomic users of /proc/thread-self/). + runtime.LockOSThread() + + threadSelf := "/proc/thread-self/" + if !haveProcThreadSelf { + // Pre-3.17 kernels did not have /proc/thread-self, so do it manually. + threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/" + if _, err := os.Stat(threadSelf); err != nil { + // Unfortunately, this code is called from rootfs_linux.go where we + // are running inside the pid namespace of the container but /proc + // is the host's procfs. Unfortunately there is no real way to get + // the correct tid to use here (the kernel age means we cannot do + // things like set up a private fsopen("proc") -- even scanning + // NSpid in all of the tasks in /proc/self/task/*/status requires + // Linux 4.1). + // + // So, we just have to assume that /proc/self is acceptable in this + // one specific case. + if os.Getpid() == 1 { + logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err) + } else { + // This should never happen, but the fallback should work in most cases... + logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err) + } + threadSelf = "/proc/self/" + } + } + return threadSelf + subpath, runtime.UnlockOSThread +} + +// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to +// create a /proc/thread-self handle for given file descriptor. +// +// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but +// without using fmt.Sprintf to avoid unneeded overhead. 
+func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
+	return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
+}
+
+// Openat is a Go-friendly openat(2) wrapper: path is opened relative to dir (or relative to the current working directory when dir is nil), O_CLOEXEC is always added to flags, and failures are returned as *os.PathError.
+func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
+	dirFd, name := unix.AT_FDCWD, path // nil dir: the returned file is named by path alone
+	if dir != nil {
+		dirFd, name = int(dir.Fd()), dir.Name()+"/"+path
+	}
+	flags |= unix.O_CLOEXEC
+
+	fd, err := unix.Openat(dirFd, path, flags, mode)
+	if err != nil {
+		return nil, &os.PathError{Op: "openat", Path: path, Err: err}
+	}
+	return os.NewFile(uintptr(fd), name), nil // name was built above, so a nil dir is never dereferenced here
+}
diff --git a/vendor/github.com/opencontainers/runc/types/events.go b/vendor/github.com/opencontainers/runc/types/events.go
new file mode 100644
index 0000000000..ed7d2408fd
--- /dev/null
+++ b/vendor/github.com/opencontainers/runc/types/events.go
@@ -0,0 +1,165 @@
+package types
+
+import (
+	"github.com/opencontainers/cgroups"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
+)
+
+// Event struct for encoding the event data to json.
+type Event struct {
+	Type string      `json:"type"`
+	ID   string      `json:"id"`
+	Data interface{} `json:"data,omitempty"`
+}
+
+// Stats is the runc specific stats structure for stability when encoding and decoding stats.
+type Stats struct { + CPU Cpu `json:"cpu"` + CPUSet CPUSet `json:"cpuset"` + Memory Memory `json:"memory"` + Pids Pids `json:"pids"` + Blkio Blkio `json:"blkio"` + Hugetlb map[string]Hugetlb `json:"hugetlb"` + IntelRdt IntelRdt `json:"intel_rdt"` + NetworkInterfaces []*NetworkInterface `json:"network_interfaces"` +} + +type PSIData = cgroups.PSIData + +type PSIStats = cgroups.PSIStats + +type Hugetlb struct { + Usage uint64 `json:"usage,omitempty"` + Max uint64 `json:"max,omitempty"` + Failcnt uint64 `json:"failcnt"` +} + +type BlkioEntry struct { + Major uint64 `json:"major,omitempty"` + Minor uint64 `json:"minor,omitempty"` + Op string `json:"op,omitempty"` + Value uint64 `json:"value,omitempty"` +} + +type Blkio struct { + IoServiceBytesRecursive []BlkioEntry `json:"ioServiceBytesRecursive,omitempty"` + IoServicedRecursive []BlkioEntry `json:"ioServicedRecursive,omitempty"` + IoQueuedRecursive []BlkioEntry `json:"ioQueueRecursive,omitempty"` + IoServiceTimeRecursive []BlkioEntry `json:"ioServiceTimeRecursive,omitempty"` + IoWaitTimeRecursive []BlkioEntry `json:"ioWaitTimeRecursive,omitempty"` + IoMergedRecursive []BlkioEntry `json:"ioMergedRecursive,omitempty"` + IoTimeRecursive []BlkioEntry `json:"ioTimeRecursive,omitempty"` + SectorsRecursive []BlkioEntry `json:"sectorsRecursive,omitempty"` + PSI *PSIStats `json:"psi,omitempty"` +} + +type Pids struct { + Current uint64 `json:"current,omitempty"` + Limit uint64 `json:"limit,omitempty"` +} + +type Throttling struct { + Periods uint64 `json:"periods,omitempty"` + ThrottledPeriods uint64 `json:"throttledPeriods,omitempty"` + ThrottledTime uint64 `json:"throttledTime,omitempty"` +} + +type CpuUsage struct { + // Units: nanoseconds. 
+ Total uint64 `json:"total,omitempty"` + Percpu []uint64 `json:"percpu,omitempty"` + PercpuKernel []uint64 `json:"percpu_kernel,omitempty"` + PercpuUser []uint64 `json:"percpu_user,omitempty"` + Kernel uint64 `json:"kernel"` + User uint64 `json:"user"` +} + +type Cpu struct { + Usage CpuUsage `json:"usage,omitempty"` + Throttling Throttling `json:"throttling,omitempty"` + PSI *PSIStats `json:"psi,omitempty"` +} + +type CPUSet struct { + CPUs []uint16 `json:"cpus,omitempty"` + CPUExclusive uint64 `json:"cpu_exclusive"` + Mems []uint16 `json:"mems,omitempty"` + MemHardwall uint64 `json:"mem_hardwall"` + MemExclusive uint64 `json:"mem_exclusive"` + MemoryMigrate uint64 `json:"memory_migrate"` + MemorySpreadPage uint64 `json:"memory_spread_page"` + MemorySpreadSlab uint64 `json:"memory_spread_slab"` + MemoryPressure uint64 `json:"memory_pressure"` + SchedLoadBalance uint64 `json:"sched_load_balance"` + SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"` +} + +type MemoryEntry struct { + Limit uint64 `json:"limit"` + Usage uint64 `json:"usage,omitempty"` + Max uint64 `json:"max,omitempty"` + Failcnt uint64 `json:"failcnt"` +} + +type Memory struct { + Cache uint64 `json:"cache,omitempty"` + Usage MemoryEntry `json:"usage,omitempty"` + Swap MemoryEntry `json:"swap,omitempty"` + Kernel MemoryEntry `json:"kernel,omitempty"` + KernelTCP MemoryEntry `json:"kernelTCP,omitempty"` + Raw map[string]uint64 `json:"raw,omitempty"` + PSI *PSIStats `json:"psi,omitempty"` +} + +type L3CacheInfo struct { + CbmMask string `json:"cbm_mask,omitempty"` + MinCbmBits uint64 `json:"min_cbm_bits,omitempty"` + NumClosids uint64 `json:"num_closids,omitempty"` +} + +type MemBwInfo struct { + BandwidthGran uint64 `json:"bandwidth_gran,omitempty"` + DelayLinear uint64 `json:"delay_linear,omitempty"` + MinBandwidth uint64 `json:"min_bandwidth,omitempty"` + NumClosids uint64 `json:"num_closids,omitempty"` +} + +type IntelRdt struct { + // The read-only L3 cache information + L3CacheInfo 
*L3CacheInfo `json:"l3_cache_info,omitempty"` + + // The read-only L3 cache schema in root + L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"` + + // The L3 cache schema in 'container_id' group + L3CacheSchema string `json:"l3_cache_schema,omitempty"` + + // The read-only memory bandwidth information + MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"` + + // The read-only memory bandwidth schema in root + MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"` + + // The memory bandwidth schema in 'container_id' group + MemBwSchema string `json:"mem_bw_schema,omitempty"` + + // The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group + MBMStats *[]intelrdt.MBMNumaNodeStats `json:"mbm_stats,omitempty"` + + // The cache monitoring technology statistics from NUMA nodes in 'container_id' group + CMTStats *[]intelrdt.CMTNumaNodeStats `json:"cmt_stats,omitempty"` +} + +type NetworkInterface struct { + // Name is the name of the network interface. + Name string + + RxBytes uint64 + RxPackets uint64 + RxErrors uint64 + RxDropped uint64 + TxBytes uint64 + TxPackets uint64 + TxErrors uint64 + TxDropped uint64 +} diff --git a/vendor/github.com/opencontainers/selinux/LICENSE b/vendor/github.com/opencontainers/selinux/LICENSE new file mode 100644 index 0000000000..8dada3edaf --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/doc.go b/vendor/github.com/opencontainers/selinux/go-selinux/doc.go new file mode 100644 index 0000000000..57a15c9a11 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/doc.go @@ -0,0 +1,13 @@ +/* +Package selinux provides a high-level interface for interacting with selinux. + +Usage: + + import "github.com/opencontainers/selinux/go-selinux" + + // Ensure that selinux is enforcing mode. 
+ if selinux.EnforceMode() != selinux.Enforcing { + selinux.SetEnforceMode(selinux.Enforcing) + } +*/ +package selinux diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go new file mode 100644 index 0000000000..884a8b8059 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go @@ -0,0 +1,48 @@ +package label + +import ( + "fmt" + + "github.com/opencontainers/selinux/go-selinux" +) + +// Init initialises the labeling system +func Init() { + _ = selinux.GetEnabled() +} + +// FormatMountLabel returns a string to be used by the mount command. Using +// the SELinux `context` mount option. Changing labels of files on mount +// points with this option can never be changed. +// FormatMountLabel returns a string to be used by the mount command. +// The format of this string will be used to alter the labeling of the mountpoint. +// The string returned is suitable to be used as the options field of the mount command. +// If you need to have additional mount point options, you can pass them in as +// the first parameter. Second parameter is the label that you wish to apply +// to all content in the mount point. +func FormatMountLabel(src, mountLabel string) string { + return FormatMountLabelByType(src, mountLabel, "context") +} + +// FormatMountLabelByType returns a string to be used by the mount command. +// Allow caller to specify the mount options. For example using the SELinux +// `fscontext` mount option would allow certain container processes to change +// labels of files created on the mount points, where as `context` option does +// not. +// FormatMountLabelByType returns a string to be used by the mount command. +// The format of this string will be used to alter the labeling of the mountpoint. +// The string returned is suitable to be used as the options field of the mount command. 
+// If you need to have additional mount point options, you can pass them in as +// the first parameter. Second parameter is the label that you wish to apply +// to all content in the mount point. +func FormatMountLabelByType(src, mountLabel, contextType string) string { + if mountLabel != "" { + switch src { + case "": + src = fmt.Sprintf("%s=%q", contextType, mountLabel) + default: + src = fmt.Sprintf("%s,%s=%q", src, contextType, mountLabel) + } + } + return src +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go new file mode 100644 index 0000000000..95f29e21f4 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go @@ -0,0 +1,136 @@ +package label + +import ( + "errors" + "fmt" + "strings" + + "github.com/opencontainers/selinux/go-selinux" +) + +// Valid Label Options +var validOptions = map[string]bool{ + "disable": true, + "type": true, + "filetype": true, + "user": true, + "role": true, + "level": true, +} + +var ErrIncompatibleLabel = errors.New("bad SELinux option: z and Z can not be used together") + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels. The labels returned will include a random MCS String, that is +// guaranteed to be unique. +// If the disabled flag is passed in, the process label will not be set, but the mount label will be set +// to the container_file label with the maximum category. This label is not usable by any confined label. 
+func InitLabels(options []string) (plabel string, mlabel string, retErr error) { + if !selinux.GetEnabled() { + return "", "", nil + } + processLabel, mountLabel := selinux.ContainerLabels() + if processLabel != "" { + defer func() { + if retErr != nil { + selinux.ReleaseLabel(mountLabel) + } + }() + pcon, err := selinux.NewContext(processLabel) + if err != nil { + return "", "", err + } + mcsLevel := pcon["level"] + mcon, err := selinux.NewContext(mountLabel) + if err != nil { + return "", "", err + } + for _, opt := range options { + if opt == "disable" { + selinux.ReleaseLabel(mountLabel) + return "", selinux.PrivContainerMountLabel(), nil + } + if i := strings.Index(opt, ":"); i == -1 { + return "", "", fmt.Errorf("bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) + } + con := strings.SplitN(opt, ":", 2) + if !validOptions[con[0]] { + return "", "", fmt.Errorf("bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) + } + if con[0] == "filetype" { + mcon["type"] = con[1] + continue + } + pcon[con[0]] = con[1] + if con[0] == "level" || con[0] == "user" { + mcon[con[0]] = con[1] + } + } + if pcon.Get() != processLabel { + if pcon["level"] != mcsLevel { + selinux.ReleaseLabel(processLabel) + } + processLabel = pcon.Get() + selinux.ReserveLabel(processLabel) + } + mountLabel = mcon.Get() + } + return processLabel, mountLabel, nil +} + +// SetFileLabel modifies the "path" label to the specified file label +func SetFileLabel(path string, fileLabel string) error { + if !selinux.GetEnabled() || fileLabel == "" { + return nil + } + return selinux.SetFileLabel(path, fileLabel) +} + +// SetFileCreateLabel tells the kernel the label for all files to be created +func SetFileCreateLabel(fileLabel string) error { + if !selinux.GetEnabled() { + return nil + } + return selinux.SetFSCreateLabel(fileLabel) +} + +// Relabel changes the label of path and all the entries beneath 
the path. +// It changes the MCS label to s0 if shared is true. +// This will allow all containers to share the content. +// +// The path itself is guaranteed to be relabeled last. +func Relabel(path string, fileLabel string, shared bool) error { + if !selinux.GetEnabled() || fileLabel == "" { + return nil + } + + if shared { + c, err := selinux.NewContext(fileLabel) + if err != nil { + return err + } + + c["level"] = "s0" + fileLabel = c.Get() + } + return selinux.Chcon(path, fileLabel, true) +} + +// Validate checks that the label does not include unexpected options +func Validate(label string) error { + if strings.Contains(label, "z") && strings.Contains(label, "Z") { + return ErrIncompatibleLabel + } + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(label string) bool { + return strings.Contains(label, "z") || strings.Contains(label, "Z") +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(label string) bool { + return strings.Contains(label, "z") +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go new file mode 100644 index 0000000000..7a54afc5e6 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go @@ -0,0 +1,44 @@ +//go:build !linux +// +build !linux + +package label + +// InitLabels returns the process label and file labels to be used within +// the container. A list of options can be passed into this function to alter +// the labels. 
+func InitLabels([]string) (string, string, error) { + return "", "", nil +} + +func SetFileLabel(string, string) error { + return nil +} + +func SetFileCreateLabel(string) error { + return nil +} + +func Relabel(string, string, bool) error { + return nil +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return nil +} + +// Validate checks that the label does not include unexpected options +func Validate(string) error { + return nil +} + +// RelabelNeeded checks whether the user requested a relabel +func RelabelNeeded(string) bool { + return false +} + +// IsShared checks that the label includes a "shared" mark +func IsShared(string) bool { + return false +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go new file mode 100644 index 0000000000..15150d4752 --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go @@ -0,0 +1,322 @@ +package selinux + +import ( + "errors" +) + +const ( + // Enforcing constant indicate SELinux is in enforcing mode + Enforcing = 1 + // Permissive constant to indicate SELinux is in permissive mode + Permissive = 0 + // Disabled constant to indicate SELinux is disabled + Disabled = -1 + // maxCategory is the maximum number of categories used within containers + maxCategory = 1024 + // DefaultCategoryRange is the upper bound on the category range + DefaultCategoryRange = uint32(maxCategory) +) + +var ( + // ErrMCSAlreadyExists is returned when trying to allocate a duplicate MCS. + ErrMCSAlreadyExists = errors.New("MCS label already exists") + // ErrEmptyPath is returned when an empty path has been specified. + ErrEmptyPath = errors.New("empty path") + + // ErrInvalidLabel is returned when an invalid label is specified. 
+ ErrInvalidLabel = errors.New("invalid Label") + + // InvalidLabel is returned when an invalid label is specified. + // + // Deprecated: use [ErrInvalidLabel]. + InvalidLabel = ErrInvalidLabel + + // ErrIncomparable is returned two levels are not comparable + ErrIncomparable = errors.New("incomparable levels") + // ErrLevelSyntax is returned when a sensitivity or category do not have correct syntax in a level + ErrLevelSyntax = errors.New("invalid level syntax") + + // ErrContextMissing is returned if a requested context is not found in a file. + ErrContextMissing = errors.New("context does not have a match") + // ErrVerifierNil is returned when a context verifier function is nil. + ErrVerifierNil = errors.New("verifier function is nil") + + // ErrNotTGLeader is returned by [SetKeyLabel] if the calling thread + // is not the thread group leader. + ErrNotTGLeader = errors.New("calling thread is not the thread group leader") + + // CategoryRange allows the upper bound on the category range to be adjusted + CategoryRange = DefaultCategoryRange + + privContainerMountLabel string +) + +// Context is a representation of the SELinux label broken into 4 parts +type Context map[string]string + +// SetDisabled disables SELinux support for the package +func SetDisabled() { + setDisabled() +} + +// GetEnabled returns whether SELinux is currently enabled. +func GetEnabled() bool { + return getEnabled() +} + +// ClassIndex returns the int index for an object class in the loaded policy, +// or -1 and an error +func ClassIndex(class string) (int, error) { + return classIndex(class) +} + +// SetFileLabel sets the SELinux label for this path, following symlinks, +// or returns an error. +func SetFileLabel(fpath string, label string) error { + return setFileLabel(fpath, label) +} + +// LsetFileLabel sets the SELinux label for this path, not following symlinks, +// or returns an error. 
+func LsetFileLabel(fpath string, label string) error { + return lSetFileLabel(fpath, label) +} + +// FileLabel returns the SELinux label for this path, following symlinks, +// or returns an error. +func FileLabel(fpath string) (string, error) { + return fileLabel(fpath) +} + +// LfileLabel returns the SELinux label for this path, not following symlinks, +// or returns an error. +func LfileLabel(fpath string) (string, error) { + return lFileLabel(fpath) +} + +// SetFSCreateLabel tells the kernel what label to use for all file system objects +// created by this task. +// Set the label to an empty string to return to the default label. Calls to SetFSCreateLabel +// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until file system +// objects created by this task are finished to guarantee another goroutine does not migrate +// to the current thread before execution is complete. +func SetFSCreateLabel(label string) error { + return setFSCreateLabel(label) +} + +// FSCreateLabel returns the default label the kernel which the kernel is using +// for file system objects created by this task. "" indicates default. +func FSCreateLabel() (string, error) { + return fsCreateLabel() +} + +// CurrentLabel returns the SELinux label of the current process thread, or an error. +func CurrentLabel() (string, error) { + return currentLabel() +} + +// PidLabel returns the SELinux label of the given pid, or an error. +func PidLabel(pid int) (string, error) { + return pidLabel(pid) +} + +// ExecLabel returns the SELinux label that the kernel will use for any programs +// that are executed by the current process thread, or an error. +func ExecLabel() (string, error) { + return execLabel() +} + +// CanonicalizeContext takes a context string and writes it to the kernel +// the function then returns the context that the kernel will use. 
Use this +// function to check if two contexts are equivalent +func CanonicalizeContext(val string) (string, error) { + return canonicalizeContext(val) +} + +// ComputeCreateContext requests the type transition from source to target for +// class from the kernel. +func ComputeCreateContext(source string, target string, class string) (string, error) { + return computeCreateContext(source, target, class) +} + +// CalculateGlbLub computes the glb (greatest lower bound) and lub (least upper bound) +// of a source and target range. +// The glblub is calculated as the greater of the low sensitivities and +// the lower of the high sensitivities and the and of each category bitset. +func CalculateGlbLub(sourceRange, targetRange string) (string, error) { + return calculateGlbLub(sourceRange, targetRange) +} + +// SetExecLabel sets the SELinux label that the kernel will use for any programs +// that are executed by the current process thread, or an error. Calls to SetExecLabel +// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until execution +// of the program is finished to guarantee another goroutine does not migrate to the current +// thread before execution is complete. +func SetExecLabel(label string) error { + return writeConThreadSelf("attr/exec", label) +} + +// SetTaskLabel sets the SELinux label for the current thread, or an error. +// This requires the dyntransition permission. Calls to SetTaskLabel should +// be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() to guarantee +// the current thread does not run in a new mislabeled thread. +func SetTaskLabel(label string) error { + return writeConThreadSelf("attr/current", label) +} + +// SetSocketLabel takes a process label and tells the kernel to assign the +// label to the next socket that gets created. 
Calls to SetSocketLabel +// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until +// the socket is created to guarantee another goroutine does not migrate +// to the current thread before execution is complete. +func SetSocketLabel(label string) error { + return writeConThreadSelf("attr/sockcreate", label) +} + +// SocketLabel retrieves the current socket label setting +func SocketLabel() (string, error) { + return readConThreadSelf("attr/sockcreate") +} + +// PeerLabel retrieves the label of the client on the other side of a socket +func PeerLabel(fd uintptr) (string, error) { + return peerLabel(fd) +} + +// SetKeyLabel takes a process label and tells the kernel to assign the +// label to the next kernel keyring that gets created. +// +// Calls to SetKeyLabel should be wrapped in +// runtime.LockOSThread()/runtime.UnlockOSThread() until the kernel keyring is +// created to guarantee another goroutine does not migrate to the current +// thread before execution is complete. +// +// Only the thread group leader can set key label. +func SetKeyLabel(label string) error { + return setKeyLabel(label) +} + +// KeyLabel retrieves the current kernel keyring label setting +func KeyLabel() (string, error) { + return keyLabel() +} + +// Get returns the Context as a string +func (c Context) Get() string { + return c.get() +} + +// NewContext creates a new Context struct from the specified label +func NewContext(label string) (Context, error) { + return newContext(label) +} + +// ClearLabels clears all reserved labels +func ClearLabels() { + clearLabels() +} + +// ReserveLabel reserves the MLS/MCS level component of the specified label +func ReserveLabel(label string) { + reserveLabel(label) +} + +// MLSEnabled checks if MLS is enabled. 
+func MLSEnabled() bool { + return isMLSEnabled() +} + +// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled +func EnforceMode() int { + return enforceMode() +} + +// SetEnforceMode sets the current SELinux mode Enforcing, Permissive. +// Disabled is not valid, since this needs to be set at boot time. +func SetEnforceMode(mode int) error { + return setEnforceMode(mode) +} + +// DefaultEnforceMode returns the systems default SELinux mode Enforcing, +// Permissive or Disabled. Note this is just the default at boot time. +// EnforceMode tells you the systems current mode. +func DefaultEnforceMode() int { + return defaultEnforceMode() +} + +// ReleaseLabel un-reserves the MLS/MCS Level field of the specified label, +// allowing it to be used by another process. +func ReleaseLabel(label string) { + releaseLabel(label) +} + +// ROFileLabel returns the specified SELinux readonly file label +func ROFileLabel() string { + return roFileLabel() +} + +// KVMContainerLabels returns the default processLabel and mountLabel to be used +// for kvm containers by the calling process. +func KVMContainerLabels() (string, string) { + return kvmContainerLabels() +} + +// InitContainerLabels returns the default processLabel and file labels to be +// used for containers running an init system like systemd by the calling process. +func InitContainerLabels() (string, string) { + return initContainerLabels() +} + +// ContainerLabels returns an allocated processLabel and fileLabel to be used for +// container labeling by the calling process. +func ContainerLabels() (processLabel string, fileLabel string) { + return containerLabels() +} + +// SecurityCheckContext validates that the SELinux label is understood by the kernel +func SecurityCheckContext(val string) error { + return securityCheckContext(val) +} + +// CopyLevel returns a label with the MLS/MCS level from src label replaced on +// the dest label. 
+func CopyLevel(src, dest string) (string, error) { + return copyLevel(src, dest) +} + +// Chcon changes the fpath file object to the SELinux label. +// If fpath is a directory and recurse is true, then Chcon walks the +// directory tree setting the label. +// +// The fpath itself is guaranteed to be relabeled last. +func Chcon(fpath string, label string, recurse bool) error { + return chcon(fpath, label, recurse) +} + +// DupSecOpt takes an SELinux process label and returns security options that +// can be used to set the SELinux Type and Level for future container processes. +func DupSecOpt(src string) ([]string, error) { + return dupSecOpt(src) +} + +// DisableSecOpt returns a security opt that can be used to disable SELinux +// labeling support for future container processes. +func DisableSecOpt() []string { + return []string{"disable"} +} + +// GetDefaultContextWithLevel gets a single context for the specified SELinux user +// identity that is reachable from the specified scon context. The context is based +// on the per-user /etc/selinux/{SELINUXTYPE}/contexts/users/ if it exists, +// and falls back to the global /etc/selinux/{SELINUXTYPE}/contexts/default_contexts +// file. +func GetDefaultContextWithLevel(user, level, scon string) (string, error) { + return getDefaultContextWithLevel(user, level, scon) +} + +// PrivContainerMountLabel returns mount label for privileged containers +func PrivContainerMountLabel() string { + // Make sure label is initialized. 
+ _ = label("") + return privContainerMountLabel +} diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go new file mode 100644 index 0000000000..6d7f8e270b --- /dev/null +++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go @@ -0,0 +1,1401 @@ +package selinux + +import ( + "bufio" + "bytes" + "crypto/rand" + "encoding/binary" + "errors" + "fmt" + "io" + "io/fs" + "math/big" + "os" + "os/user" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/cyphar/filepath-securejoin/pathrs-lite" + "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" + "golang.org/x/sys/unix" + + "github.com/opencontainers/selinux/pkg/pwalkdir" +) + +const ( + minSensLen = 2 + contextFile = "/usr/share/containers/selinux/contexts" + selinuxDir = "/etc/selinux/" + selinuxUsersDir = "contexts/users" + defaultContexts = "contexts/default_contexts" + selinuxConfig = selinuxDir + "config" + selinuxfsMount = "/sys/fs/selinux" + selinuxTypeTag = "SELINUXTYPE" + selinuxTag = "SELINUX" + xattrNameSelinux = "security.selinux" +) + +type selinuxState struct { + mcsList map[string]bool + selinuxfs string + selinuxfsOnce sync.Once + enabledSet bool + enabled bool + sync.Mutex +} + +type level struct { + cats *big.Int + sens int +} + +type mlsRange struct { + low *level + high *level +} + +type defaultSECtx struct { + userRdr io.Reader + verifier func(string) error + defaultRdr io.Reader + user, level, scon string +} + +type levelItem byte + +const ( + sensitivity levelItem = 's' + category levelItem = 'c' +) + +var ( + readOnlyFileLabel string + state = selinuxState{ + mcsList: make(map[string]bool), + } + + // for policyRoot() + policyRootOnce sync.Once + policyRootVal string + + // for label() + loadLabelsOnce sync.Once + labels map[string]string +) + +func policyRoot() string { + policyRootOnce.Do(func() { + policyRootVal = filepath.Join(selinuxDir, 
readConfig(selinuxTypeTag)) + }) + + return policyRootVal +} + +func (s *selinuxState) setEnable(enabled bool) bool { + s.Lock() + defer s.Unlock() + s.enabledSet = true + s.enabled = enabled + return s.enabled +} + +func (s *selinuxState) getEnabled() bool { + s.Lock() + enabled := s.enabled + enabledSet := s.enabledSet + s.Unlock() + if enabledSet { + return enabled + } + + enabled = false + if fs := getSelinuxMountPoint(); fs != "" { + if con, _ := CurrentLabel(); con != "kernel" { + enabled = true + } + } + return s.setEnable(enabled) +} + +// setDisabled disables SELinux support for the package +func setDisabled() { + state.setEnable(false) +} + +func verifySELinuxfsMount(mnt string) bool { + var buf unix.Statfs_t + for { + err := unix.Statfs(mnt, &buf) + if err == nil { + break + } + if err == unix.EAGAIN || err == unix.EINTR { + continue + } + return false + } + + //#nosec G115 -- there is no overflow here. + if uint32(buf.Type) != uint32(unix.SELINUX_MAGIC) { + return false + } + if (buf.Flags & unix.ST_RDONLY) != 0 { + return false + } + + return true +} + +func findSELinuxfs() string { + // fast path: check the default mount first + if verifySELinuxfsMount(selinuxfsMount) { + return selinuxfsMount + } + + // check if selinuxfs is available before going the slow path + fs, err := os.ReadFile("/proc/filesystems") + if err != nil { + return "" + } + if !bytes.Contains(fs, []byte("\tselinuxfs\n")) { + return "" + } + + // slow path: try to find among the mounts + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "" + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for { + mnt := findSELinuxfsMount(scanner) + if mnt == "" { // error or not found + return "" + } + if verifySELinuxfsMount(mnt) { + return mnt + } + } +} + +// findSELinuxfsMount returns a next selinuxfs mount point found, +// if there is one, or an empty string in case of EOF or error. 
+func findSELinuxfsMount(s *bufio.Scanner) string { + for s.Scan() { + txt := s.Bytes() + // The first field after - is fs type. + // Safe as spaces in mountpoints are encoded as \040 + if !bytes.Contains(txt, []byte(" - selinuxfs ")) { + continue + } + const mPos = 5 // mount point is 5th field + fields := bytes.SplitN(txt, []byte(" "), mPos+1) + if len(fields) < mPos+1 { + continue + } + return string(fields[mPos-1]) + } + + return "" +} + +func (s *selinuxState) getSELinuxfs() string { + s.selinuxfsOnce.Do(func() { + s.selinuxfs = findSELinuxfs() + }) + + return s.selinuxfs +} + +// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs +// filesystem or an empty string if no mountpoint is found. Selinuxfs is +// a proc-like pseudo-filesystem that exposes the SELinux policy API to +// processes. The existence of an selinuxfs mount is used to determine +// whether SELinux is currently enabled or not. +func getSelinuxMountPoint() string { + return state.getSELinuxfs() +} + +// getEnabled returns whether SELinux is currently enabled. 
+func getEnabled() bool { + return state.getEnabled() +} + +func readConfig(target string) string { + in, err := os.Open(selinuxConfig) + if err != nil { + return "" + } + defer in.Close() + + scanner := bufio.NewScanner(in) + + for scanner.Scan() { + line := bytes.TrimSpace(scanner.Bytes()) + if len(line) == 0 { + // Skip blank lines + continue + } + if line[0] == ';' || line[0] == '#' { + // Skip comments + continue + } + fields := bytes.SplitN(line, []byte{'='}, 2) + if len(fields) != 2 { + continue + } + if bytes.Equal(fields[0], []byte(target)) { + return string(bytes.Trim(fields[1], `"`)) + } + } + return "" +} + +func readConFd(in *os.File) (string, error) { + data, err := io.ReadAll(in) + if err != nil { + return "", err + } + return string(bytes.TrimSuffix(data, []byte{0})), nil +} + +func writeConFd(out *os.File, val string) error { + var err error + if val != "" { + _, err = out.Write([]byte(val)) + } else { + _, err = out.Write(nil) + } + return err +} + +// openProcThreadSelf is a small wrapper around [procfs.Handle.OpenThreadSelf] +// and [pathrs.Reopen] to make "one-shot opens" slightly more ergonomic. The +// provided mode must be os.O_* flags to indicate what mode the returned file +// should be opened with (flags like os.O_CREAT and os.O_EXCL are not +// supported). +// +// If no error occurred, the returned handle is guaranteed to be exactly +// /proc/thread-self/ with no tricky mounts or symlinks causing you to +// operate on an unexpected path (with some caveats on pre-openat2 or +// pre-fsopen kernels). 
+func openProcThreadSelf(subpath string, mode int) (*os.File, procfs.ProcThreadSelfCloser, error) { + if subpath == "" { + return nil, nil, ErrEmptyPath + } + + proc, err := procfs.OpenProcRoot() + if err != nil { + return nil, nil, err + } + defer proc.Close() + + handle, closer, err := proc.OpenThreadSelf(subpath) + if err != nil { + return nil, nil, fmt.Errorf("open /proc/thread-self/%s handle: %w", subpath, err) + } + defer handle.Close() // we will return a re-opened handle + + file, err := pathrs.Reopen(handle, mode) + if err != nil { + closer() + return nil, nil, fmt.Errorf("reopen /proc/thread-self/%s handle (%#x): %w", subpath, mode, err) + } + return file, closer, nil +} + +// Read the contents of /proc/thread-self/. +func readConThreadSelf(fpath string) (string, error) { + in, closer, err := openProcThreadSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) + if err != nil { + return "", err + } + defer closer() + defer in.Close() + + return readConFd(in) +} + +// Write to /proc/thread-self/. +func writeConThreadSelf(fpath, val string) error { + if val == "" { + if !getEnabled() { + return nil + } + } + + out, closer, err := openProcThreadSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) + if err != nil { + return err + } + defer closer() + defer out.Close() + + return writeConFd(out, val) +} + +// openProcSelf is a small wrapper around [procfs.Handle.OpenSelf] and +// [pathrs.Reopen] to make "one-shot opens" slightly more ergonomic. The +// provided mode must be os.O_* flags to indicate what mode the returned file +// should be opened with (flags like os.O_CREAT and os.O_EXCL are not +// supported). +// +// If no error occurred, the returned handle is guaranteed to be exactly +// /proc/self/ with no tricky mounts or symlinks causing you to +// operate on an unexpected path (with some caveats on pre-openat2 or +// pre-fsopen kernels). 
+func openProcSelf(subpath string, mode int) (*os.File, error) { + if subpath == "" { + return nil, ErrEmptyPath + } + + proc, err := procfs.OpenProcRoot() + if err != nil { + return nil, err + } + defer proc.Close() + + handle, err := proc.OpenSelf(subpath) + if err != nil { + return nil, fmt.Errorf("open /proc/self/%s handle: %w", subpath, err) + } + defer handle.Close() // we will return a re-opened handle + + file, err := pathrs.Reopen(handle, mode) + if err != nil { + return nil, fmt.Errorf("reopen /proc/self/%s handle (%#x): %w", subpath, mode, err) + } + return file, nil +} + +// Read the contents of /proc/self/. +func readConSelf(fpath string) (string, error) { + in, err := openProcSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) + if err != nil { + return "", err + } + defer in.Close() + + return readConFd(in) +} + +// Write to /proc/self/. +func writeConSelf(fpath, val string) error { + if val == "" { + if !getEnabled() { + return nil + } + } + + out, err := openProcSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) + if err != nil { + return err + } + defer out.Close() + + return writeConFd(out, val) +} + +// openProcPid is a small wrapper around [procfs.Handle.OpenPid] and +// [pathrs.Reopen] to make "one-shot opens" slightly more ergonomic. The +// provided mode must be os.O_* flags to indicate what mode the returned file +// should be opened with (flags like os.O_CREAT and os.O_EXCL are not +// supported). +// +// If no error occurred, the returned handle is guaranteed to be exactly +// /proc/self/ with no tricky mounts or symlinks causing you to +// operate on an unexpected path (with some caveats on pre-openat2 or +// pre-fsopen kernels). 
+func openProcPid(pid int, subpath string, mode int) (*os.File, error) {
+	if subpath == "" {
+		return nil, ErrEmptyPath
+	}
+
+	root, err := procfs.OpenProcRoot()
+	if err != nil {
+		return nil, err
+	}
+	defer root.Close()
+
+	// Closed on return; the caller only ever sees the re-opened descriptor.
+	handle, err := root.OpenPid(pid, subpath)
+	if err != nil {
+		return nil, fmt.Errorf("open /proc/%d/%s handle: %w", pid, subpath, err)
+	}
+	defer handle.Close()
+
+	reopened, err := pathrs.Reopen(handle, mode)
+	if err != nil {
+		return nil, fmt.Errorf("reopen /proc/%d/%s handle (%#x): %w", pid, subpath, mode, err)
+	}
+	return reopened, nil
+}
+
+// classIndex reads class/<class>/index below the SELinux mount point and
+// returns its content parsed as a decimal integer. On any failure it returns
+// -1 together with the error.
+func classIndex(class string) (int, error) {
+	indexPath := filepath.Join(getSelinuxMountPoint(), fmt.Sprintf("class/%s/index", class))
+
+	raw, err := os.ReadFile(indexPath)
+	if err != nil {
+		return -1, err
+	}
+	idx, err := strconv.Atoi(string(raw))
+	if err != nil {
+		return -1, err
+	}
+	return idx, nil
+}
+
+// lSetFileLabel writes label into the SELinux xattr of fpath using
+// unix.Lsetxattr (symlinks are not followed). The call is retried for as
+// long as it fails with EINTR; any other failure is returned as an
+// *os.PathError.
+func lSetFileLabel(fpath string, label string) error {
+	if fpath == "" {
+		return ErrEmptyPath
+	}
+	for {
+		err := unix.Lsetxattr(fpath, xattrNameSelinux, []byte(label), 0)
+		if err == nil {
+			return nil
+		}
+		if err != unix.EINTR {
+			return &os.PathError{Op: fmt.Sprintf("lsetxattr(label=%s)", label), Path: fpath, Err: err}
+		}
+	}
+}
+
+// setFileLabel sets the SELinux label for this path, following symlinks,
+// or returns an error.
+func setFileLabel(fpath string, label string) error {
+	if fpath == "" {
+		return ErrEmptyPath
+	}
+	// Retry for as long as the syscall is interrupted; anything else is final.
+	for {
+		err := unix.Setxattr(fpath, xattrNameSelinux, []byte(label), 0)
+		if err == nil {
+			return nil
+		}
+		if err != unix.EINTR {
+			return &os.PathError{Op: fmt.Sprintf("setxattr(label=%s)", label), Path: fpath, Err: err}
+		}
+	}
+}
+
+// fileLabel returns the SELinux label stored on fpath, read through getxattr
+// (symlinks are followed). Failures are reported as an *os.PathError.
+func fileLabel(fpath string) (string, error) {
+	if fpath == "" {
+		return "", ErrEmptyPath
+	}
+
+	raw, err := getxattr(fpath, xattrNameSelinux)
+	if err != nil {
+		return "", &os.PathError{Op: "getxattr", Path: fpath, Err: err}
+	}
+	// The attribute value may carry one terminating NUL byte; drop it.
+	if n := len(raw); n > 0 && raw[n-1] == '\x00' {
+		raw = raw[:n-1]
+	}
+	return string(raw), nil
+}
+
+// lFileLabel returns the SELinux label stored on fpath, read through
+// lgetxattr (symlinks are not followed). Failures are reported as an
+// *os.PathError.
+func lFileLabel(fpath string) (string, error) {
+	if fpath == "" {
+		return "", ErrEmptyPath
+	}
+
+	raw, err := lgetxattr(fpath, xattrNameSelinux)
+	if err != nil {
+		return "", &os.PathError{Op: "lgetxattr", Path: fpath, Err: err}
+	}
+	// The attribute value may carry one terminating NUL byte; drop it.
+	if n := len(raw); n > 0 && raw[n-1] == '\x00' {
+		raw = raw[:n-1]
+	}
+	return string(raw), nil
+}
+
+// setFSCreateLabel writes label to attr/fscreate of the current thread.
+func setFSCreateLabel(label string) error {
+	return writeConThreadSelf("attr/fscreate", label)
+}
+
+// fsCreateLabel returns the label which the kernel is using for file system
+// objects created by this task. "" indicates default.
+func fsCreateLabel() (string, error) {
+	return readConThreadSelf("attr/fscreate")
+}
+
+// currentLabel returns the SELinux label of the current process thread, or an error.
+func currentLabel() (string, error) {
+	return readConThreadSelf("attr/current")
+}
+
+// pidLabel returns the SELinux label of the given pid, or an error.
+func pidLabel(pid int) (string, error) {
+	it, err := openProcPid(pid, "attr/current", os.O_RDONLY|unix.O_CLOEXEC)
+	if err != nil {
+		// Report the failure: returning ("", nil) here would make an
+		// unreadable or vanished process look like one with an empty label.
+		return "", err
+	}
+	defer it.Close()
+	return readConFd(it)
+}
+
+// execLabel returns the SELinux label that the kernel will use for any programs
+// that are executed by the current process thread, or an error.
+func execLabel() (string, error) {
+	// Like attr/current and attr/fscreate, the exec attribute lives below
+	// the attr/ directory of the thread's procfs entry.
+	return readConThreadSelf("attr/exec")
+}
+
+// canonicalizeContext writes val to the "context" file below the SELinux
+// mount point and returns what is read back from the same descriptor, i.e.
+// the context as the kernel will use it. Use this function to check if two
+// contexts are equivalent.
+func canonicalizeContext(val string) (string, error) {
+	return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val)
+}
+
+// computeCreateContext requests the type transition from source to target for
+// class from the kernel. The class name is first resolved to its index in the
+// loaded policy via classIndex; the request "<source> <target> <index>" is
+// then written to the "create" file below the SELinux mount point and the
+// reply is returned.
+func computeCreateContext(source string, target string, class string) (string, error) {
+	classidx, err := classIndex(class)
+	if err != nil {
+		return "", err
+	}
+
+	return readWriteCon(filepath.Join(getSelinuxMountPoint(), "create"), fmt.Sprintf("%s %s %d", source, target, classidx))
+}
+
+// catsToBitset stores categories in a bitset.
+func catsToBitset(cats string) (*big.Int, error) {
+	bitset := new(big.Int)
+
+	// cats is a comma-separated list; each item is either a single category
+	// or a "first.last" range whose bounds are both included.
+	catlist := strings.Split(cats, ",")
+	for _, r := range catlist {
+		ranges := strings.SplitN(r, ".", 2)
+		if len(ranges) > 1 {
+			catstart, err := parseLevelItem(ranges[0], category)
+			if err != nil {
+				return nil, err
+			}
+			catend, err := parseLevelItem(ranges[1], category)
+			if err != nil {
+				return nil, err
+			}
+			for i := catstart; i <= catend; i++ {
+				bitset.SetBit(bitset, i, 1)
+			}
+		} else {
+			cat, err := parseLevelItem(ranges[0], category)
+			if err != nil {
+				return nil, err
+			}
+			bitset.SetBit(bitset, cat, 1)
+		}
+	}
+
+	return bitset, nil
+}
+
+// parseLevelItem parses and verifies that a sensitivity or category is valid.
+// s must be at least minSensLen bytes long and start with the byte sep; the
+// remainder is parsed as an unsigned decimal number, which is returned.
+// ErrLevelSyntax is returned when the length or prefix check fails.
+func parseLevelItem(s string, sep levelItem) (int, error) {
+	if len(s) < minSensLen || levelItem(s[0]) != sep {
+		return 0, ErrLevelSyntax
+	}
+	const bitSize = 31 // Make sure the result fits into signed int32.
+	val, err := strconv.ParseUint(s[1:], 10, bitSize)
+	if err != nil {
+		return 0, err
+	}
+
+	return int(val), nil
+}
+
+// parseLevel fills a level from a string that contains
+// a sensitivity and, optionally, categories separated from it by ":".
+// l.cats is left untouched when levelStr has no category part.
+func (l *level) parseLevel(levelStr string) error {
+	lvl := strings.SplitN(levelStr, ":", 2)
+	sens, err := parseLevelItem(lvl[0], sensitivity)
+	if err != nil {
+		return fmt.Errorf("failed to parse sensitivity: %w", err)
+	}
+	l.sens = sens
+	if len(lvl) > 1 {
+		cats, err := catsToBitset(lvl[1])
+		if err != nil {
+			return fmt.Errorf("failed to parse categories: %w", err)
+		}
+		l.cats = cats
+	}
+
+	return nil
+}
+
+// rangeStrToMLSRange parses the string representation of a range, i.e.
+// either "low-high" or a single level. For a single level the returned
+// range has high pointing at the very same level value as low.
+func rangeStrToMLSRange(rangeStr string) (*mlsRange, error) {
+	r := &mlsRange{}
+	l := strings.SplitN(rangeStr, "-", 2)
+
+	switch len(l) {
+	// rangeStr that has a low and a high level, e.g. s4:c0.c1023-s6:c0.c1023
+	case 2:
+		r.high = &level{}
+		if err := r.high.parseLevel(l[1]); err != nil {
+			return nil, fmt.Errorf("failed to parse high level %q: %w", l[1], err)
+		}
+		// The low level is parsed by the single-level case below.
+		fallthrough
+	// rangeStr that is single level, e.g. s6:c0,c3,c5,c30.c1023
+	case 1:
+		r.low = &level{}
+		if err := r.low.parseLevel(l[0]); err != nil {
+			return nil, fmt.Errorf("failed to parse low level %q: %w", l[0], err)
+		}
+	}
+
+	if r.high == nil {
+		r.high = r.low
+	}
+
+	return r, nil
+}
+
+// bitsetToStr takes a category bitset and returns it in the
+// canonical selinux syntax: a lone category is printed as "cN", two
+// adjacent categories as "cN,cM" and longer runs as "cN.cM".
+func bitsetToStr(c *big.Int) string {
+	var str string
+
+	// length counts how many set bits of the current run have been seen
+	// before the bit under inspection.
+	length := 0
+	i0 := int(c.TrailingZeroBits()) //#nosec G115 -- don't expect TrailingZeroBits to return values with highest bit set.
+	for i := i0; i < c.BitLen(); i++ {
+		if c.Bit(i) == 0 {
+			continue
+		}
+		if length == 0 {
+			// First bit of a run: emit it, comma-separated from earlier output.
+			if str != "" {
+				str += ","
+			}
+			str += "c" + strconv.Itoa(i)
+		}
+		if c.Bit(i+1) == 1 {
+			// The run continues; defer output until its last bit.
+			length++
+			continue
+		}
+		// Last bit of the run: close it off according to the run length.
+		if length == 1 {
+			str += ",c" + strconv.Itoa(i)
+		} else if length > 1 {
+			str += ".c" + strconv.Itoa(i)
+		}
+		length = 0
+	}
+
+	return str
+}
+
+// equal reports whether l and l2 describe the same level. Two nil levels are
+// equal, and so are two levels with the same sensitivity whose category sets
+// are both nil or compare equal; a nil category set never equals a non-nil one.
+func (l *level) equal(l2 *level) bool {
+	if l2 == nil || l == nil {
+		return l == l2
+	}
+	if l2.sens != l.sens {
+		return false
+	}
+	if l2.cats == nil || l.cats == nil {
+		return l2.cats == l.cats
+	}
+	return l.cats.Cmp(l2.cats) == 0
+}
+
+// String returns an mlsRange as a string. Only the low level is printed when
+// low and high are equal; category sets that are nil or empty are omitted.
+func (m mlsRange) String() string {
+	low := "s" + strconv.Itoa(m.low.sens)
+	if m.low.cats != nil && m.low.cats.BitLen() > 0 {
+		low += ":" + bitsetToStr(m.low.cats)
+	}
+
+	if m.low.equal(m.high) {
+		return low
+	}
+
+	high := "s" + strconv.Itoa(m.high.sens)
+	if m.high.cats != nil && m.high.cats.BitLen() > 0 {
+		high += ":" + bitsetToStr(m.high.cats)
+	}
+
+	return low + "-" + high
+}
+
+// TODO: remove these in favor of built-in min/max
+// once we stop supporting Go < 1.21.
+func maxInt(a, b int) int {
+	if b >= a {
+		return b
+	}
+	return a
+}
+
+func minInt(a, b int) int {
+	if b <= a {
+		return b
+	}
+	return a
+}
+
+// calculateGlbLub combines a source and a target MLS range (the "glblub",
+// from greatest lower bound and least upper bound) and returns the result in
+// string form. The resulting low sensitivity is the greater of the two low
+// sensitivities, the resulting high sensitivity is the lesser of the two high
+// sensitivities, and each category set is the bitwise AND of the two
+// corresponding sets. ErrIncomparable is returned when the ranges have no
+// sensitivity in common.
+func calculateGlbLub(sourceRange, targetRange string) (string, error) {
+	src, err := rangeStrToMLSRange(sourceRange)
+	if err != nil {
+		return "", err
+	}
+	tgt, err := rangeStrToMLSRange(targetRange)
+	if err != nil {
+		return "", err
+	}
+
+	// One range ends below the point where the other one starts.
+	if src.high.sens < tgt.low.sens || tgt.high.sens < src.low.sens {
+		return "", ErrIncomparable
+	}
+
+	out := &mlsRange{
+		low:  &level{sens: maxInt(src.low.sens, tgt.low.sens)},
+		high: &level{sens: minInt(src.high.sens, tgt.high.sens)},
+	}
+
+	// Categories are intersected only when both sides define a set; the
+	// result is left without one otherwise.
+	if src.low.cats != nil && tgt.low.cats != nil {
+		out.low.cats = new(big.Int).And(src.low.cats, tgt.low.cats)
+	}
+	if src.high.cats != nil && tgt.high.cats != nil {
+		out.high.cats = new(big.Int).And(src.high.cats, tgt.high.cats)
+	}
+
+	return out.String(), nil
+}
+
+// readWriteCon opens fpath read-write, writes val to it and returns what
+// readConFd then reads from the same descriptor.
+func readWriteCon(fpath string, val string) (string, error) {
+	if fpath == "" {
+		return "", ErrEmptyPath
+	}
+	f, err := os.OpenFile(fpath, os.O_RDWR, 0)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	if _, err := f.Write([]byte(val)); err != nil {
+		return "", err
+	}
+	return readConFd(f)
+}
+
+// peerLabel retrieves the label of the client on the other side of a socket
+// by querying the SO_PEERSEC socket option of fd.
+func peerLabel(fd uintptr) (string, error) {
+	peer, err := unix.GetsockoptString(int(fd), unix.SOL_SOCKET, unix.SO_PEERSEC)
+	if err != nil {
+		return "", &os.PathError{Op: "getsockopt", Path: "fd " + strconv.Itoa(int(fd)), Err: err}
+	}
+	return peer, nil
+}
+
+// setKeyLabel takes a process label and tells the kernel to assign the
+// label to the next kernel keyring that gets created.
+func setKeyLabel(label string) error {
+	// Rather than using /proc/thread-self, we want to use /proc/self to
+	// operate on the thread-group leader.
+	err := writeConSelf("attr/keycreate", label)
+	switch {
+	case errors.Is(err, os.ErrNotExist):
+		// A missing attribute file is not treated as a failure.
+		return nil
+	case label == "" && errors.Is(err, os.ErrPermission):
+		// Being refused while clearing the label is not a failure either.
+		return nil
+	case errors.Is(err, unix.EACCES) && unix.Getpid() != unix.Gettid():
+		// The caller is not running on the thread-group leader.
+		return ErrNotTGLeader
+	}
+	return err
+}
+
+// keyLabel retrieves the current kernel keyring label setting for this
+// thread-group.
+func keyLabel() (string, error) {
+	// Rather than using /proc/thread-self, we want to use /proc/self to
+	// operate on the thread-group leader.
+	return readConSelf("attr/keycreate")
+}
+
+// get returns the Context as a string of the form user:role:type, with
+// ":level" appended when a non-empty level is set.
+func (c Context) get() string {
+	base := c["user"] + ":" + c["role"] + ":" + c["type"]
+	if lvl := c["level"]; lvl != "" {
+		return base + ":" + lvl
+	}
+	return base
+}
+
+// newContext creates a new Context from the specified label. An empty label
+// yields an empty Context; a non-empty label with fewer than three
+// colon-separated fields yields ErrInvalidLabel. Everything after the third
+// colon is stored as the level.
+func newContext(label string) (Context, error) {
+	c := make(Context)
+	if len(label) == 0 {
+		return c, nil
+	}
+
+	fields := strings.SplitN(label, ":", 4)
+	if len(fields) < 3 {
+		return c, ErrInvalidLabel
+	}
+	c["user"], c["role"], c["type"] = fields[0], fields[1], fields[2]
+	if len(fields) > 3 {
+		c["level"] = fields[3]
+	}
+	return c, nil
+}
+
+// clearLabels clears all reserved labels.
+func clearLabels() {
+	state.Lock()
+	defer state.Unlock()
+	state.mcsList = make(map[string]bool)
+}
+
+// reserveLabel reserves the MLS/MCS level component of the specified label.
+// Labels without a level are ignored, as is a level that is already reserved.
+func reserveLabel(label string) {
+	if len(label) == 0 {
+		return
+	}
+	if fields := strings.SplitN(label, ":", 4); len(fields) > 3 {
+		_ = mcsAdd(fields[3])
+	}
+}
+
+// selinuxEnforcePath returns the path of the "enforce" file below the
+// SELinux mount point.
+func selinuxEnforcePath() string {
+	return filepath.Join(getSelinuxMountPoint(), "enforce")
+}
+
+// isMLSEnabled reports whether the "mls" file below the SELinux mount point
+// can be read and contains exactly "1".
+func isMLSEnabled() bool {
+	raw, err := os.ReadFile(filepath.Join(getSelinuxMountPoint(), "mls"))
+	return err == nil && string(raw) == "1"
+}
+
+// enforceMode returns the current SELinux mode as the number stored in the
+// enforce file, or -1 when that file cannot be read or parsed.
+func enforceMode() int {
+	raw, err := os.ReadFile(selinuxEnforcePath())
+	if err != nil {
+		return -1
+	}
+	mode, err := strconv.Atoi(string(raw))
+	if err != nil {
+		return -1
+	}
+	return mode
+}
+
+// setEnforceMode sets the current SELinux mode Enforcing, Permissive.
+// Disabled is not valid, since this needs to be set at boot time.
+func setEnforceMode(mode int) error {
+	value := strconv.Itoa(mode)
+	return os.WriteFile(selinuxEnforcePath(), []byte(value), 0)
+}
+
+// defaultEnforceMode returns the systems default SELinux mode Enforcing,
+// Permissive or Disabled. Note this is just the default at boot time.
+// EnforceMode tells you the systems current mode.
+func defaultEnforceMode() int {
+	configured := readConfig(selinuxTag)
+	if configured == "enforcing" {
+		return Enforcing
+	}
+	if configured == "permissive" {
+		return Permissive
+	}
+	// Anything else, including a missing setting, counts as disabled.
+	return Disabled
+}
+
+// mcsAdd marks mcs as reserved. It returns ErrMCSAlreadyExists when the value
+// is reserved already; the empty string is accepted and ignored.
+func mcsAdd(mcs string) error {
+	if mcs == "" {
+		return nil
+	}
+
+	state.Lock()
+	defer state.Unlock()
+
+	if reserved := state.mcsList[mcs]; reserved {
+		return ErrMCSAlreadyExists
+	}
+	state.mcsList[mcs] = true
+	return nil
+}
+
+// mcsDelete marks mcs as no longer reserved. The entry stays in the map with
+// the value false. The empty string is ignored.
+func mcsDelete(mcs string) {
+	if mcs == "" {
+		return
+	}
+
+	state.Lock()
+	defer state.Unlock()
+
+	state.mcsList[mcs] = false
+}
+
+// intToMcs maps id onto a level string of the form "s0:cA,cB" for a category
+// set of size catRange. It returns "" for ids outside 1..523776.
+func intToMcs(id int, catRange uint32) string {
+	if id < 1 || id > 523776 {
+		return ""
+	}
+
+	setSize := int(catRange)
+	tier, ord := setSize, id
+	// Peel off tiers of decreasing size until ord falls inside the current one.
+	for ord > tier {
+		ord -= tier
+		tier--
+	}
+	tier = setSize - tier
+	ord += tier
+	return fmt.Sprintf("s0:c%d,c%d", tier, ord)
+}
+
+// uniqMcs draws random pairs of distinct categories below catRange until it
+// finds one that mcsAdd accepts, and returns that pair as "s0:cA,cB" with
+// A < B. The returned level is reserved as a side effect.
+func uniqMcs(catRange uint32) string {
+	var n, lo, hi uint32
+
+	for {
+		// Errors from the random source are deliberately ignored, as in
+		// the rest of this loop a bad draw simply leads to another round.
+		_ = binary.Read(rand.Reader, binary.LittleEndian, &n)
+		lo = n % catRange
+		_ = binary.Read(rand.Reader, binary.LittleEndian, &n)
+		hi = n % catRange
+
+		if lo == hi {
+			continue
+		}
+		if lo > hi {
+			lo, hi = hi, lo
+		}
+
+		mcs := fmt.Sprintf("s0:c%d,c%d", lo, hi)
+		if mcsAdd(mcs) == nil {
+			return mcs
+		}
+	}
+}
+
+// releaseLabel un-reserves the MLS/MCS Level field of the specified label,
+// allowing it to be used by another process.
+func releaseLabel(label string) {
+	if len(label) == 0 {
+		return
+	}
+	// Only labels that carry a fourth (level) field have anything to release.
+	if fields := strings.SplitN(label, ":", 4); len(fields) > 3 {
+		mcsDelete(fields[3])
+	}
+}
+
+// roFileLabel returns the specified SELinux readonly file label
+func roFileLabel() string {
+	return readOnlyFileLabel
+}
+
+// openContextFile opens contextFile, falling back to contexts/lxc_contexts
+// below the policy root when that fails.
+func openContextFile() (*os.File, error) {
+	f, err := os.Open(contextFile)
+	if err != nil {
+		return os.Open(filepath.Join(policyRoot(), "contexts", "lxc_contexts"))
+	}
+	return f, nil
+}
+
+// loadLabels (re)initialises the labels map from the context file and then
+// derives and reserves privContainerMountLabel. When no context file can be
+// opened the map is left empty and nothing else happens.
+func loadLabels() {
+	labels = make(map[string]string)
+
+	in, err := openContextFile()
+	if err != nil {
+		return
+	}
+	defer in.Close()
+
+	sc := bufio.NewScanner(in)
+	for sc.Scan() {
+		line := bytes.TrimSpace(sc.Bytes())
+		// Skip blank lines and comments (starting with ';' or '#').
+		if len(line) == 0 || line[0] == ';' || line[0] == '#' {
+			continue
+		}
+		// Everything else must look like key = value.
+		kv := bytes.SplitN(line, []byte{'='}, 2)
+		if len(kv) != 2 {
+			continue
+		}
+		name := bytes.TrimSpace(kv[0])
+		value := bytes.Trim(bytes.TrimSpace(kv[1]), `"`)
+		labels[string(name)] = string(value)
+	}
+
+	// The privileged mount label is the "file" label with its level forced
+	// to the two highest categories.
+	con, _ := NewContext(labels["file"])
+	con["level"] = fmt.Sprintf("s0:c%d,c%d", maxCategory-2, maxCategory-1)
+	privContainerMountLabel = con.get()
+	reserveLabel(privContainerMountLabel)
+}
+
+// label returns the value stored for key in the context file, loading that
+// file on first use.
+func label(key string) string {
+	loadLabelsOnce.Do(loadLabels)
+	return labels[key]
+}
+
+// kvmContainerLabels returns the default processLabel and mountLabel to be used
+// for kvm containers by the calling process. The "process" label is used when
+// no "kvm_process" label is configured.
+func kvmContainerLabels() (string, string) {
+	proc := label("kvm_process")
+	if proc == "" {
+		proc = label("process")
+	}
+	return addMcs(proc, label("file"))
+}
+
+// initContainerLabels returns the default processLabel and file labels to be
+// used for containers running an init system like systemd by the calling process.
+func initContainerLabels() (string, string) {
+	proc := label("init_process")
+	if proc == "" {
+		// No dedicated init label configured; use the generic one.
+		proc = label("process")
+	}
+	return addMcs(proc, label("file"))
+}
+
+// containerLabels returns an allocated processLabel and fileLabel to be used for
+// container labeling by the calling process. Both are empty while SELinux is
+// not enabled. As a side effect readOnlyFileLabel is refreshed from the
+// "ro_file" entry, defaulting to the file label.
+func containerLabels() (processLabel string, fileLabel string) {
+	if !getEnabled() {
+		return "", ""
+	}
+
+	processLabel, fileLabel = label("process"), label("file")
+	readOnlyFileLabel = label("ro_file")
+
+	// Without both labels no MCS level can be allocated.
+	if processLabel == "" || fileLabel == "" {
+		return "", fileLabel
+	}
+	if readOnlyFileLabel == "" {
+		readOnlyFileLabel = fileLabel
+	}
+
+	return addMcs(processLabel, fileLabel)
+}
+
+// addMcs gives processLabel and fileLabel the same freshly reserved MCS
+// level. Both labels are returned unchanged when processLabel carries no
+// level.
+func addMcs(processLabel, fileLabel string) (string, string) {
+	pcon, _ := NewContext(processLabel)
+	if pcon["level"] == "" {
+		return processLabel, fileLabel
+	}
+
+	mcs := uniqMcs(CategoryRange)
+	pcon["level"] = mcs
+
+	fcon, _ := NewContext(fileLabel)
+	fcon["level"] = mcs
+
+	return pcon.Get(), fcon.Get()
+}
+
+// securityCheckContext validates that the SELinux label is understood by the
+// kernel by writing it to the "context" file below the SELinux mount point.
+func securityCheckContext(val string) error {
+	contextPath := filepath.Join(getSelinuxMountPoint(), "context")
+	return os.WriteFile(contextPath, []byte(val), 0)
+}
+
+// copyLevel returns a label with the MLS/MCS level from src label replaced on
+// the dest label. An empty src yields an empty result. The level previously
+// held by dest is released and the level of src is reserved.
+func copyLevel(src, dest string) (string, error) {
+	if src == "" {
+		return "", nil
+	}
+	// Both labels must be acceptable to the kernel before anything changes.
+	for _, candidate := range []string{src, dest} {
+		if err := SecurityCheckContext(candidate); err != nil {
+			return "", err
+		}
+	}
+
+	from, err := NewContext(src)
+	if err != nil {
+		return "", err
+	}
+	to, err := NewContext(dest)
+	if err != nil {
+		return "", err
+	}
+
+	mcsDelete(to["level"])
+	_ = mcsAdd(from["level"])
+	to["level"] = from["level"]
+	return to.Get(), nil
+}
+
+// chcon changes the fpath file object to the SELinux label.
+// If fpath is a directory and recurse is true, then chcon walks the
+// directory tree setting the label.
+//
+// An empty label is a no-op. Relabeling is refused for a fixed list of
+// system directories as well as for $HOME and the home directory of
+// $SUDO_USER.
+func chcon(fpath string, label string, recurse bool) error {
+	if fpath == "" {
+		return ErrEmptyPath
+	}
+	if label == "" {
+		return nil
+	}
+
+	excludePaths := map[string]bool{
+		"/":           true,
+		"/bin":        true,
+		"/boot":       true,
+		"/dev":        true,
+		"/etc":        true,
+		"/etc/passwd": true,
+		"/etc/pki":    true,
+		"/etc/shadow": true,
+		"/home":       true,
+		"/lib":        true,
+		"/lib64":      true,
+		"/media":      true,
+		"/opt":        true,
+		"/proc":       true,
+		"/root":       true,
+		"/run":        true,
+		"/sbin":       true,
+		"/srv":        true,
+		"/sys":        true,
+		"/tmp":        true,
+		"/usr":        true,
+		"/var":        true,
+		"/var/lib":    true,
+		"/var/log":    true,
+	}
+
+	if home := os.Getenv("HOME"); home != "" {
+		excludePaths[home] = true
+	}
+
+	if sudoUser := os.Getenv("SUDO_USER"); sudoUser != "" {
+		if usr, err := user.Lookup(sudoUser); err == nil {
+			excludePaths[usr.HomeDir] = true
+		}
+	}
+
+	// Drop one trailing slash so that "/etc/" is matched like "/etc".
+	if fpath != "/" {
+		fpath = strings.TrimSuffix(fpath, "/")
+	}
+	if excludePaths[fpath] {
+		return fmt.Errorf("SELinux relabeling of %s is not allowed", fpath)
+	}
+
+	if !recurse {
+		err := lSetFileLabel(fpath, label)
+		if err != nil {
+			// Check if file doesn't exist, must have been removed
+			if errors.Is(err, os.ErrNotExist) {
+				return nil
+			}
+			// Check if current label is correct on disk
+			flabel, nerr := lFileLabel(fpath)
+			if nerr == nil && flabel == label {
+				return nil
+			}
+			// Check if file doesn't exist, must have been removed
+			if errors.Is(nerr, os.ErrNotExist) {
+				return nil
+			}
+			return err
+		}
+		return nil
+	}
+
+	return rchcon(fpath, label)
+}
+
+// rchcon labels fpath and everything below it using pwalkdir.Walk, so the
+// callback may run concurrently for different entries.
+func rchcon(fpath, label string) error { //revive:disable:cognitive-complexity
+	fastMode := false
+	// If the current label matches the new label, assume
+	// other labels are correct.
+	if cLabel, err := lFileLabel(fpath); err == nil && cLabel == label {
+		fastMode = true
+	}
+	return pwalkdir.Walk(fpath, func(p string, _ fs.DirEntry, _ error) error {
+		// In fast mode an entry that already carries the label is left alone.
+		if fastMode {
+			if cLabel, err := lFileLabel(p); err == nil && cLabel == label {
+				return nil
+			}
+		}
+		err := lSetFileLabel(p, label)
+		// Walking a file tree can race with removal, so ignore ENOENT.
+		if errors.Is(err, os.ErrNotExist) {
+			return nil
+		}
+		return err
+	})
+}
+
+// dupSecOpt takes an SELinux process label and returns security options that
+// can be used to set the SELinux Type and Level for future container processes.
+// It returns nil without an error for an empty label and for a label whose
+// user, role or type is empty.
+func dupSecOpt(src string) ([]string, error) {
+	if src == "" {
+		return nil, nil
+	}
+	con, err := NewContext(src)
+	if err != nil {
+		return nil, err
+	}
+	if con["user"] == "" ||
+		con["role"] == "" ||
+		con["type"] == "" {
+		return nil, nil
+	}
+	dup := []string{
+		"user:" + con["user"],
+		"role:" + con["role"],
+		"type:" + con["type"],
+	}
+
+	if con["level"] != "" {
+		dup = append(dup, "level:"+con["level"])
+	}
+
+	return dup, nil
+}
+
+// findUserInContext scans the reader for a valid SELinux context
+// match that is verified with the verifier. Invalid contexts are
+// skipped. It returns a matched context or an empty string if no
+// match is found. If a scanner error occurs, it is returned.
+//
+// Note that context is modified while candidates are tried: its "role" and
+// "type" entries are overwritten with those of each candidate.
+func findUserInContext(context Context, r io.Reader, verifier func(string) error) (string, error) {
+	// Captured up front because the map entries are overwritten below.
+	fromRole := context["role"]
+	fromType := context["type"]
+	scanner := bufio.NewScanner(r)
+
+	for scanner.Scan() {
+		fromConns := strings.Fields(scanner.Text())
+		if len(fromConns) == 0 {
+			// Skip blank lines
+			continue
+		}
+
+		line := fromConns[0]
+
+		if line[0] == ';' || line[0] == '#' {
+			// Skip comments
+			continue
+		}
+
+		// Contexts in user context files are formatted as
+		// role_r:type_t:s0 where the user is missing.
+		lineArr := strings.SplitN(line, ":", 4)
+		// skip context with typo, or role and type do not match
+		if len(lineArr) != 3 ||
+			lineArr[0] != fromRole ||
+			lineArr[1] != fromType {
+			continue
+		}
+
+		// The remaining fields of the line are candidate target contexts;
+		// the first one the verifier accepts wins.
+		for _, cc := range fromConns[1:] {
+			toConns := strings.SplitN(cc, ":", 4)
+			if len(toConns) != 3 {
+				continue
+			}
+
+			context["role"] = toConns[0]
+			context["type"] = toConns[1]
+
+			outConn := context.get()
+			if err := verifier(outConn); err != nil {
+				continue
+			}
+
+			return outConn, nil
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return "", fmt.Errorf("failed to scan for context: %w", err)
+	}
+
+	return "", nil
+}
+
+// getDefaultContextFromReaders looks for a context matching c.scon, first in
+// c.userRdr and then in c.defaultRdr, and returns the first match accepted by
+// c.verifier. It returns ErrVerifierNil when no verifier is set and an error
+// wrapping ErrContextMissing when neither reader yields a match.
+func getDefaultContextFromReaders(c *defaultSECtx) (string, error) {
+	if c.verifier == nil {
+		return "", ErrVerifierNil
+	}
+
+	context, err := newContext(c.scon)
+	if err != nil {
+		return "", fmt.Errorf("failed to create label for %s: %w", c.scon, err)
+	}
+
+	// set so the verifier validates the matched context with the provided user and level.
+	context["user"] = c.user
+	context["level"] = c.level
+
+	conn, err := findUserInContext(context, c.userRdr, c.verifier)
+	if err != nil {
+		return "", err
+	}
+
+	if conn != "" {
+		return conn, nil
+	}
+
+	conn, err = findUserInContext(context, c.defaultRdr, c.verifier)
+	if err != nil {
+		return "", err
+	}
+
+	if conn != "" {
+		return conn, nil
+	}
+
+	return "", fmt.Errorf("context %q not found: %w", c.scon, ErrContextMissing)
+}
+
+// getDefaultContextWithLevel opens the per-user context file and the default
+// contexts file below the policy root and resolves the default context for
+// user, level and scon from them, verifying candidates with
+// securityCheckContext.
+func getDefaultContextWithLevel(user, level, scon string) (string, error) {
+	userPath := filepath.Join(policyRoot(), selinuxUsersDir, user)
+	fu, err := os.Open(userPath)
+	if err != nil {
+		return "", err
+	}
+	defer fu.Close()
+
+	defaultPath := filepath.Join(policyRoot(), defaultContexts)
+	fd, err := os.Open(defaultPath)
+	if err != nil {
+		return "", err
+	}
+	defer fd.Close()
+
+	c := defaultSECtx{
+		user:       user,
+		level:      level,
+		scon:       scon,
+		userRdr:    fu,
+		defaultRdr: fd,
+		verifier:   securityCheckContext,
+	}
+
+	return getDefaultContextFromReaders(&c)
+}
diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go
new file mode 100644
index 0000000000..382244e503
--- /dev/null
+++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go
@@ -0,0 +1,155 @@
+//go:build !linux
+// +build !linux
+
+package selinux
+
+func readConThreadSelf(string) (string, error) {
+	return "", nil // stub for builds without the linux tag: nothing is read
+}
+
+func writeConThreadSelf(string, string) error {
+	return nil // stub: the value is discarded
+}
+
+func setDisabled() {}
+
+func getEnabled() bool {
+	return false // SELinux is always reported as disabled in this build
+}
+
+func classIndex(string) (int, error) {
+	return -1, nil // no index is available; note that no error is reported either
+}
+
+func setFileLabel(string, string) error {
+	return nil
+}
+
+func lSetFileLabel(string, string) error {
+	return nil
+}
+
+func fileLabel(string) (string, error) {
+	return "", nil
+}
+
+func lFileLabel(string) (string, error) {
+	return "", nil
+}
+
+func setFSCreateLabel(string) error {
+	return nil
+}
+
+func fsCreateLabel() (string, error) {
+	return "", nil
+}
+
+func currentLabel() (string, error) {
+	return "", nil
+}
+
+func pidLabel(int) (string, error) {
+	return "", nil
+}
+
+func execLabel() (string, error) {
+	return "", nil
+}
+
+func canonicalizeContext(string) (string, error) {
+	return "", nil
+}
+
+func computeCreateContext(string, string, string) (string, error) {
+	return "", nil
+}
+
+func calculateGlbLub(string, string) (string, error) {
+	return "", nil
+}
+
+func peerLabel(uintptr) (string, error) {
+	return "", nil
+}
+
+func setKeyLabel(string) error {
+	return nil
+}
+
+func keyLabel() (string, error) {
+	return "", nil
+}
+
+func (c Context) get() string {
+	return "" // the stub never renders a context, whatever c holds
+}
+
+func newContext(string) (Context, error) {
+	return Context{}, nil // the label argument is not parsed
+}
+
+func clearLabels() {
+}
+
+func reserveLabel(string) {
+}
+
+func isMLSEnabled() bool {
+	return false
+}
+
+func enforceMode() int {
+	return Disabled
+}
+
+func setEnforceMode(int) error {
+	return nil // accepted but ignored
+}
+
+func defaultEnforceMode() int {
+	return Disabled
+}
+
+func releaseLabel(string) {
+}
+
+func roFileLabel() string {
+	return ""
+}
+
+func kvmContainerLabels() (string, string) {
+	return "", ""
+}
+
+func initContainerLabels() (string, string) {
+	return "", ""
+}
+
+func containerLabels() (string, string) {
+	return "", ""
+}
+
+func securityCheckContext(string) error {
+	return nil // every context is accepted
+}
+
+func copyLevel(string, string) (string, error) {
+	return "", nil
+}
+
+func chcon(string, string, bool) error {
+	return nil // no relabeling is attempted
+}
+
+func dupSecOpt(string) ([]string, error) {
+	return nil, nil
+}
+
+func getDefaultContextWithLevel(string, string, string) (string, error) {
+	return "", nil
+}
+
+func label(_ string) string {
+	return ""
+}
diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go
new file mode 100644
index 0000000000..559c851075
--- /dev/null
+++ b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go
@@ -0,0 +1,71 @@
+package selinux
+
+import (
+	"golang.org/x/sys/unix"
+)
+
+// lgetxattr returns a []byte slice containing the value of
+// an extended attribute attr set for path.
+func lgetxattr(path, attr string) ([]byte, error) {
+	// Start with a 128-byte buffer, which is replaced below if it is too small.
+	dest := make([]byte, 128)
+	sz, errno := doLgetxattr(path, attr, dest)
+	for errno == unix.ERANGE { //nolint:errorlint // unix errors are bare
+		// Buffer too small: query with a zero-sized buffer to learn the size.
+		sz, errno = doLgetxattr(path, attr, []byte{})
+		if errno != nil {
+			return nil, errno
+		}
+
+		dest = make([]byte, sz) // retry; the loop repeats if ERANGE is reported again
+		sz, errno = doLgetxattr(path, attr, dest)
+	}
+	if errno != nil {
+		return nil, errno
+	}
+
+	return dest[:sz], nil
+}
+
+// doLgetxattr calls unix.Lgetxattr, repeating the call while it fails with EINTR.
+func doLgetxattr(path, attr string, dest []byte) (int, error) {
+	for {
+		sz, err := unix.Lgetxattr(path, attr, dest)
+		if err != unix.EINTR {
+			return sz, err
+		}
+	}
+}
+
+// getxattr returns a []byte slice containing the value of
+// an extended attribute attr set for path, read with unix.Getxattr.
+func getxattr(path, attr string) ([]byte, error) {
+	// Start with a 128-byte buffer, which is replaced below if it is too small.
+	dest := make([]byte, 128)
+	sz, errno := dogetxattr(path, attr, dest)
+	for errno == unix.ERANGE { //nolint:errorlint // unix errors are bare
+		// Buffer too small: query with a zero-sized buffer to learn the size.
+		sz, errno = dogetxattr(path, attr, []byte{})
+		if errno != nil {
+			return nil, errno
+		}
+
+		dest = make([]byte, sz) // retry; the loop repeats if ERANGE is reported again
+		sz, errno = dogetxattr(path, attr, dest)
+	}
+	if errno != nil {
+		return nil, errno
+	}
+
+	return dest[:sz], nil
+}
+
+// dogetxattr calls unix.Getxattr, repeating the call while it fails with EINTR.
+func dogetxattr(path, attr string, dest []byte) (int, error) {
+	for {
+		sz, err := unix.Getxattr(path, attr, dest)
+		if err != unix.EINTR {
+			return sz, err
+		}
+	}
+}
diff --git a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md
new file mode 100644
index 0000000000..b827e7dd73
--- /dev/null
+++ b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md
@@ -0,0 +1,56 @@
+## pwalkdir: parallel implementation of filepath.WalkDir
+
+This is a wrapper for [filepath.WalkDir](https://pkg.go.dev/path/filepath#WalkDir)
+which may speed it up by calling multiple callback functions (WalkDirFunc)
+in parallel, utilizing goroutines.
+
+By default, it utilizes 2\*runtime.NumCPU() goroutines for callbacks.
+This can be changed by using the WalkN function, which has an additional
+parameter specifying the number of goroutines (concurrency).
+
+### pwalk vs pwalkdir
+
+This package is very similar to
+[pwalk](https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalk),
+but utilizes `filepath.WalkDir` (added in Go 1.16), which does not call stat(2)
+on every entry and is therefore faster (up to 3x, depending on usage scenario).
+
+Users who are OK with requiring Go 1.16+ should switch to this
+implementation.
+
+### Caveats
+
+Please note the following limitations of this code:
+
+* Unlike filepath.WalkDir, the order of calls is non-deterministic;
+
+* Only primitive error handling is supported:
+
+  * fs.SkipDir is not supported;
+
+  * ErrNotExist errors from filepath.WalkDir are silently ignored for any path
+    except the top directory (the root argument); any other error is returned to
+    the caller of Walk;
+
+  * once any error is returned from any WalkDirFunc instance, no more calls
+    to WalkDirFunc are made, and the error is returned to the caller of Walk;
+
+  * if more than one WalkDirFunc instance returns an error, only one
+    of those errors is propagated to and returned by Walk; the others
+    are silently discarded.
+
+### Documentation
+
+For the official documentation, see
+https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalkdir
+
+### Benchmarks
+
+For a WalkDirFunc that consists solely of the return statement, this
+implementation is about 15% slower than the standard library's
+filepath.WalkDir.
+
+Otherwise (if a WalkDirFunc is actually doing something) this is usually
+faster, except when WalkN(..., 1) is used.
Run `go test -bench .`
+to see how different operations can benefit from it, as well as how the
+level of parallelism affects the speed.
diff --git a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go
new file mode 100644
index 0000000000..5d2d09a298
--- /dev/null
+++ b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go
@@ -0,0 +1,123 @@
+//go:build go1.16
+// +build go1.16
+
+package pwalkdir
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"path/filepath"
+	"runtime"
+	"sync"
+)
+
+// Walk is a wrapper for filepath.WalkDir which can call multiple walkFn
+// in parallel, so that items can be handled concurrently. A maximum of
+// twice the runtime.NumCPU() walkFn will be called at any one time.
+// If you want to change the maximum, use WalkN instead.
+//
+// The order of calls is non-deterministic.
+//
+// Note that this implementation only supports primitive error handling:
+//
+//   - no errors are ever passed to walkFn;
+//
+//   - once a walkFn returns any error, all further processing stops
+//     and the error is returned to the caller of Walk;
+//
+//   - filepath.SkipDir is not supported;
+//
+//   - if more than one walkFn instance returns an error, only one
+//     of those errors is propagated and returned by Walk; the others
+//     are silently discarded.
+func Walk(root string, walkFn fs.WalkDirFunc) error {
+	return WalkN(root, walkFn, runtime.NumCPU()*2)
+}
+
+// WalkN is a wrapper for filepath.WalkDir which can call multiple walkFn
+// in parallel, so that items can be handled concurrently. A maximum of
+// num walkFn will be called at any one time.
+//
+// Please see Walk documentation for caveats of using this function.
+func WalkN(root string, walkFn fs.WalkDirFunc, num int) error { + // make sure limit is sensible + if num < 1 { + return fmt.Errorf("walk(%q): num must be > 0", root) + } + + files := make(chan *walkArgs, 2*num) + errCh := make(chan error, 1) // Get the first error, ignore others. + + // Start walking a tree asap. + var ( + err error + wg sync.WaitGroup + + rootLen = len(root) + rootEntry *walkArgs + ) + wg.Add(1) + go func() { + err = filepath.WalkDir(root, func(p string, entry fs.DirEntry, err error) error { + if err != nil { + // Walking a file tree can race with removal, + // so ignore ENOENT, except for root. + // https://github.com/opencontainers/selinux/issues/199. + if errors.Is(err, fs.ErrNotExist) && len(p) != rootLen { + return nil + } + close(files) + return err + } + if len(p) == rootLen { + // Root entry is processed separately below. + rootEntry = &walkArgs{path: p, entry: entry} + return nil + } + // Add a file to the queue unless a callback sent an error. + select { + case e := <-errCh: + close(files) + return e + default: + files <- &walkArgs{path: p, entry: entry} + return nil + } + }) + if err == nil { + close(files) + } + wg.Done() + }() + + wg.Add(num) + for i := 0; i < num; i++ { + go func() { + for file := range files { + if e := walkFn(file.path, file.entry, nil); e != nil { + select { + case errCh <- e: // sent ok + default: // buffer full + } + } + } + wg.Done() + }() + } + + wg.Wait() + + if err == nil { + err = walkFn(rootEntry.path, rootEntry.entry, nil) + } + + return err +} + +// walkArgs holds the arguments that were passed to the Walk or WalkN +// functions. 
+type walkArgs struct { + entry fs.DirEntry + path string +} diff --git a/vendor/github.com/seccomp/libseccomp-golang/.gitignore b/vendor/github.com/seccomp/libseccomp-golang/.gitignore new file mode 100644 index 0000000000..b4826968b8 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/.gitignore @@ -0,0 +1,4 @@ +*~ +*.swp +*.orig +tags diff --git a/vendor/github.com/seccomp/libseccomp-golang/.golangci.yml b/vendor/github.com/seccomp/libseccomp-golang/.golangci.yml new file mode 100644 index 0000000000..7df8aa1983 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/.golangci.yml @@ -0,0 +1,4 @@ +# For documentation, see https://golangci-lint.run/usage/configuration/ +linters: + enable: + - gofumpt diff --git a/vendor/github.com/seccomp/libseccomp-golang/CHANGELOG b/vendor/github.com/seccomp/libseccomp-golang/CHANGELOG new file mode 100644 index 0000000000..905a9b5cdc --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/CHANGELOG @@ -0,0 +1,42 @@ +libseccomp-golang: Releases +=============================================================================== +https://github.com/seccomp/libseccomp-golang + +* Version 0.10.0 - June 9, 2022 +- Minimum supported version of libseccomp bumped to v2.3.1 +- Add seccomp userspace notification API (ActNotify, filter.*Notif*) +- Add filter.{Get,Set}SSB (to support SCMP_FLTATR_CTL_SSB) +- Add filter.{Get,Set}Optimize (to support SCMP_FLTATR_CTL_OPTIMIZE) +- Add filter.{Get,Set}RawRC (to support SCMP_FLTATR_API_SYSRAWRC) +- Add ArchPARISC, ArchPARISC64, ArchRISCV64 +- Add ActKillProcess and ActKillThread; deprecate ActKill +- Add go module support +- Return ErrSyscallDoesNotExist when unable to resolve a syscall +- Fix some functions to check for both kernel level API and libseccomp version +- Fix MakeCondition to use sanitizeCompareOp +- Fix AddRule to handle EACCES (from libseccomp >= 2.5.0) +- Updated the main docs and converted to README.md +- Added CONTRIBUTING.md, SECURITY.md, and 
administrative docs under doc/admin +- Add GitHub action CI, enable more linters +- test: test against various libseccomp versions +- test: fix and simplify execInSubprocess +- test: fix APILevelIsSupported +- Refactor the Errno(-1 * retCode) pattern +- Refactor/unify libseccomp version / API level checks +- Code cleanups (linter, formatting, spelling fixes) +- Cleanup: use errors.New instead of fmt.Errorf where appropriate +- Cleanup: remove duplicated cgo stuff, redundant linux build tag + +* Version 0.9.1 - May 21, 2019 +- Minimum supported version of libseccomp bumped to v2.2.0 +- Use Libseccomp's `seccomp_version` API to retrieve library version +- Unconditionally set TSync attribute for filters, due to Go's heavily threaded nature +- Fix CVE-2017-18367 - Multiple syscall arguments were incorrectly combined with logical-OR, instead of logical-AND +- Fix a failure to build on Debian-based distributions due to CGo code +- Fix unit test failures on 32-bit architectures +- Improve several errors to be more verbose about their causes +- Add support for SCMP_ACT_LOG (with libseccomp versions 2.4.x and higher), permitting syscalls but logging their execution +- Add support for SCMP_FLTATR_CTL_LOG (with libseccomp versions 2.4.x and higher), logging not-allowed actions when they are denied + +* Version 0.9.0 - January 5, 2017 +- Initial tagged release diff --git a/vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md b/vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md new file mode 100644 index 0000000000..c2fc80d5af --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/CONTRIBUTING.md @@ -0,0 +1,120 @@ +How to Submit Patches to the libseccomp-golang Project +=============================================================================== +https://github.com/seccomp/libseccomp-golang + +This document is intended to act as a guide to help you contribute to the +libseccomp-golang project. 
It is not perfect, and there will always be +exceptions to the rules described here, but by following the instructions below +you should have a much easier time getting your work merged with the upstream +project. + +## Test Your Code Using Existing Tests + +A number of tests and lint related recipes are provided in the Makefile, if +you want to run the standard regression tests, you can execute the following: + + # make check + +In order to use it, the 'golangci-lint' tool is needed, which can be found at: + +* https://github.com/golangci/golangci-lint + +## Add New Tests for New Functionality + +Any submissions which add functionality, or significantly change the existing +code, should include additional tests to verify the proper operation of the +proposed changes. + +## Explain Your Work + +At the top of every patch you should include a description of the problem you +are trying to solve, how you solved it, and why you chose the solution you +implemented. If you are submitting a bug fix, it is also incredibly helpful +if you can describe/include a reproducer for the problem in the description as +well as instructions on how to test for the bug and verify that it has been +fixed. + +## Sign Your Work + +The sign-off is a simple line at the end of the patch description, which +certifies that you wrote it or otherwise have the right to pass it on as an +open-source patch. 
The "Developer's Certificate of Origin" pledge is taken +from the Linux Kernel and the rules are pretty simple: + + Developer's Certificate of Origin 1.1 + + By making a contribution to this project, I certify that: + + (a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + + (b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + + (c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + + (d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. + +... then you just add a line to the bottom of your patch description, with +your real name, saying: + + Signed-off-by: Random J Developer + +You can add this to your commit description in `git` with `git commit -s` + +## Post Your Patches Upstream + +The libseccomp project accepts both GitHub pull requests and patches sent via +the mailing list. GitHub pull requests are preferred. The sections below +explain how to contribute via either method. Please read each step and perform +all steps that apply to your chosen contribution method. + +### Submitting via Email + +Depending on how you decided to work with the libseccomp code base and what +tools you are using there are different ways to generate your patch(es).
+However, regardless of what tools you use, you should always generate your +patches using the "unified" diff/patch format and the patches should always +apply to the libseccomp source tree using the following command from the top +directory of the libseccomp sources: + + # patch -p1 < changes.patch + +If you are not using git, stacked git (stgit), or some other tool which can +generate patch files for you automatically, you may find the following command +helpful in generating patches, where "libseccomp.orig/" is the unmodified +source code directory and "libseccomp/" is the source code directory with your +changes: + + # diff -purN libseccomp.orig/ libseccomp/ + +When in doubt please generate your patch and try applying it to an unmodified +copy of the libseccomp sources; if it fails for you, it will fail for the rest +of us. + +Finally, you will need to email your patches to the mailing list so they can +be reviewed and potentially merged into the main libseccomp repository. When +sending patches to the mailing list it is important to send your email in text +form, no HTML mail please, and ensure that your email client does not mangle +your patches. It should be possible to save your raw email to disk and apply +it directly to the libseccomp source code; if that fails then you likely have +a problem with your email client. When in doubt try a test first by sending +yourself an email with your patch and attempting to apply the emailed patch to +the libseccomp repository; if it fails for you, it will fail for the rest of +us trying to test your patch and include it in the main libseccomp repository. + +### Submitting via GitHub + +See [this guide](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request) if you've never done this before. 
diff --git a/vendor/github.com/seccomp/libseccomp-golang/LICENSE b/vendor/github.com/seccomp/libseccomp-golang/LICENSE new file mode 100644 index 0000000000..81cf60de29 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015 Matthew Heon +Copyright (c) 2015 Paul Moore +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/vendor/github.com/seccomp/libseccomp-golang/Makefile b/vendor/github.com/seccomp/libseccomp-golang/Makefile new file mode 100644 index 0000000000..530f5b4adb --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/Makefile @@ -0,0 +1,31 @@ +# libseccomp-golang + +.PHONY: all check check-build check-syntax fix-syntax vet test lint + +all: check-build + +check: lint test + +check-build: + go build + +check-syntax: + gofmt -d . + +fix-syntax: + gofmt -w . + +vet: + go vet -v ./... + +# Previous bugs have made the tests freeze until the timeout. Golang default +# timeout for tests is 10 minutes, which is too long, considering current tests +# can be executed in less than 1 second. Reduce the timeout, so problems can +# be noticed earlier in the CI. +TEST_TIMEOUT=10s + +test: + go test -v -timeout $(TEST_TIMEOUT) + +lint: + golangci-lint run . diff --git a/vendor/github.com/seccomp/libseccomp-golang/README.md b/vendor/github.com/seccomp/libseccomp-golang/README.md new file mode 100644 index 0000000000..312135ee59 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/README.md @@ -0,0 +1,59 @@ +![libseccomp Golang Bindings](https://github.com/seccomp/libseccomp-artwork/blob/main/logo/libseccomp-color_text.png) +=============================================================================== +https://github.com/seccomp/libseccomp-golang + +[![Go Reference](https://pkg.go.dev/badge/github.com/seccomp/libseccomp-golang.svg)](https://pkg.go.dev/github.com/seccomp/libseccomp-golang) +[![validate](https://github.com/seccomp/libseccomp-golang/actions/workflows/validate.yml/badge.svg)](https://github.com/seccomp/libseccomp-golang/actions/workflows/validate.yml) +[![test](https://github.com/seccomp/libseccomp-golang/actions/workflows/test.yml/badge.svg)](https://github.com/seccomp/libseccomp-golang/actions/workflows/test.yml) + +The libseccomp library provides an easy to use, platform independent, interface +to the Linux Kernel's syscall filtering 
mechanism. The libseccomp API is +designed to abstract away the underlying BPF based syscall filter language and +present a more conventional function-call based filtering interface that should +be familiar to, and easily adopted by, application developers. + +The libseccomp-golang library provides a Go based interface to the libseccomp +library. + +## Online Resources + +The library source repository currently lives on GitHub at the following URLs: + +* https://github.com/seccomp/libseccomp-golang +* https://github.com/seccomp/libseccomp + +Documentation for this package is also available at: + +* https://pkg.go.dev/github.com/seccomp/libseccomp-golang + +## Verifying Releases + +Starting with libseccomp-golang v0.10.0, the git tag corresponding to each +release should be signed by one of the libseccomp-golang maintainers. It is +recommended that before use you verify the release tags using the following +command: + + % git tag -v + +At present, only the following keys, specified via the fingerprints below, are +authorized to sign official libseccomp-golang release tags: + + Paul Moore + 7100 AADF AE6E 6E94 0D2E 0AD6 55E4 5A5A E8CA 7C8A + + Tom Hromatka + 47A6 8FCE 37C7 D702 4FD6 5E11 356C E62C 2B52 4099 + + Kir Kolyshkin + C242 8CD7 5720 FACD CF76 B6EA 17DE 5ECB 75A1 100E + +More information on GnuPG and git tag verification can be found at their +respective websites: https://git-scm.com/docs/git and https://gnupg.org. + +## Installing the package + + % go get github.com/seccomp/libseccomp-golang + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md). 
diff --git a/vendor/github.com/seccomp/libseccomp-golang/SECURITY.md b/vendor/github.com/seccomp/libseccomp-golang/SECURITY.md new file mode 100644 index 0000000000..f645d4efec --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/SECURITY.md @@ -0,0 +1,48 @@ +The libseccomp-golang Security Vulnerability Handling Process +=============================================================================== +https://github.com/seccomp/libseccomp-golang + +This document attempts to describe the processes through which +sensitive security relevant bugs can be responsibly disclosed to the +libseccomp-golang project and how the project maintainers should handle these +reports. Just like the other libseccomp-golang process documents, this +document should be treated as a guiding document and not a hard, unyielding set +of regulations; the bug reporters and project maintainers are encouraged to +work together to address the issues as best they can, in a manner which works +best for all parties involved. + +### Reporting Problems + +Problems with the libseccomp-golang library that are not suitable for immediate +public disclosure should be emailed to the current libseccomp-golang +maintainers, the list is below. We typically request at most a 90 day time +period to address the issue before it is made public, but we will make every +effort to address the issue as quickly as possible and shorten the disclosure +window. + +* Paul Moore, paul@paul-moore.com +* Tom Hromatka, tom.hromatka@oracle.com +* Kir Kolyshkin, kolyshkin@gmail.com + +### Resolving Sensitive Security Issues + +Upon disclosure of a bug, the maintainers should work together to investigate +the problem and decide on a solution. In order to prevent an early disclosure +of the problem, those working on the solution should do so privately and +outside of the traditional libseccomp-golang development practices.
One +possible solution to this is to leverage the GitHub "Security" functionality to +create a private development fork that can be shared among the maintainers, and +optionally the reporter. A placeholder GitHub issue may be created, but +details should remain extremely limited until such time as the problem has been +fixed and responsibly disclosed. If a CVE, or other tag, has been assigned to +the problem, the GitHub issue title should include the vulnerability tag once +the problem has been disclosed. + +### Public Disclosure + +Whenever possible, responsible reporting and patching practices should be +followed, including notification to the linux-distros and oss-security mailing +lists. + +* https://oss-security.openwall.org/wiki/mailing-lists/distros +* https://oss-security.openwall.org/wiki/mailing-lists/oss-security diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go new file mode 100644 index 0000000000..c23406754c --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go @@ -0,0 +1,1188 @@ +// Public API specification for libseccomp Go bindings +// Contains public API for the bindings + +// Package seccomp provides bindings for libseccomp, a library wrapping the Linux +// seccomp syscall. Seccomp enables an application to restrict system call use +// for itself and its children. +package seccomp + +import ( + "errors" + "fmt" + "os" + "runtime" + "strings" + "sync" + "syscall" + "unsafe" +) + +// #include +// #include +import "C" + +// Exported types + +// VersionError represents an error when either the system libseccomp version +// or the kernel version is too old to perform the operation requested. 
+type VersionError struct { + op string // operation that failed or would fail + major, minor, micro uint // minimally required libseccomp version + curAPI, minAPI uint // current and minimally required API versions +} + +func init() { + // This forces the cgo libseccomp to initialize its internal API support state, + // which is necessary on older versions of libseccomp in order to work + // correctly. + _, _ = getAPI() +} + +func (e VersionError) Error() string { + if e.minAPI != 0 { + return fmt.Sprintf("%s requires libseccomp >= %d.%d.%d and API level >= %d "+ + "(current version: %d.%d.%d, API level: %d)", + e.op, e.major, e.minor, e.micro, e.minAPI, + verMajor, verMinor, verMicro, e.curAPI) + } + return fmt.Sprintf("%s requires libseccomp >= %d.%d.%d (current version: %d.%d.%d)", + e.op, e.major, e.minor, e.micro, verMajor, verMinor, verMicro) +} + +// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a +// per-architecture basis. +type ScmpArch uint + +// ScmpAction represents an action to be taken on a filter rule match in +// libseccomp +type ScmpAction uint + +// ScmpCompareOp represents a comparison operator which can be used in a filter +// rule +type ScmpCompareOp uint + +// ScmpCondition represents a rule in a libseccomp filter context +type ScmpCondition struct { + Argument uint `json:"argument,omitempty"` + Op ScmpCompareOp `json:"operator,omitempty"` + Operand1 uint64 `json:"operand_one,omitempty"` + Operand2 uint64 `json:"operand_two,omitempty"` +} + +// Seccomp userspace notification structures associated with filters that use the ActNotify action. + +// ScmpSyscall identifies a Linux System Call by its number. +type ScmpSyscall int32 + +// ScmpFd represents a file-descriptor used for seccomp userspace notifications. +type ScmpFd int32 + +// ScmpNotifData describes the system call context that triggered a notification. 
+// +// Syscall: the syscall number +// Arch: the filter architecture +// InstrPointer: address of the instruction that triggered a notification +// Args: arguments (up to 6) for the syscall +// +type ScmpNotifData struct { + Syscall ScmpSyscall `json:"syscall,omitempty"` + Arch ScmpArch `json:"arch,omitempty"` + InstrPointer uint64 `json:"instr_pointer,omitempty"` + Args []uint64 `json:"args,omitempty"` +} + +// ScmpNotifReq represents a seccomp userspace notification. See NotifReceive() for +// info on how to pull such a notification. +// +// ID: notification ID +// Pid: process that triggered the notification event +// Flags: filter flags (see seccomp(2)) +// Data: system call context that triggered the notification +// +type ScmpNotifReq struct { + ID uint64 `json:"id,omitempty"` + Pid uint32 `json:"pid,omitempty"` + Flags uint32 `json:"flags,omitempty"` + Data ScmpNotifData `json:"data,omitempty"` +} + +// ScmpNotifResp represents a seccomp userspace notification response. See NotifRespond() +// for info on how to push such a response. +// +// ID: notification ID (must match the corresponding ScmpNotifReq ID) +// Error: must be 0 if no error occurred, or an error constant from package +// syscall (e.g., syscall.EPERM, etc). In the latter case, it's used +// as an error return from the syscall that created the notification. +// Val: return value for the syscall that created the notification. Only +// relevant if Error is 0. 
+// Flags: userspace notification response flag (e.g., NotifRespFlagContinue) +// +type ScmpNotifResp struct { + ID uint64 `json:"id,omitempty"` + Error int32 `json:"error,omitempty"` + Val uint64 `json:"val,omitempty"` + Flags uint32 `json:"flags,omitempty"` +} + +// Exported Constants + +const ( + // Valid architectures recognized by libseccomp + // PowerPC and S390(x) architectures are unavailable below library version + // v2.3.0 and will returns errors if used with incompatible libraries + + // ArchInvalid is a placeholder to ensure uninitialized ScmpArch + // variables are invalid + ArchInvalid ScmpArch = iota + // ArchNative is the native architecture of the kernel + ArchNative + // ArchX86 represents 32-bit x86 syscalls + ArchX86 + // ArchAMD64 represents 64-bit x86-64 syscalls + ArchAMD64 + // ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers) + ArchX32 + // ArchARM represents 32-bit ARM syscalls + ArchARM + // ArchARM64 represents 64-bit ARM syscalls + ArchARM64 + // ArchMIPS represents 32-bit MIPS syscalls + ArchMIPS + // ArchMIPS64 represents 64-bit MIPS syscalls + ArchMIPS64 + // ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers) + ArchMIPS64N32 + // ArchMIPSEL represents 32-bit MIPS syscalls (little endian) + ArchMIPSEL + // ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian) + ArchMIPSEL64 + // ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian, + // 32-bit pointers) + ArchMIPSEL64N32 + // ArchPPC represents 32-bit POWERPC syscalls + ArchPPC + // ArchPPC64 represents 64-bit POWER syscalls (big endian) + ArchPPC64 + // ArchPPC64LE represents 64-bit POWER syscalls (little endian) + ArchPPC64LE + // ArchS390 represents 31-bit System z/390 syscalls + ArchS390 + // ArchS390X represents 64-bit System z/390 syscalls + ArchS390X + // ArchPARISC represents 32-bit PA-RISC + ArchPARISC + // ArchPARISC64 represents 64-bit PA-RISC + ArchPARISC64 + // ArchRISCV64 represents RISCV64 + ArchRISCV64 +) + +const ( + // 
Supported actions on filter match + + // ActInvalid is a placeholder to ensure uninitialized ScmpAction + // variables are invalid + ActInvalid ScmpAction = iota + // ActKillThread kills the thread that violated the rule. + // All other threads from the same thread group will continue to execute. + ActKillThread + // ActTrap throws SIGSYS + ActTrap + // ActNotify triggers a userspace notification. This action is only usable when + // libseccomp API level 6 or higher is supported. + ActNotify + // ActErrno causes the syscall to return a negative error code. This + // code can be set with the SetReturnCode method + ActErrno + // ActTrace causes the syscall to notify tracing processes with the + // given error code. This code can be set with the SetReturnCode method + ActTrace + // ActAllow permits the syscall to continue execution + ActAllow + // ActLog permits the syscall to continue execution after logging it. + // This action is only usable when libseccomp API level 3 or higher is + // supported. + ActLog + // ActKillProcess kills the process that violated the rule. + // All threads in the thread group are also terminated. + // This action is only usable when libseccomp API level 3 or higher is + // supported. + ActKillProcess + // ActKill kills the thread that violated the rule. + // All other threads from the same thread group will continue to execute. 
+ // + // Deprecated: use ActKillThread + ActKill = ActKillThread +) + +const ( + // These are comparison operators used in conditional seccomp rules + // They are used to compare the value of a single argument of a syscall + // against a user-defined constant + + // CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp + // variables are invalid + CompareInvalid ScmpCompareOp = iota + // CompareNotEqual returns true if the argument is not equal to the + // given value + CompareNotEqual + // CompareLess returns true if the argument is less than the given value + CompareLess + // CompareLessOrEqual returns true if the argument is less than or equal + // to the given value + CompareLessOrEqual + // CompareEqual returns true if the argument is equal to the given value + CompareEqual + // CompareGreaterEqual returns true if the argument is greater than or + // equal to the given value + CompareGreaterEqual + // CompareGreater returns true if the argument is greater than the given + // value + CompareGreater + // CompareMaskedEqual returns true if the masked argument value is + // equal to the masked datum value. Mask is the first argument, and + // datum is the second one. + CompareMaskedEqual +) + +// ErrSyscallDoesNotExist represents an error condition where +// libseccomp is unable to resolve the syscall. +var ErrSyscallDoesNotExist = errors.New("could not resolve syscall name") + +const ( + // Userspace notification response flags + + // NotifRespFlagContinue tells the kernel to continue executing the system + // call that triggered the notification. Must only be used when the notification + // response's error is 0. 
+ NotifRespFlagContinue uint32 = 1 +) + +// Helpers for types + +// GetArchFromString returns an ScmpArch constant from a string representing an +// architecture +func GetArchFromString(arch string) (ScmpArch, error) { + if err := ensureSupportedVersion(); err != nil { + return ArchInvalid, err + } + + switch strings.ToLower(arch) { + case "x86": + return ArchX86, nil + case "amd64", "x86-64", "x86_64", "x64": + return ArchAMD64, nil + case "x32": + return ArchX32, nil + case "arm": + return ArchARM, nil + case "arm64", "aarch64": + return ArchARM64, nil + case "mips": + return ArchMIPS, nil + case "mips64": + return ArchMIPS64, nil + case "mips64n32": + return ArchMIPS64N32, nil + case "mipsel": + return ArchMIPSEL, nil + case "mipsel64": + return ArchMIPSEL64, nil + case "mipsel64n32": + return ArchMIPSEL64N32, nil + case "ppc": + return ArchPPC, nil + case "ppc64": + return ArchPPC64, nil + case "ppc64le": + return ArchPPC64LE, nil + case "s390": + return ArchS390, nil + case "s390x": + return ArchS390X, nil + case "parisc": + return ArchPARISC, nil + case "parisc64": + return ArchPARISC64, nil + case "riscv64": + return ArchRISCV64, nil + default: + return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %q", arch) + } +} + +// String returns a string representation of an architecture constant +func (a ScmpArch) String() string { + switch a { + case ArchX86: + return "x86" + case ArchAMD64: + return "amd64" + case ArchX32: + return "x32" + case ArchARM: + return "arm" + case ArchARM64: + return "arm64" + case ArchMIPS: + return "mips" + case ArchMIPS64: + return "mips64" + case ArchMIPS64N32: + return "mips64n32" + case ArchMIPSEL: + return "mipsel" + case ArchMIPSEL64: + return "mipsel64" + case ArchMIPSEL64N32: + return "mipsel64n32" + case ArchPPC: + return "ppc" + case ArchPPC64: + return "ppc64" + case ArchPPC64LE: + return "ppc64le" + case ArchS390: + return "s390" + case ArchS390X: + return "s390x" + case ArchPARISC: + return "parisc" + case 
ArchPARISC64: + return "parisc64" + case ArchRISCV64: + return "riscv64" + case ArchNative: + return "native" + case ArchInvalid: + return "Invalid architecture" + default: + return fmt.Sprintf("Unknown architecture %#x", uint(a)) + } +} + +// String returns a string representation of a comparison operator constant +func (a ScmpCompareOp) String() string { + switch a { + case CompareNotEqual: + return "Not equal" + case CompareLess: + return "Less than" + case CompareLessOrEqual: + return "Less than or equal to" + case CompareEqual: + return "Equal" + case CompareGreaterEqual: + return "Greater than or equal to" + case CompareGreater: + return "Greater than" + case CompareMaskedEqual: + return "Masked equality" + case CompareInvalid: + return "Invalid comparison operator" + default: + return fmt.Sprintf("Unrecognized comparison operator %#x", uint(a)) + } +} + +// String returns a string representation of a seccomp match action +func (a ScmpAction) String() string { + switch a & 0xFFFF { + case ActKillThread: + return "Action: Kill thread" + case ActKillProcess: + return "Action: Kill process" + case ActTrap: + return "Action: Send SIGSYS" + case ActErrno: + return fmt.Sprintf("Action: Return error code %d", (a >> 16)) + case ActTrace: + return fmt.Sprintf("Action: Notify tracing processes with code %d", + (a >> 16)) + case ActNotify: + return "Action: Notify userspace" + case ActLog: + return "Action: Log system call" + case ActAllow: + return "Action: Allow system call" + default: + return fmt.Sprintf("Unrecognized Action %#x", uint(a)) + } +} + +// SetReturnCode adds a return code to a supporting ScmpAction, clearing any +// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise. +// Accepts 16-bit return code as argument. +// Returns a valid ScmpAction of the original type with the new error code set. 
+func (a ScmpAction) SetReturnCode(code int16) ScmpAction { + aTmp := a & 0x0000FFFF + if aTmp == ActErrno || aTmp == ActTrace { + return (aTmp | (ScmpAction(code)&0xFFFF)<<16) + } + return a +} + +// GetReturnCode returns the return code of an ScmpAction +func (a ScmpAction) GetReturnCode() int16 { + return int16(a >> 16) +} + +// General utility functions + +// GetLibraryVersion returns the version of the library the bindings are built +// against. +// The version is formatted as follows: Major.Minor.Micro +func GetLibraryVersion() (major, minor, micro uint) { + return verMajor, verMinor, verMicro +} + +// GetAPI returns the API level supported by the system. +// Returns a positive int containing the API level, or 0 with an error if the +// API level could not be detected due to the library being older than v2.4.0. +// See the seccomp_api_get(3) man page for details on available API levels: +// https://github.com/seccomp/libseccomp/blob/main/doc/man/man3/seccomp_api_get.3 +func GetAPI() (uint, error) { + return getAPI() +} + +// SetAPI forcibly sets the API level. General use of this function is strongly +// discouraged. +// Returns an error if the API level could not be set. An error is always +// returned if the library is older than v2.4.0 +// See the seccomp_api_get(3) man page for details on available API levels: +// https://github.com/seccomp/libseccomp/blob/main/doc/man/man3/seccomp_api_get.3 +func SetAPI(api uint) error { + return setAPI(api) +} + +// Syscall functions + +// GetName retrieves the name of a syscall from its number. +// Acts on any syscall number. +// Returns either a string containing the name of the syscall, or an error. +func (s ScmpSyscall) GetName() (string, error) { + return s.GetNameByArch(ArchNative) +} + +// GetNameByArch retrieves the name of a syscall from its number for a given +// architecture. +// Acts on any syscall number. +// Accepts a valid architecture constant. 
+// Returns either a string containing the name of the syscall, or an error +// if the architecture is invalid, the syscall is unrecognized or an issue occurred. +func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) { + if err := sanitizeArch(arch); err != nil { + return "", err + } + + cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s)) + if cString == nil { + return "", ErrSyscallDoesNotExist + } + defer C.free(unsafe.Pointer(cString)) + + finalStr := C.GoString(cString) + return finalStr, nil +} + +// GetSyscallFromName returns the number of a syscall by name on the kernel's +// native architecture. +// Accepts a string containing the name of a syscall. +// Returns the number of the syscall, or an error if no syscall with that name +// was found. +func GetSyscallFromName(name string) (ScmpSyscall, error) { + if err := ensureSupportedVersion(); err != nil { + return 0, err + } + + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name(cString) + if result == scmpError { + return 0, ErrSyscallDoesNotExist + } + + return ScmpSyscall(result), nil +} + +// GetSyscallFromNameByArch returns the number of a syscall by name for a given +// architecture's ABI. +// Accepts the name of a syscall and an architecture constant. +// Returns the number of the syscall, or an error if an invalid architecture is +// passed or a syscall with that name was not found. +func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) { + if err := ensureSupportedVersion(); err != nil { + return 0, err + } + if err := sanitizeArch(arch); err != nil { + return 0, err + } + + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString) + if result == scmpError { + return 0, ErrSyscallDoesNotExist + } + + return ScmpSyscall(result), nil +} + +// MakeCondition creates and returns a new condition to attach to a filter rule.
+// Associated rules will only match if this condition is true. +// Accepts the number of the argument we are checking, and a comparison operator +// and value to compare to. +// The rule will match if argument $arg (zero-indexed) of the syscall is +// $COMPARE_OP the provided comparison value. +// Some comparison operators accept two values. Masked equals, for example, +// will mask $arg of the syscall with the second value provided (via bitwise +// AND) and then compare against the first value provided. +// NOTE(review): the two values are stored as Operand1 and Operand2 in the order +// given; libseccomp's SCMP_CMP_MASKED_EQ is believed to take the mask first and +// the value second - confirm the operand order described above. +// For example, in the less than or equal case, if the syscall argument was +// 0 and the value provided was 1, the condition would match, as 0 is less +// than or equal to 1. +// Returns either an error on bad argument or a valid ScmpCondition struct. +func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) { + var condStruct ScmpCondition + + if err := ensureSupportedVersion(); err != nil { + return condStruct, err + } + + if err := sanitizeCompareOp(comparison); err != nil { + return condStruct, err + } else if arg > 5 { + return condStruct, fmt.Errorf("syscalls only have up to 6 arguments (%d given)", arg) + } else if len(values) > 2 { + return condStruct, fmt.Errorf("conditions can have at most 2 arguments (%d given)", len(values)) + } else if len(values) == 0 { + return condStruct, errors.New("must provide at least one value to compare against") + } + + condStruct.Argument = arg + condStruct.Op = comparison + condStruct.Operand1 = values[0] + if len(values) == 2 { + condStruct.Operand2 = values[1] + } else { + condStruct.Operand2 = 0 // Unused + } + + return condStruct, nil +} + +// Utility Functions + +// GetNativeArch returns the architecture token representing the native kernel +// architecture. +func GetNativeArch() (ScmpArch, error) { + if err := ensureSupportedVersion(); err != nil { + return ArchInvalid, err + } + + arch := C.seccomp_arch_native() + + return archFromNative(arch) +} + +// Public Filter API + +//
ScmpFilter represents a filter context in libseccomp. +// A filter context is initially empty. Rules can be added to it, and it can +// then be loaded into the kernel. +// Filters must be created with NewFilter; a zero-value ScmpFilter is not valid. +type ScmpFilter struct { + filterCtx C.scmp_filter_ctx + valid bool + lock sync.Mutex +} + +// NewFilter creates and returns a new filter context. Accepts a default action to be +// taken for syscalls which match no rules in the filter. +// Returns a reference to a valid filter context, or nil and an error +// if the filter context could not be created or an invalid default action was given. +func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { + if err := ensureSupportedVersion(); err != nil { + return nil, err + } + + if err := sanitizeAction(defaultAction); err != nil { + return nil, err + } + + fPtr := C.seccomp_init(defaultAction.toNative()) + if fPtr == nil { + return nil, errors.New("could not create filter") + } + + filter := new(ScmpFilter) + filter.filterCtx = fPtr + filter.valid = true + runtime.SetFinalizer(filter, filterFinalizer) + + // Enable TSync so all goroutines will receive the same rules. + // If the kernel does not support TSYNC, allow us to continue without error. + if err := filter.setFilterAttr(filterAttrTsync, 0x1); err != nil && err != syscall.ENOTSUP { + filter.Release() + return nil, fmt.Errorf("could not create filter: error setting tsync bit: %w", err) + } + + return filter, nil +} + +// IsValid determines whether a filter context is valid to use. +// Some operations (Release, and Merge for its source filter) render filter +// contexts invalid and consequently prevent further use. +func (f *ScmpFilter) IsValid() bool { + f.lock.Lock() + defer f.lock.Unlock() + + return f.valid +} + +// Reset resets a filter context, removing all its existing state. +// Accepts a new default action to be taken for syscalls which do not match. +// Returns an error if the filter or action provided are invalid.
+func (f *ScmpFilter) Reset(defaultAction ScmpAction) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeAction(defaultAction); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative()); retCode != 0 { + return errRc(retCode) + } + + return nil +} + +// Release releases a filter context, freeing its memory. Should be called after +// loading into the kernel, when the filter is no longer needed. +// After calling this function, the given filter is no longer valid and cannot +// be used. Calling Release again on an already released filter does nothing. +// Release() will be invoked automatically when a filter context is garbage +// collected, but can also be called manually to free memory. +func (f *ScmpFilter) Release() { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return + } + + f.valid = false + C.seccomp_release(f.filterCtx) +} + +// Merge merges two filter contexts. +// The source filter src will be released as part of the process, and will no +// longer be usable or valid after this call. +// To be merged, filters must NOT share any architectures, and all their +// attributes (Default Action, Bad Arch Action, and No New Privs bools) +// must match. +// The filter src will be merged into the filter this is called on. +// The architectures of the src filter not present in the destination, and all +// associated rules, will be added to the destination. +// src must be non-nil and must not be the receiver itself: the locks of both +// filters are taken one after the other, so a self-merge would deadlock. +// Returns an error if merging the filters failed.
+func (f *ScmpFilter) Merge(src *ScmpFilter) error { + f.lock.Lock() + defer f.lock.Unlock() + + src.lock.Lock() + defer src.lock.Unlock() + + if !src.valid || !f.valid { + return errors.New("one or more of the filter contexts is invalid or uninitialized") + } + + // Merge the filters. Both locks are held at this point (destination first, + // then source); on success src is marked invalid below. + if retCode := C.seccomp_merge(f.filterCtx, src.filterCtx); retCode != 0 { + e := errRc(retCode) + if e == syscall.EINVAL { + return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter: %w", e) + } + return e + } + + src.valid = false + + return nil +} + +// IsArchPresent checks if an architecture is present in a filter. +// If a filter contains an architecture, it uses its default action for +// syscalls which do not match rules in it, and its rules can match syscalls +// for that ABI. +// If a filter does not contain an architecture, all syscalls made to that +// kernel ABI will fail with the filter's default Bad Architecture Action +// (by default, killing the process). +// Accepts an architecture constant. +// Returns true if the architecture is present in the filter, false otherwise, +// and an error on an invalid filter context, architecture constant, or an +// issue with the call to libseccomp. +func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return false, err + } else if !f.valid { + return false, errBadFilter + } + + if retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative()); retCode != 0 { + e := errRc(retCode) + if e == syscall.EEXIST { + // -EEXIST is "arch not present" + return false, nil + } + return false, e + } + + return true, nil +} + +// AddArch adds an architecture to the filter. +// Accepts an architecture constant. +// Adding an architecture that is already present succeeds silently. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp.
+func (f *ScmpFilter) AddArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + // Libseccomp returns -EEXIST if the specified architecture is already + // present. Succeed silently in this case, as it's not fatal, and the + // architecture is present already. + if retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative()); retCode != 0 { + if e := errRc(retCode); e != syscall.EEXIST { + return e + } + } + + return nil +} + +// RemoveArch removes an architecture from the filter. +// Accepts an architecture constant. +// Removing an architecture that is not present succeeds silently. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp. +func (f *ScmpFilter) RemoveArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return errBadFilter + } + + // Similar to AddArch, -EEXIST is returned if the arch is not present. + // Succeed silently in that case: this is not fatal, and the architecture + // is not present in the filter after RemoveArch. + if retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative()); retCode != 0 { + if e := errRc(retCode); e != syscall.EEXIST { + return e + } + } + + return nil +} + +// Load loads a filter context into the kernel. +// Returns an error if the filter context is invalid or the syscall failed. +func (f *ScmpFilter) Load() error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_load(f.filterCtx); retCode != 0 { + return errRc(retCode) + } + + return nil +} + +// GetDefaultAction returns the default action taken on a syscall which does not +// match a rule in the filter, or an error if an issue was encountered +// retrieving the value.
+func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActDefault) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// GetBadArchAction returns the default action taken on a syscall for an +// architecture not in the filter, or an error if an issue was encountered +// retrieving the value. +func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActBadArch) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// GetNoNewPrivsBit returns the state the No New Privileges bit will be set to +// when the filter is loaded, or an error if an issue was encountered +// retrieving the value. +// The No New Privileges bit tells the kernel that new processes run with exec() +// cannot gain more privileges than the process that ran exec(). +// For example, a process with No New Privileges set would not gain privileges +// by exec'ing setuid/setgid executables. +func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) { + noNewPrivs, err := f.getFilterAttr(filterAttrNNP) + if err != nil { + return false, err + } + + if noNewPrivs == 0 { + return false, nil + } + + return true, nil +} + +// GetLogBit returns the state the Log bit will be set to when the filter is +// loaded, or an error if an issue was encountered retrieving the value. +// The Log bit tells the kernel that all actions taken by the filter, with the +// exception of ActAllow, should be logged. +// The Log bit is only usable when libseccomp API level 3 or higher is +// supported.
+func (f *ScmpFilter) GetLogBit() (bool, error) { + log, err := f.getFilterAttr(filterAttrLog) + if err != nil { + if e := checkAPI("GetLogBit", 3, 2, 4, 0); e != nil { + err = e + } + + return false, err + } + + if log == 0 { + return false, nil + } + + return true, nil +} + +// GetSSB returns the state the SSB bit will be set to when the filter is +// loaded, or an error if an issue was encountered retrieving the value. +// The SSB bit tells the kernel that a seccomp user is not interested in enabling +// Speculative Store Bypass mitigation. +// The SSB bit is only usable when libseccomp API level 4 or higher is +// supported. +func (f *ScmpFilter) GetSSB() (bool, error) { + ssb, err := f.getFilterAttr(filterAttrSSB) + if err != nil { + if e := checkAPI("GetSSB", 4, 2, 5, 0); e != nil { + err = e + } + + return false, err + } + + if ssb == 0 { + return false, nil + } + + return true, nil +} + +// GetOptimize returns the current optimization level of the filter, +// or an error if an issue was encountered retrieving the value. +// See SetOptimize for more details. +func (f *ScmpFilter) GetOptimize() (int, error) { + level, err := f.getFilterAttr(filterAttrOptimize) + if err != nil { + if e := checkAPI("GetOptimize", 4, 2, 5, 0); e != nil { + err = e + } + + return 0, err + } + + return int(level), nil +} + +// GetRawRC returns the current state of the RawRC flag, or an error +// if an issue was encountered retrieving the value. +// See SetRawRC for more details. +func (f *ScmpFilter) GetRawRC() (bool, error) { + rawrc, err := f.getFilterAttr(filterAttrRawRC) + if err != nil { + if e := checkAPI("GetRawRC", 4, 2, 5, 0); e != nil { + err = e + } + + return false, err + } + + if rawrc == 0 { + return false, nil + } + + return true, nil +} + +// SetBadArchAction sets the default action taken on a syscall for an +// architecture not in the filter. +// Returns an error if the action is invalid or an issue was encountered +// setting the value.
+func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error { + if err := sanitizeAction(action); err != nil { + return err + } + + return f.setFilterAttr(filterAttrActBadArch, action.toNative()) +} + +// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be +// applied on filter load. Returns an error if an issue was encountered setting +// the value. +// Filters with No New Privileges set to 0 can only be loaded if the process +// has the CAP_SYS_ADMIN capability. +func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + return f.setFilterAttr(filterAttrNNP, toSet) +} + +// SetLogBit sets the state of the Log bit, which will be applied on filter +// load. Returns an error if an issue was encountered setting the value. +// The Log bit is only usable when libseccomp API level 3 or higher is +// supported. +func (f *ScmpFilter) SetLogBit(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + err := f.setFilterAttr(filterAttrLog, toSet) + if err != nil { + if e := checkAPI("SetLogBit", 3, 2, 4, 0); e != nil { + err = e + } + } + + return err +} + +// SetSSB sets the state of the SSB bit, which will be applied on filter +// load. Returns an error if an issue was encountered setting the value. +// The SSB bit is only usable when libseccomp API level 4 or higher is +// supported. +func (f *ScmpFilter) SetSSB(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + err := f.setFilterAttr(filterAttrSSB, toSet) + if err != nil { + if e := checkAPI("SetSSB", 4, 2, 5, 0); e != nil { + err = e + } + } + + return err +} + +// SetOptimize sets the optimization level of the seccomp filter. By default +// libseccomp generates a set of sequential "if" statements for each rule in +// the filter. SetSyscallPriority can be used to prioritize the order for the +// default case.
The binary tree optimization sorts by syscall numbers and +// generates consistent O(log n) filter traversal for every rule in the filter. +// The binary tree may be advantageous for large filters. Note that +// SetSyscallPriority is ignored when level == 2. +// +// The different optimization levels are: +// 0: Reserved value, not currently used. +// 1: Rules sorted by priority and complexity (DEFAULT). +// 2: Binary tree sorted by syscall number. +// +// The level is converted to an unsigned 32-bit value without any range check +// in this function. +func (f *ScmpFilter) SetOptimize(level int) error { + cLevel := C.uint32_t(level) + + err := f.setFilterAttr(filterAttrOptimize, cLevel) + if err != nil { + if e := checkAPI("SetOptimize", 4, 2, 5, 0); e != nil { + err = e + } + } + + return err +} + +// SetRawRC sets whether libseccomp should pass system error codes back to the +// caller, instead of the default ECANCELED. Defaults to false. +// NOTE(review): on failure the same checkAPI arguments as SetSSB are used, which +// suggests libseccomp API level 4 or higher is required - confirm. +func (f *ScmpFilter) SetRawRC(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + err := f.setFilterAttr(filterAttrRawRC, toSet) + if err != nil { + if e := checkAPI("SetRawRC", 4, 2, 5, 0); e != nil { + err = e + } + } + + return err +} + +// SetSyscallPriority sets a syscall's priority. +// This provides a hint to the filter generator in libseccomp about the +// importance of this syscall. High-priority syscalls are placed +// first in the filter code, and incur less overhead (at the expense of +// lower-priority syscalls). +// Returns an error if the filter is invalid or libseccomp rejects the call. +func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call), + C.uint8_t(priority)); retCode != 0 { + return errRc(retCode) + } + + return nil +} + +// AddRule adds a single rule for an unconditional action on a syscall. +// Accepts the number of the syscall and the action to be taken on the call +// being made. +// Returns an error if an issue was encountered adding the rule.
+func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error { + return f.addRuleGeneric(call, action, false, nil) +} + +// AddRuleExact adds a single rule for an unconditional action on a syscall. +// Accepts the number of the syscall and the action to be taken on the call +// being made. +// No modifications will be made to the rule, and it will fail to add if it +// cannot be applied to the current architecture without modification. +// The rule will function exactly as described, but it may not function identically +// on (or be able to be applied to) all architectures. +// Returns an error if an issue was encountered adding the rule. +func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error { + return f.addRuleGeneric(call, action, true, nil) +} + +// AddRuleConditional adds a single rule for a conditional action on a syscall. +// Returns an error if an issue was encountered adding the rule. +// All conditions must match for the rule to match. +// An empty or nil conds slice adds the rule without any conditions. +func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { + return f.addRuleGeneric(call, action, false, conds) +} + +// AddRuleConditionalExact adds a single rule for a conditional action on a +// syscall. +// No modifications will be made to the rule, and it will fail to add if it +// cannot be applied to the current architecture without modification. +// The rule will function exactly as described, but it may not function identically +// on (or be able to be applied to) all architectures. +// Returns an error if an issue was encountered adding the rule. +func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { + return f.addRuleGeneric(call, action, true, conds) +} + +// ExportPFC outputs a PFC-formatted, human-readable dump of a filter context's +// rules to a file. +// Accepts the file to write to (must be open for writing). +// Returns an error if the filter context is invalid or writing to the file fails.
+func (f *ScmpFilter) ExportPFC(file *os.File) error { + f.lock.Lock() + defer f.lock.Unlock() + + fd := file.Fd() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 { + return errRc(retCode) + } + + return nil +} + +// ExportBPF outputs a Berkeley Packet Filter-formatted, kernel-readable dump of a +// filter context's rules to a file. +// Accepts the file to write to (must be open for writing). +// Returns an error if the filter context is invalid or writing to the file fails. +func (f *ScmpFilter) ExportBPF(file *os.File) error { + f.lock.Lock() + defer f.lock.Unlock() + + fd := file.Fd() + + if !f.valid { + return errBadFilter + } + + if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 { + return errRc(retCode) + } + + return nil +} + +// Userspace Notification API + +// GetNotifFd returns the userspace notification file descriptor associated with the given +// filter context. Such a file descriptor is only valid after the filter has been loaded +// and only when the filter uses the ActNotify action. The file descriptor can be used to +// retrieve and respond to notifications associated with the filter (see NotifReceive(), +// NotifRespond(), and NotifIDValid()). +func (f *ScmpFilter) GetNotifFd() (ScmpFd, error) { + return f.getNotifFd() +} + +// NotifReceive retrieves a seccomp userspace notification from a filter whose ActNotify +// action has triggered. The caller is expected to process the notification and return a +// response via NotifRespond(). Each invocation of this function returns one +// notification. As multiple notifications may be pending at any time, this function is +// normally called within a polling loop. +func NotifReceive(fd ScmpFd) (*ScmpNotifReq, error) { + return notifReceive(fd) +} + +// NotifRespond responds to a notification retrieved via NotifReceive(). The response ID +// must match that of the corresponding notification retrieved via NotifReceive().
+func NotifRespond(fd ScmpFd, scmpResp *ScmpNotifResp) error { + return notifRespond(fd, scmpResp) +} + +// NotifIDValid checks if a notification is still valid. A return value of nil means the +// notification is still valid. Otherwise the notification is not valid. This can be used +// to mitigate time-of-check-time-of-use (TOCTOU) attacks as described in seccomp_notify_id_valid(2). +func NotifIDValid(fd ScmpFd, id uint64) error { + return notifIDValid(fd, id) +} diff --git a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go new file mode 100644 index 0000000000..0a7fd34f51 --- /dev/null +++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go @@ -0,0 +1,884 @@ +// Internal functions for libseccomp Go bindings +// No exported functions + +package seccomp + +import ( + "errors" + "fmt" + "syscall" +) + +// Unexported C wrapping code - provides the C-Golang interface +// Get the seccomp header in scope +// Need stdlib.h for free() on cstrings + +// To compile libseccomp-golang against a specific version of libseccomp: +// cd ../libseccomp && mkdir -p prefix +// ./configure --prefix=$PWD/prefix && make && make install +// cd ../libseccomp-golang +// PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make +// LD_PRELOAD=$PWD/../libseccomp/prefix/lib/libseccomp.so.2.5.0 PKG_CONFIG_PATH=$PWD/../libseccomp/prefix/lib/pkgconfig/ make test + +// #cgo pkg-config: libseccomp +/* +#include +#include +#include + +#if (SCMP_VER_MAJOR < 2) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 3) || \ + (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 3 && SCMP_VER_MICRO < 1) +#error This package requires libseccomp >= v2.3.1 +#endif + +#define ARCH_BAD ~0 + +const uint32_t C_ARCH_BAD = ARCH_BAD; + +#ifndef SCMP_ARCH_PPC +#define SCMP_ARCH_PPC ARCH_BAD +#endif + +#ifndef SCMP_ARCH_PPC64 +#define SCMP_ARCH_PPC64 ARCH_BAD +#endif + +#ifndef SCMP_ARCH_PPC64LE +#define SCMP_ARCH_PPC64LE ARCH_BAD
+#endif + +#ifndef SCMP_ARCH_S390 +#define SCMP_ARCH_S390 ARCH_BAD +#endif + +#ifndef SCMP_ARCH_S390X +#define SCMP_ARCH_S390X ARCH_BAD +#endif + +#ifndef SCMP_ARCH_PARISC +#define SCMP_ARCH_PARISC ARCH_BAD +#endif + +#ifndef SCMP_ARCH_PARISC64 +#define SCMP_ARCH_PARISC64 ARCH_BAD +#endif + +#ifndef SCMP_ARCH_RISCV64 +#define SCMP_ARCH_RISCV64 ARCH_BAD +#endif + +const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE; +const uint32_t C_ARCH_X86 = SCMP_ARCH_X86; +const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64; +const uint32_t C_ARCH_X32 = SCMP_ARCH_X32; +const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM; +const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64; +const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS; +const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64; +const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32; +const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL; +const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64; +const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32; +const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC; +const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64; +const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE; +const uint32_t C_ARCH_S390 = SCMP_ARCH_S390; +const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X; +const uint32_t C_ARCH_PARISC = SCMP_ARCH_PARISC; +const uint32_t C_ARCH_PARISC64 = SCMP_ARCH_PARISC64; +const uint32_t C_ARCH_RISCV64 = SCMP_ARCH_RISCV64; + +#ifndef SCMP_ACT_LOG +#define SCMP_ACT_LOG 0x7ffc0000U +#endif + +#ifndef SCMP_ACT_KILL_PROCESS +#define SCMP_ACT_KILL_PROCESS 0x80000000U +#endif + +#ifndef SCMP_ACT_KILL_THREAD +#define SCMP_ACT_KILL_THREAD 0x00000000U +#endif + +#ifndef SCMP_ACT_NOTIFY +#define SCMP_ACT_NOTIFY 0x7fc00000U +#endif + +const uint32_t C_ACT_KILL = SCMP_ACT_KILL; +const uint32_t C_ACT_KILL_PROCESS = SCMP_ACT_KILL_PROCESS; +const uint32_t C_ACT_KILL_THREAD = SCMP_ACT_KILL_THREAD; +const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP; +const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0); +const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0); +const uint32_t C_ACT_LOG = 
SCMP_ACT_LOG; +const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW; +const uint32_t C_ACT_NOTIFY = SCMP_ACT_NOTIFY; + +// The libseccomp SCMP_FLTATR_CTL_LOG member of the scmp_filter_attr enum was +// added in v2.4.0 +#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4 +#define SCMP_FLTATR_CTL_LOG _SCMP_FLTATR_MIN +#endif + +// The following SCMP_FLTATR_* were added in libseccomp v2.5.0. +#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5 +#define SCMP_FLTATR_CTL_SSB _SCMP_FLTATR_MIN +#define SCMP_FLTATR_CTL_OPTIMIZE _SCMP_FLTATR_MIN +#define SCMP_FLTATR_API_SYSRAWRC _SCMP_FLTATR_MIN +#endif + +const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT; +const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH; +const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP; +const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC; +const uint32_t C_ATTRIBUTE_LOG = (uint32_t)SCMP_FLTATR_CTL_LOG; +const uint32_t C_ATTRIBUTE_SSB = (uint32_t)SCMP_FLTATR_CTL_SSB; +const uint32_t C_ATTRIBUTE_OPTIMIZE = (uint32_t)SCMP_FLTATR_CTL_OPTIMIZE; +const uint32_t C_ATTRIBUTE_SYSRAWRC = (uint32_t)SCMP_FLTATR_API_SYSRAWRC; + +const int C_CMP_NE = (int)SCMP_CMP_NE; +const int C_CMP_LT = (int)SCMP_CMP_LT; +const int C_CMP_LE = (int)SCMP_CMP_LE; +const int C_CMP_EQ = (int)SCMP_CMP_EQ; +const int C_CMP_GE = (int)SCMP_CMP_GE; +const int C_CMP_GT = (int)SCMP_CMP_GT; +const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ; + +const int C_VERSION_MAJOR = SCMP_VER_MAJOR; +const int C_VERSION_MINOR = SCMP_VER_MINOR; +const int C_VERSION_MICRO = SCMP_VER_MICRO; + +#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 3 +unsigned int get_major_version() +{ + return seccomp_version()->major; +} + +unsigned int get_minor_version() +{ + return seccomp_version()->minor; +} + +unsigned int get_micro_version() +{ + return seccomp_version()->micro; +} +#else +unsigned int get_major_version() +{ + return (unsigned int)C_VERSION_MAJOR; +} + +unsigned int get_minor_version() +{ + return (unsigned 
int)C_VERSION_MINOR; +} + +unsigned int get_micro_version() +{ + return (unsigned int)C_VERSION_MICRO; +} +#endif + +// The libseccomp API level functions were added in v2.4.0 +#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4 +const unsigned int seccomp_api_get(void) +{ + // libseccomp-golang requires libseccomp v2.3.1, at a minimum, which + // supports API level 2. However, the kernel may not support API level + // 2 constructs which are the seccomp() system call and the TSYNC + // filter flag. Return the "reserved" value of 0 here to indicate that + // proper API level support is not available in libseccomp. + return 0; +} + +int seccomp_api_set(unsigned int level) +{ + return -EOPNOTSUPP; +} +#endif + +typedef struct scmp_arg_cmp* scmp_cast_t; + +void* make_arg_cmp_array(unsigned int length) +{ + return calloc(length, sizeof(struct scmp_arg_cmp)); +} + +// Wrapper to add an scmp_arg_cmp struct to an existing arg_cmp array +void add_struct_arg_cmp( + struct scmp_arg_cmp* arr, + unsigned int pos, + unsigned int arg, + int compare, + uint64_t a, + uint64_t b + ) +{ + arr[pos].arg = arg; + arr[pos].op = compare; + arr[pos].datum_a = a; + arr[pos].datum_b = b; + + return; +} + +// The seccomp notify API functions were added in v2.5.0 +#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 5 + +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +int seccomp_notify_alloc(struct seccomp_notif **req, struct seccomp_notif_resp **resp) { + return -EOPNOTSUPP; +} +int seccomp_notify_fd(const scmp_filter_ctx ctx) { + return -EOPNOTSUPP; +} +void seccomp_notify_free(struct seccomp_notif *req, struct seccomp_notif_resp *resp) { +} +int seccomp_notify_id_valid(int fd, uint64_t id) { + return -EOPNOTSUPP; +} +int seccomp_notify_receive(int fd, struct
seccomp_notif *req) { + return -EOPNOTSUPP; +} +int seccomp_notify_respond(int fd, struct seccomp_notif_resp *resp) { + return -EOPNOTSUPP; +} + +#endif +*/ +import "C" + +// Nonexported types +type scmpFilterAttr uint32 + +// Nonexported constants + +const ( + filterAttrActDefault scmpFilterAttr = iota + filterAttrActBadArch + filterAttrNNP + filterAttrTsync + filterAttrLog + filterAttrSSB + filterAttrOptimize + filterAttrRawRC +) + +const ( + // An error return from certain libseccomp functions + scmpError C.int = -1 + // Comparison boundaries to check for architecture validity + archStart ScmpArch = ArchNative + archEnd ScmpArch = ArchRISCV64 + // Comparison boundaries to check for action validity + actionStart ScmpAction = ActKillThread + actionEnd ScmpAction = ActKillProcess + // Comparison boundaries to check for comparison operator validity + compareOpStart ScmpCompareOp = CompareNotEqual + compareOpEnd ScmpCompareOp = CompareMaskedEqual +) + +var ( + // errBadFilter is returned when a filter context is invalid or uninitialized. + errBadFilter = errors.New("filter is invalid or uninitialized") + // errDefAction is returned when a rule's action matches the filter's default action. + errDefAction = errors.New("requested action matches default action of filter") + // Library major, minor, and micro versions, read once at package initialization + verMajor = uint(C.get_major_version()) + verMinor = uint(C.get_minor_version()) + verMicro = uint(C.get_micro_version()) +) + +// Nonexported functions + +// checkVersion returns an error if the libseccomp version being used +// is less than the one specified by major, minor, and micro arguments. +// Argument op is an arbitrary non-empty operation description, which +// is used as a part of the error message returned. +// +// Most users should use checkAPI instead.
+func checkVersion(op string, major, minor, micro uint) error { + if (verMajor > major) || + (verMajor == major && verMinor > minor) || + (verMajor == major && verMinor == minor && verMicro >= micro) { + return nil + } + return &VersionError{ + op: op, + major: major, + minor: minor, + micro: micro, + } +} + +func ensureSupportedVersion() error { + return checkVersion("seccomp", 2, 3, 1) +} + +// Get the API level +func getAPI() (uint, error) { + api := C.seccomp_api_get() + if api == 0 { + return 0, errors.New("API level operations are not supported") + } + + return uint(api), nil +} + +// Set the API level +func setAPI(api uint) error { + if retCode := C.seccomp_api_set(C.uint(api)); retCode != 0 { + e := errRc(retCode) + if e == syscall.EOPNOTSUPP { + return errors.New("API level operations are not supported") + } + + return fmt.Errorf("could not set API level: %w", e) + } + + return nil +} + +// Filter helpers + +// Filter finalizer - ensure that kernel context for filters is freed +func filterFinalizer(f *ScmpFilter) { + f.Release() +} + +func errRc(rc C.int) error { + return syscall.Errno(-1 * rc) +} + +// Get a raw filter attribute +func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return 0x0, errBadFilter + } + + var attribute C.uint32_t + + retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute) + if retCode != 0 { + return 0x0, errRc(retCode) + } + + return attribute, nil +} + +// Set a raw filter attribute +func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value) + if retCode != 0 { + return errRc(retCode) + } + + return nil +} + +// DOES NOT LOCK OR CHECK VALIDITY +// Assumes caller has already done this +// Wrapper for seccomp_rule_add_... 
functions +func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, length C.uint, cond C.scmp_cast_t) error { + if length != 0 && cond == nil { + return errors.New("null conditions list, but length is nonzero") + } + + var retCode C.int + if exact { + retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond) + } else { + retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond) + } + + if retCode != 0 { + switch e := errRc(retCode); e { + case syscall.EFAULT: + return fmt.Errorf("unrecognized syscall %#x", int32(call)) + // libseccomp >= v2.5.0 returns EACCES, older versions return EPERM. + // TODO: remove EPERM once libseccomp < v2.5.0 is not supported. + case syscall.EPERM, syscall.EACCES: + return errDefAction + case syscall.EINVAL: + return errors.New("two checks on same syscall argument") + default: + return e + } + } + + return nil +} + +// Generic add function for filter rules +func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return errBadFilter + } + + if len(conds) == 0 { + if err := f.addRuleWrapper(call, action, exact, 0, nil); err != nil { + return err + } + } else { + argsArr := C.make_arg_cmp_array(C.uint(len(conds))) + if argsArr == nil { + return errors.New("error allocating memory for conditions") + } + defer C.free(argsArr) + + for i, cond := range conds { + C.add_struct_arg_cmp(C.scmp_cast_t(argsArr), C.uint(i), + C.uint(cond.Argument), cond.Op.toNative(), + C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2)) + } + + if err := f.addRuleWrapper(call, action, exact, C.uint(len(conds)), C.scmp_cast_t(argsArr)); err != nil { + return err + } + } + + return nil +} + +// Generic Helpers + +// Helper - Sanitize Arch token input +func sanitizeArch(in ScmpArch) error { + if in < archStart || in > archEnd { + return 
fmt.Errorf("unrecognized architecture %#x", uint(in)) + } + + if in.toNative() == C.C_ARCH_BAD { + return fmt.Errorf("architecture %v is not supported on this version of the library", in) + } + + return nil +} + +func sanitizeAction(in ScmpAction) error { + inTmp := in & 0x0000FFFF + if inTmp < actionStart || inTmp > actionEnd { + return fmt.Errorf("unrecognized action %#x", uint(inTmp)) + } + + if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 { + return errors.New("highest 16 bits must be zeroed except for Trace and Errno") + } + + return nil +} + +func sanitizeCompareOp(in ScmpCompareOp) error { + if in < compareOpStart || in > compareOpEnd { + return fmt.Errorf("unrecognized comparison operator %#x", uint(in)) + } + + return nil +} + +func archFromNative(a C.uint32_t) (ScmpArch, error) { + switch a { + case C.C_ARCH_X86: + return ArchX86, nil + case C.C_ARCH_X86_64: + return ArchAMD64, nil + case C.C_ARCH_X32: + return ArchX32, nil + case C.C_ARCH_ARM: + return ArchARM, nil + case C.C_ARCH_NATIVE: + return ArchNative, nil + case C.C_ARCH_AARCH64: + return ArchARM64, nil + case C.C_ARCH_MIPS: + return ArchMIPS, nil + case C.C_ARCH_MIPS64: + return ArchMIPS64, nil + case C.C_ARCH_MIPS64N32: + return ArchMIPS64N32, nil + case C.C_ARCH_MIPSEL: + return ArchMIPSEL, nil + case C.C_ARCH_MIPSEL64: + return ArchMIPSEL64, nil + case C.C_ARCH_MIPSEL64N32: + return ArchMIPSEL64N32, nil + case C.C_ARCH_PPC: + return ArchPPC, nil + case C.C_ARCH_PPC64: + return ArchPPC64, nil + case C.C_ARCH_PPC64LE: + return ArchPPC64LE, nil + case C.C_ARCH_S390: + return ArchS390, nil + case C.C_ARCH_S390X: + return ArchS390X, nil + case C.C_ARCH_PARISC: + return ArchPARISC, nil + case C.C_ARCH_PARISC64: + return ArchPARISC64, nil + case C.C_ARCH_RISCV64: + return ArchRISCV64, nil + default: + return 0x0, fmt.Errorf("unrecognized architecture %#x", uint32(a)) + } +} + +// Only use with sanitized arches, no error handling +func (a ScmpArch) toNative() C.uint32_t { + switch a 
{ + case ArchX86: + return C.C_ARCH_X86 + case ArchAMD64: + return C.C_ARCH_X86_64 + case ArchX32: + return C.C_ARCH_X32 + case ArchARM: + return C.C_ARCH_ARM + case ArchARM64: + return C.C_ARCH_AARCH64 + case ArchMIPS: + return C.C_ARCH_MIPS + case ArchMIPS64: + return C.C_ARCH_MIPS64 + case ArchMIPS64N32: + return C.C_ARCH_MIPS64N32 + case ArchMIPSEL: + return C.C_ARCH_MIPSEL + case ArchMIPSEL64: + return C.C_ARCH_MIPSEL64 + case ArchMIPSEL64N32: + return C.C_ARCH_MIPSEL64N32 + case ArchPPC: + return C.C_ARCH_PPC + case ArchPPC64: + return C.C_ARCH_PPC64 + case ArchPPC64LE: + return C.C_ARCH_PPC64LE + case ArchS390: + return C.C_ARCH_S390 + case ArchS390X: + return C.C_ARCH_S390X + case ArchPARISC: + return C.C_ARCH_PARISC + case ArchPARISC64: + return C.C_ARCH_PARISC64 + case ArchRISCV64: + return C.C_ARCH_RISCV64 + case ArchNative: + return C.C_ARCH_NATIVE + default: + return 0x0 + } +} + +// Only use with sanitized ops, no error handling +func (a ScmpCompareOp) toNative() C.int { + switch a { + case CompareNotEqual: + return C.C_CMP_NE + case CompareLess: + return C.C_CMP_LT + case CompareLessOrEqual: + return C.C_CMP_LE + case CompareEqual: + return C.C_CMP_EQ + case CompareGreaterEqual: + return C.C_CMP_GE + case CompareGreater: + return C.C_CMP_GT + case CompareMaskedEqual: + return C.C_CMP_MASKED_EQ + default: + return 0x0 + } +} + +func actionFromNative(a C.uint32_t) (ScmpAction, error) { + aTmp := a & 0xFFFF + switch a & 0xFFFF0000 { + case C.C_ACT_KILL_PROCESS: + return ActKillProcess, nil + case C.C_ACT_KILL_THREAD: + return ActKillThread, nil + case C.C_ACT_TRAP: + return ActTrap, nil + case C.C_ACT_ERRNO: + return ActErrno.SetReturnCode(int16(aTmp)), nil + case C.C_ACT_TRACE: + return ActTrace.SetReturnCode(int16(aTmp)), nil + case C.C_ACT_LOG: + return ActLog, nil + case C.C_ACT_ALLOW: + return ActAllow, nil + case C.C_ACT_NOTIFY: + return ActNotify, nil + default: + return 0x0, fmt.Errorf("unrecognized action %#x", uint32(a)) + } +} + +// Only use 
with sanitized actions, no error handling +func (a ScmpAction) toNative() C.uint32_t { + switch a & 0xFFFF { + case ActKillProcess: + return C.C_ACT_KILL_PROCESS + case ActKillThread: + return C.C_ACT_KILL_THREAD + case ActTrap: + return C.C_ACT_TRAP + case ActErrno: + return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16) + case ActTrace: + return C.C_ACT_TRACE | (C.uint32_t(a) >> 16) + case ActLog: + return C.C_ACT_LOG + case ActAllow: + return C.C_ACT_ALLOW + case ActNotify: + return C.C_ACT_NOTIFY + default: + return 0x0 + } +} + +// Internal only, assumes safe attribute +func (a scmpFilterAttr) toNative() uint32 { + switch a { + case filterAttrActDefault: + return uint32(C.C_ATTRIBUTE_DEFAULT) + case filterAttrActBadArch: + return uint32(C.C_ATTRIBUTE_BADARCH) + case filterAttrNNP: + return uint32(C.C_ATTRIBUTE_NNP) + case filterAttrTsync: + return uint32(C.C_ATTRIBUTE_TSYNC) + case filterAttrLog: + return uint32(C.C_ATTRIBUTE_LOG) + case filterAttrSSB: + return uint32(C.C_ATTRIBUTE_SSB) + case filterAttrOptimize: + return uint32(C.C_ATTRIBUTE_OPTIMIZE) + case filterAttrRawRC: + return uint32(C.C_ATTRIBUTE_SYSRAWRC) + default: + return 0x0 + } +} + +func syscallFromNative(a C.int) ScmpSyscall { + return ScmpSyscall(a) +} + +func notifReqFromNative(req *C.struct_seccomp_notif) (*ScmpNotifReq, error) { + scmpArgs := make([]uint64, 6) + for i := 0; i < len(scmpArgs); i++ { + scmpArgs[i] = uint64(req.data.args[i]) + } + + arch, err := archFromNative(req.data.arch) + if err != nil { + return nil, err + } + + scmpData := ScmpNotifData{ + Syscall: syscallFromNative(req.data.nr), + Arch: arch, + InstrPointer: uint64(req.data.instruction_pointer), + Args: scmpArgs, + } + + scmpReq := &ScmpNotifReq{ + ID: uint64(req.id), + Pid: uint32(req.pid), + Flags: uint32(req.flags), + Data: scmpData, + } + + return scmpReq, nil +} + +func (scmpResp *ScmpNotifResp) toNative(resp *C.struct_seccomp_notif_resp) { + resp.id = C.__u64(scmpResp.ID) + resp.val = C.__s64(scmpResp.Val) + resp.error = 
(C.__s32(scmpResp.Error) * -1) // kernel requires a negated value + resp.flags = C.__u32(scmpResp.Flags) +} + +// checkAPI checks that both the API level and the seccomp version is equal to +// or greater than the specified minLevel and major, minor, micro, +// respectively, and returns an error otherwise. Argument op is an arbitrary +// non-empty operation description, used as a part of the error message +// returned. +func checkAPI(op string, minLevel uint, major, minor, micro uint) error { + // Ignore error from getAPI, as it returns level == 0 in case of error. + level, _ := getAPI() + if level >= minLevel { + return checkVersion(op, major, minor, micro) + } + return &VersionError{ + op: op, + curAPI: level, + minAPI: minLevel, + major: major, + minor: minor, + micro: micro, + } +} + +// Userspace Notification API +// Calls to C.seccomp_notify* hidden from seccomp.go + +func notifSupported() error { + return checkAPI("seccomp notification", 6, 2, 5, 0) +} + +func (f *ScmpFilter) getNotifFd() (ScmpFd, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return -1, errBadFilter + } + if err := notifSupported(); err != nil { + return -1, err + } + + fd := C.seccomp_notify_fd(f.filterCtx) + + return ScmpFd(fd), nil +} + +func notifReceive(fd ScmpFd) (*ScmpNotifReq, error) { + var req *C.struct_seccomp_notif + var resp *C.struct_seccomp_notif_resp + + if err := notifSupported(); err != nil { + return nil, err + } + + // we only use the request here; the response is unused + if retCode := C.seccomp_notify_alloc(&req, &resp); retCode != 0 { + return nil, errRc(retCode) + } + + defer func() { + C.seccomp_notify_free(req, resp) + }() + + for { + retCode, errno := C.seccomp_notify_receive(C.int(fd), req) + if retCode == 0 { + break + } + + if errno == syscall.EINTR { + continue + } + + if errno == syscall.ENOENT { + return nil, errno + } + + return nil, errRc(retCode) + } + + return notifReqFromNative(req) +} + +func notifRespond(fd ScmpFd, scmpResp 
*ScmpNotifResp) error { + var req *C.struct_seccomp_notif + var resp *C.struct_seccomp_notif_resp + + if err := notifSupported(); err != nil { + return err + } + + // we only use the response here; the request is discarded + if retCode := C.seccomp_notify_alloc(&req, &resp); retCode != 0 { + return errRc(retCode) + } + + defer func() { + C.seccomp_notify_free(req, resp) + }() + + scmpResp.toNative(resp) + + for { + retCode, errno := C.seccomp_notify_respond(C.int(fd), resp) + if retCode == 0 { + break + } + + if errno == syscall.EINTR { + continue + } + + if errno == syscall.ENOENT { + return errno + } + + return errRc(retCode) + } + + return nil +} + +func notifIDValid(fd ScmpFd, id uint64) error { + if err := notifSupported(); err != nil { + return err + } + + for { + retCode, errno := C.seccomp_notify_id_valid(C.int(fd), C.uint64_t(id)) + if retCode == 0 { + break + } + + if errno == syscall.EINTR { + continue + } + + if errno == syscall.ENOENT { + return errno + } + + return errRc(retCode) + } + + return nil +} diff --git a/vendor/golang.org/x/sys/unix/affinity_linux.go b/vendor/golang.org/x/sys/unix/affinity_linux.go index 6e5c81acd0..3ea470387b 100644 --- a/vendor/golang.org/x/sys/unix/affinity_linux.go +++ b/vendor/golang.org/x/sys/unix/affinity_linux.go @@ -38,8 +38,15 @@ func SchedSetaffinity(pid int, set *CPUSet) error { // Zero clears the set s, so that it contains no CPUs. func (s *CPUSet) Zero() { + clear(s[:]) +} + +// Fill adds all possible CPU bits to the set s. On Linux, [SchedSetaffinity] +// will silently ignore any invalid CPU bits in [CPUSet] so this is an +// efficient way of resetting the CPU affinity of a process. 
+func (s *CPUSet) Fill() { for i := range s { - s[i] = 0 + s[i] = ^cpuMask(0) } } diff --git a/vendor/golang.org/x/sys/unix/fdset.go b/vendor/golang.org/x/sys/unix/fdset.go index 9e83d18cd0..62ed12645f 100644 --- a/vendor/golang.org/x/sys/unix/fdset.go +++ b/vendor/golang.org/x/sys/unix/fdset.go @@ -23,7 +23,5 @@ func (fds *FdSet) IsSet(fd int) bool { // Zero clears the set fds. func (fds *FdSet) Zero() { - for i := range fds.Bits { - fds.Bits[i] = 0 - } + clear(fds.Bits[:]) } diff --git a/vendor/golang.org/x/sys/unix/ifreq_linux.go b/vendor/golang.org/x/sys/unix/ifreq_linux.go index 848840ae4c..309f5a2b0c 100644 --- a/vendor/golang.org/x/sys/unix/ifreq_linux.go +++ b/vendor/golang.org/x/sys/unix/ifreq_linux.go @@ -111,9 +111,7 @@ func (ifr *Ifreq) SetUint32(v uint32) { // clear zeroes the ifreq's union field to prevent trailing garbage data from // being sent to the kernel if an ifreq is reused. func (ifr *Ifreq) clear() { - for i := range ifr.raw.Ifru { - ifr.raw.Ifru[i] = 0 - } + clear(ifr.raw.Ifru[:]) } // TODO(mdlayher): export as IfreqData? For now we can provide helpers such as diff --git a/vendor/golang.org/x/sys/unix/mkall.sh b/vendor/golang.org/x/sys/unix/mkall.sh index e6f31d374d..d0ed611912 100644 --- a/vendor/golang.org/x/sys/unix/mkall.sh +++ b/vendor/golang.org/x/sys/unix/mkall.sh @@ -49,6 +49,7 @@ esac if [[ "$GOOS" = "linux" ]]; then # Use the Docker-based build system # Files generated through docker (use $cmd so you can Ctl-C the build or run) + set -e $cmd docker build --tag generate:$GOOS $GOOS $cmd docker run --interactive --tty --volume $(cd -- "$(dirname -- "$0")/.." 
&& pwd):/build generate:$GOOS exit diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index d1c8b2640e..fd39be4efd 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -226,6 +226,7 @@ struct ltchars { #include #include #include +#include #include #include #include @@ -255,6 +256,7 @@ struct ltchars { #include #include #include +#include #include #include #include @@ -529,6 +531,7 @@ ccflags="$@" $2 ~ /^O[CNPFPL][A-Z]+[^_][A-Z]+$/ || $2 ~ /^(NL|CR|TAB|BS|VT|FF)DLY$/ || $2 ~ /^(NL|CR|TAB|BS|VT|FF)[0-9]$/ || + $2 ~ /^(DT|EI|ELF|EV|NN|NT|PF|SHF|SHN|SHT|STB|STT|VER)_/ || $2 ~ /^O?XTABS$/ || $2 ~ /^TC[IO](ON|OFF)$/ || $2 ~ /^IN_/ || @@ -611,7 +614,7 @@ ccflags="$@" $2 !~ /IOC_MAGIC/ && $2 ~ /^[A-Z][A-Z0-9_]+_MAGIC2?$/ || $2 ~ /^(VM|VMADDR)_/ || - $2 ~ /^IOCTL_VM_SOCKETS_/ || + $2 ~ /^(IOCTL_VM_SOCKETS_|IOCTL_MEI_)/ || $2 ~ /^(TASKSTATS|TS)_/ || $2 ~ /^CGROUPSTATS_/ || $2 ~ /^GENL_/ || diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 4958a65708..06c0eea6fb 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -801,9 +801,7 @@ func (sa *SockaddrPPPoE) sockaddr() (unsafe.Pointer, _Socklen, error) { // one. The kernel expects SID to be in network byte order. 
binary.BigEndian.PutUint16(sa.raw[6:8], sa.SID) copy(sa.raw[8:14], sa.Remote) - for i := 14; i < 14+IFNAMSIZ; i++ { - sa.raw[i] = 0 - } + clear(sa.raw[14 : 14+IFNAMSIZ]) copy(sa.raw[14:], sa.Dev) return unsafe.Pointer(&sa.raw), SizeofSockaddrPPPoX, nil } @@ -2645,3 +2643,9 @@ func SchedGetAttr(pid int, flags uint) (*SchedAttr, error) { //sys Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) (err error) //sys Mseal(b []byte, flags uint) (err error) + +//sys setMemPolicy(mode int, mask *CPUSet, size int) (err error) = SYS_SET_MEMPOLICY + +func SetMemPolicy(mode int, mask *CPUSet) error { + return setMemPolicy(mode, mask, _CPU_SETSIZE) +} diff --git a/vendor/golang.org/x/sys/unix/syscall_netbsd.go b/vendor/golang.org/x/sys/unix/syscall_netbsd.go index 88162099af..34a4676973 100644 --- a/vendor/golang.org/x/sys/unix/syscall_netbsd.go +++ b/vendor/golang.org/x/sys/unix/syscall_netbsd.go @@ -248,6 +248,23 @@ func Statvfs(path string, buf *Statvfs_t) (err error) { return Statvfs1(path, buf, ST_WAIT) } +func Getvfsstat(buf []Statvfs_t, flags int) (n int, err error) { + var ( + _p0 unsafe.Pointer + bufsize uintptr + ) + if len(buf) > 0 { + _p0 = unsafe.Pointer(&buf[0]) + bufsize = unsafe.Sizeof(Statvfs_t{}) * uintptr(len(buf)) + } + r0, _, e1 := Syscall(SYS_GETVFSSTAT, uintptr(_p0), bufsize, uintptr(flags)) + n = int(r0) + if e1 != 0 { + err = e1 + } + return +} + /* * Exposed directly */ diff --git a/vendor/golang.org/x/sys/unix/syscall_solaris.go b/vendor/golang.org/x/sys/unix/syscall_solaris.go index abc3955477..18a3d9bdab 100644 --- a/vendor/golang.org/x/sys/unix/syscall_solaris.go +++ b/vendor/golang.org/x/sys/unix/syscall_solaris.go @@ -629,7 +629,7 @@ func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err e //sys Kill(pid int, signum syscall.Signal) (err error) //sys Lchown(path string, uid int, gid int) (err error) //sys Link(path string, link string) (err error) -//sys Listen(s int, backlog int) (err error) = 
libsocket.__xnet_llisten +//sys Listen(s int, backlog int) (err error) = libsocket.__xnet_listen //sys Lstat(path string, stat *Stat_t) (err error) //sys Madvise(b []byte, advice int) (err error) //sys Mkdir(path string, mode uint32) (err error) diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go index b6db27d937..120a7b35d1 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go @@ -853,20 +853,86 @@ const ( DM_VERSION_MAJOR = 0x4 DM_VERSION_MINOR = 0x32 DM_VERSION_PATCHLEVEL = 0x0 + DT_ADDRRNGHI = 0x6ffffeff + DT_ADDRRNGLO = 0x6ffffe00 DT_BLK = 0x6 DT_CHR = 0x2 + DT_DEBUG = 0x15 DT_DIR = 0x4 + DT_ENCODING = 0x20 DT_FIFO = 0x1 + DT_FINI = 0xd + DT_FLAGS_1 = 0x6ffffffb + DT_GNU_HASH = 0x6ffffef5 + DT_HASH = 0x4 + DT_HIOS = 0x6ffff000 + DT_HIPROC = 0x7fffffff + DT_INIT = 0xc + DT_JMPREL = 0x17 DT_LNK = 0xa + DT_LOOS = 0x6000000d + DT_LOPROC = 0x70000000 + DT_NEEDED = 0x1 + DT_NULL = 0x0 + DT_PLTGOT = 0x3 + DT_PLTREL = 0x14 + DT_PLTRELSZ = 0x2 DT_REG = 0x8 + DT_REL = 0x11 + DT_RELA = 0x7 + DT_RELACOUNT = 0x6ffffff9 + DT_RELAENT = 0x9 + DT_RELASZ = 0x8 + DT_RELCOUNT = 0x6ffffffa + DT_RELENT = 0x13 + DT_RELSZ = 0x12 + DT_RPATH = 0xf DT_SOCK = 0xc + DT_SONAME = 0xe + DT_STRSZ = 0xa + DT_STRTAB = 0x5 + DT_SYMBOLIC = 0x10 + DT_SYMENT = 0xb + DT_SYMTAB = 0x6 + DT_TEXTREL = 0x16 DT_UNKNOWN = 0x0 + DT_VALRNGHI = 0x6ffffdff + DT_VALRNGLO = 0x6ffffd00 + DT_VERDEF = 0x6ffffffc + DT_VERDEFNUM = 0x6ffffffd + DT_VERNEED = 0x6ffffffe + DT_VERNEEDNUM = 0x6fffffff + DT_VERSYM = 0x6ffffff0 DT_WHT = 0xe ECHO = 0x8 ECRYPTFS_SUPER_MAGIC = 0xf15f EFD_SEMAPHORE = 0x1 EFIVARFS_MAGIC = 0xde5e81e4 EFS_SUPER_MAGIC = 0x414a53 + EI_CLASS = 0x4 + EI_DATA = 0x5 + EI_MAG0 = 0x0 + EI_MAG1 = 0x1 + EI_MAG2 = 0x2 + EI_MAG3 = 0x3 + EI_NIDENT = 0x10 + EI_OSABI = 0x7 + EI_PAD = 0x8 + EI_VERSION = 0x6 + ELFCLASS32 = 0x1 + ELFCLASS64 = 0x2 + ELFCLASSNONE = 0x0 + ELFCLASSNUM = 0x3 + ELFDATA2LSB = 0x1 
+ ELFDATA2MSB = 0x2 + ELFDATANONE = 0x0 + ELFMAG = "\177ELF" + ELFMAG0 = 0x7f + ELFMAG1 = 'E' + ELFMAG2 = 'L' + ELFMAG3 = 'F' + ELFOSABI_LINUX = 0x3 + ELFOSABI_NONE = 0x0 EM_386 = 0x3 EM_486 = 0x6 EM_68K = 0x4 @@ -1152,14 +1218,24 @@ const ( ETH_P_WCCP = 0x883e ETH_P_X25 = 0x805 ETH_P_XDSA = 0xf8 + ET_CORE = 0x4 + ET_DYN = 0x3 + ET_EXEC = 0x2 + ET_HIPROC = 0xffff + ET_LOPROC = 0xff00 + ET_NONE = 0x0 + ET_REL = 0x1 EV_ABS = 0x3 EV_CNT = 0x20 + EV_CURRENT = 0x1 EV_FF = 0x15 EV_FF_STATUS = 0x17 EV_KEY = 0x1 EV_LED = 0x11 EV_MAX = 0x1f EV_MSC = 0x4 + EV_NONE = 0x0 + EV_NUM = 0x2 EV_PWR = 0x16 EV_REL = 0x2 EV_REP = 0x14 @@ -1539,6 +1615,8 @@ const ( IN_OPEN = 0x20 IN_Q_OVERFLOW = 0x4000 IN_UNMOUNT = 0x2000 + IOCTL_MEI_CONNECT_CLIENT = 0xc0104801 + IOCTL_MEI_CONNECT_CLIENT_VTAG = 0xc0144804 IPPROTO_AH = 0x33 IPPROTO_BEETPH = 0x5e IPPROTO_COMP = 0x6c @@ -2276,7 +2354,167 @@ const ( NLM_F_REPLACE = 0x100 NLM_F_REQUEST = 0x1 NLM_F_ROOT = 0x100 + NN_386_IOPERM = "LINUX" + NN_386_TLS = "LINUX" + NN_ARC_V2 = "LINUX" + NN_ARM_FPMR = "LINUX" + NN_ARM_GCS = "LINUX" + NN_ARM_HW_BREAK = "LINUX" + NN_ARM_HW_WATCH = "LINUX" + NN_ARM_PACA_KEYS = "LINUX" + NN_ARM_PACG_KEYS = "LINUX" + NN_ARM_PAC_ENABLED_KEYS = "LINUX" + NN_ARM_PAC_MASK = "LINUX" + NN_ARM_POE = "LINUX" + NN_ARM_SSVE = "LINUX" + NN_ARM_SVE = "LINUX" + NN_ARM_SYSTEM_CALL = "LINUX" + NN_ARM_TAGGED_ADDR_CTRL = "LINUX" + NN_ARM_TLS = "LINUX" + NN_ARM_VFP = "LINUX" + NN_ARM_ZA = "LINUX" + NN_ARM_ZT = "LINUX" + NN_AUXV = "CORE" + NN_FILE = "CORE" + NN_GNU_PROPERTY_TYPE_0 = "GNU" + NN_LOONGARCH_CPUCFG = "LINUX" + NN_LOONGARCH_CSR = "LINUX" + NN_LOONGARCH_HW_BREAK = "LINUX" + NN_LOONGARCH_HW_WATCH = "LINUX" + NN_LOONGARCH_LASX = "LINUX" + NN_LOONGARCH_LBT = "LINUX" + NN_LOONGARCH_LSX = "LINUX" + NN_MIPS_DSP = "LINUX" + NN_MIPS_FP_MODE = "LINUX" + NN_MIPS_MSA = "LINUX" + NN_PPC_DEXCR = "LINUX" + NN_PPC_DSCR = "LINUX" + NN_PPC_EBB = "LINUX" + NN_PPC_HASHKEYR = "LINUX" + NN_PPC_PKEY = "LINUX" + NN_PPC_PMU = "LINUX" + NN_PPC_PPR = 
"LINUX" + NN_PPC_SPE = "LINUX" + NN_PPC_TAR = "LINUX" + NN_PPC_TM_CDSCR = "LINUX" + NN_PPC_TM_CFPR = "LINUX" + NN_PPC_TM_CGPR = "LINUX" + NN_PPC_TM_CPPR = "LINUX" + NN_PPC_TM_CTAR = "LINUX" + NN_PPC_TM_CVMX = "LINUX" + NN_PPC_TM_CVSX = "LINUX" + NN_PPC_TM_SPR = "LINUX" + NN_PPC_VMX = "LINUX" + NN_PPC_VSX = "LINUX" + NN_PRFPREG = "CORE" + NN_PRPSINFO = "CORE" + NN_PRSTATUS = "CORE" + NN_PRXFPREG = "LINUX" + NN_RISCV_CSR = "LINUX" + NN_RISCV_TAGGED_ADDR_CTRL = "LINUX" + NN_RISCV_VECTOR = "LINUX" + NN_S390_CTRS = "LINUX" + NN_S390_GS_BC = "LINUX" + NN_S390_GS_CB = "LINUX" + NN_S390_HIGH_GPRS = "LINUX" + NN_S390_LAST_BREAK = "LINUX" + NN_S390_PREFIX = "LINUX" + NN_S390_PV_CPU_DATA = "LINUX" + NN_S390_RI_CB = "LINUX" + NN_S390_SYSTEM_CALL = "LINUX" + NN_S390_TDB = "LINUX" + NN_S390_TIMER = "LINUX" + NN_S390_TODCMP = "LINUX" + NN_S390_TODPREG = "LINUX" + NN_S390_VXRS_HIGH = "LINUX" + NN_S390_VXRS_LOW = "LINUX" + NN_SIGINFO = "CORE" + NN_TASKSTRUCT = "CORE" + NN_VMCOREDD = "LINUX" + NN_X86_SHSTK = "LINUX" + NN_X86_XSAVE_LAYOUT = "LINUX" + NN_X86_XSTATE = "LINUX" NSFS_MAGIC = 0x6e736673 + NT_386_IOPERM = 0x201 + NT_386_TLS = 0x200 + NT_ARC_V2 = 0x600 + NT_ARM_FPMR = 0x40e + NT_ARM_GCS = 0x410 + NT_ARM_HW_BREAK = 0x402 + NT_ARM_HW_WATCH = 0x403 + NT_ARM_PACA_KEYS = 0x407 + NT_ARM_PACG_KEYS = 0x408 + NT_ARM_PAC_ENABLED_KEYS = 0x40a + NT_ARM_PAC_MASK = 0x406 + NT_ARM_POE = 0x40f + NT_ARM_SSVE = 0x40b + NT_ARM_SVE = 0x405 + NT_ARM_SYSTEM_CALL = 0x404 + NT_ARM_TAGGED_ADDR_CTRL = 0x409 + NT_ARM_TLS = 0x401 + NT_ARM_VFP = 0x400 + NT_ARM_ZA = 0x40c + NT_ARM_ZT = 0x40d + NT_AUXV = 0x6 + NT_FILE = 0x46494c45 + NT_GNU_PROPERTY_TYPE_0 = 0x5 + NT_LOONGARCH_CPUCFG = 0xa00 + NT_LOONGARCH_CSR = 0xa01 + NT_LOONGARCH_HW_BREAK = 0xa05 + NT_LOONGARCH_HW_WATCH = 0xa06 + NT_LOONGARCH_LASX = 0xa03 + NT_LOONGARCH_LBT = 0xa04 + NT_LOONGARCH_LSX = 0xa02 + NT_MIPS_DSP = 0x800 + NT_MIPS_FP_MODE = 0x801 + NT_MIPS_MSA = 0x802 + NT_PPC_DEXCR = 0x111 + NT_PPC_DSCR = 0x105 + NT_PPC_EBB = 0x106 + 
NT_PPC_HASHKEYR = 0x112 + NT_PPC_PKEY = 0x110 + NT_PPC_PMU = 0x107 + NT_PPC_PPR = 0x104 + NT_PPC_SPE = 0x101 + NT_PPC_TAR = 0x103 + NT_PPC_TM_CDSCR = 0x10f + NT_PPC_TM_CFPR = 0x109 + NT_PPC_TM_CGPR = 0x108 + NT_PPC_TM_CPPR = 0x10e + NT_PPC_TM_CTAR = 0x10d + NT_PPC_TM_CVMX = 0x10a + NT_PPC_TM_CVSX = 0x10b + NT_PPC_TM_SPR = 0x10c + NT_PPC_VMX = 0x100 + NT_PPC_VSX = 0x102 + NT_PRFPREG = 0x2 + NT_PRPSINFO = 0x3 + NT_PRSTATUS = 0x1 + NT_PRXFPREG = 0x46e62b7f + NT_RISCV_CSR = 0x900 + NT_RISCV_TAGGED_ADDR_CTRL = 0x902 + NT_RISCV_VECTOR = 0x901 + NT_S390_CTRS = 0x304 + NT_S390_GS_BC = 0x30c + NT_S390_GS_CB = 0x30b + NT_S390_HIGH_GPRS = 0x300 + NT_S390_LAST_BREAK = 0x306 + NT_S390_PREFIX = 0x305 + NT_S390_PV_CPU_DATA = 0x30e + NT_S390_RI_CB = 0x30d + NT_S390_SYSTEM_CALL = 0x307 + NT_S390_TDB = 0x308 + NT_S390_TIMER = 0x301 + NT_S390_TODCMP = 0x302 + NT_S390_TODPREG = 0x303 + NT_S390_VXRS_HIGH = 0x30a + NT_S390_VXRS_LOW = 0x309 + NT_SIGINFO = 0x53494749 + NT_TASKSTRUCT = 0x4 + NT_VMCOREDD = 0x700 + NT_X86_SHSTK = 0x204 + NT_X86_XSAVE_LAYOUT = 0x205 + NT_X86_XSTATE = 0x202 OCFS2_SUPER_MAGIC = 0x7461636f OCRNL = 0x8 OFDEL = 0x80 @@ -2463,6 +2701,59 @@ const ( PERF_RECORD_MISC_USER = 0x2 PERF_SAMPLE_BRANCH_PLM_ALL = 0x7 PERF_SAMPLE_WEIGHT_TYPE = 0x1004000 + PF_ALG = 0x26 + PF_APPLETALK = 0x5 + PF_ASH = 0x12 + PF_ATMPVC = 0x8 + PF_ATMSVC = 0x14 + PF_AX25 = 0x3 + PF_BLUETOOTH = 0x1f + PF_BRIDGE = 0x7 + PF_CAIF = 0x25 + PF_CAN = 0x1d + PF_DECnet = 0xc + PF_ECONET = 0x13 + PF_FILE = 0x1 + PF_IB = 0x1b + PF_IEEE802154 = 0x24 + PF_INET = 0x2 + PF_INET6 = 0xa + PF_IPX = 0x4 + PF_IRDA = 0x17 + PF_ISDN = 0x22 + PF_IUCV = 0x20 + PF_KCM = 0x29 + PF_KEY = 0xf + PF_LLC = 0x1a + PF_LOCAL = 0x1 + PF_MAX = 0x2e + PF_MCTP = 0x2d + PF_MPLS = 0x1c + PF_NETBEUI = 0xd + PF_NETLINK = 0x10 + PF_NETROM = 0x6 + PF_NFC = 0x27 + PF_PACKET = 0x11 + PF_PHONET = 0x23 + PF_PPPOX = 0x18 + PF_QIPCRTR = 0x2a + PF_R = 0x4 + PF_RDS = 0x15 + PF_ROSE = 0xb + PF_ROUTE = 0x10 + PF_RXRPC = 0x21 + PF_SECURITY = 0xe + 
PF_SMC = 0x2b + PF_SNA = 0x16 + PF_TIPC = 0x1e + PF_UNIX = 0x1 + PF_UNSPEC = 0x0 + PF_VSOCK = 0x28 + PF_W = 0x2 + PF_WANPIPE = 0x19 + PF_X = 0x1 + PF_X25 = 0x9 + PF_XDP = 0x2c PID_FS_MAGIC = 0x50494446 PIPEFS_MAGIC = 0x50495045 PPPIOCGNPMODE = 0xc008744c @@ -2758,6 +3049,23 @@ const ( PTRACE_SYSCALL_INFO_NONE = 0x0 PTRACE_SYSCALL_INFO_SECCOMP = 0x3 PTRACE_TRACEME = 0x0 + PT_AARCH64_MEMTAG_MTE = 0x70000002 + PT_DYNAMIC = 0x2 + PT_GNU_EH_FRAME = 0x6474e550 + PT_GNU_PROPERTY = 0x6474e553 + PT_GNU_RELRO = 0x6474e552 + PT_GNU_STACK = 0x6474e551 + PT_HIOS = 0x6fffffff + PT_HIPROC = 0x7fffffff + PT_INTERP = 0x3 + PT_LOAD = 0x1 + PT_LOOS = 0x60000000 + PT_LOPROC = 0x70000000 + PT_NOTE = 0x4 + PT_NULL = 0x0 + PT_PHDR = 0x6 + PT_SHLIB = 0x5 + PT_TLS = 0x7 P_ALL = 0x0 P_PGID = 0x2 P_PID = 0x1 @@ -3091,6 +3399,47 @@ const ( SEEK_MAX = 0x4 SEEK_SET = 0x0 SELINUX_MAGIC = 0xf97cff8c + SHF_ALLOC = 0x2 + SHF_EXCLUDE = 0x8000000 + SHF_EXECINSTR = 0x4 + SHF_GROUP = 0x200 + SHF_INFO_LINK = 0x40 + SHF_LINK_ORDER = 0x80 + SHF_MASKOS = 0xff00000 + SHF_MASKPROC = 0xf0000000 + SHF_MERGE = 0x10 + SHF_ORDERED = 0x4000000 + SHF_OS_NONCONFORMING = 0x100 + SHF_RELA_LIVEPATCH = 0x100000 + SHF_RO_AFTER_INIT = 0x200000 + SHF_STRINGS = 0x20 + SHF_TLS = 0x400 + SHF_WRITE = 0x1 + SHN_ABS = 0xfff1 + SHN_COMMON = 0xfff2 + SHN_HIPROC = 0xff1f + SHN_HIRESERVE = 0xffff + SHN_LIVEPATCH = 0xff20 + SHN_LOPROC = 0xff00 + SHN_LORESERVE = 0xff00 + SHN_UNDEF = 0x0 + SHT_DYNAMIC = 0x6 + SHT_DYNSYM = 0xb + SHT_HASH = 0x5 + SHT_HIPROC = 0x7fffffff + SHT_HIUSER = 0xffffffff + SHT_LOPROC = 0x70000000 + SHT_LOUSER = 0x80000000 + SHT_NOBITS = 0x8 + SHT_NOTE = 0x7 + SHT_NULL = 0x0 + SHT_NUM = 0xc + SHT_PROGBITS = 0x1 + SHT_REL = 0x9 + SHT_RELA = 0x4 + SHT_SHLIB = 0xa + SHT_STRTAB = 0x3 + SHT_SYMTAB = 0x2 SHUT_RD = 0x0 SHUT_RDWR = 0x2 SHUT_WR = 0x1 @@ -3317,6 +3666,16 @@ const ( STATX_UID = 0x8 STATX_WRITE_ATOMIC = 0x10000 STATX__RESERVED = 0x80000000 + STB_GLOBAL = 0x1 + STB_LOCAL = 0x0 + STB_WEAK = 0x2 + STT_COMMON = 
0x5 + STT_FILE = 0x4 + STT_FUNC = 0x2 + STT_NOTYPE = 0x0 + STT_OBJECT = 0x1 + STT_SECTION = 0x3 + STT_TLS = 0x6 SYNC_FILE_RANGE_WAIT_AFTER = 0x4 SYNC_FILE_RANGE_WAIT_BEFORE = 0x1 SYNC_FILE_RANGE_WRITE = 0x2 @@ -3553,6 +3912,8 @@ const ( UTIME_OMIT = 0x3ffffffe V9FS_MAGIC = 0x1021997 VERASE = 0x2 + VER_FLG_BASE = 0x1 + VER_FLG_WEAK = 0x2 VINTR = 0x0 VKILL = 0x3 VLNEXT = 0xf diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go index 1c37f9fbc4..97a61fc5b8 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go @@ -116,6 +116,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x80044803 + IOCTL_MEI_NOTIFY_SET = 0x40044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x7b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go index 6f54d34aef..a0d6d498c4 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go @@ -116,6 +116,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x80044803 + IOCTL_MEI_NOTIFY_SET = 0x40044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x7b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go index 783ec5c126..dd9c903f9a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x80044803 + IOCTL_MEI_NOTIFY_SET = 0x40044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x7b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git 
a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go index ca83d3ba16..384c61ca3a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go @@ -120,6 +120,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x80044803 + IOCTL_MEI_NOTIFY_SET = 0x40044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x7b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go index 607e611c0c..6384c9831f 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go @@ -116,6 +116,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x80044803 + IOCTL_MEI_NOTIFY_SET = 0x40044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x7b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go index b9cb5bd3c0..553c1c6f15 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x100 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x80 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xfffffff IPV6_FLOWLABEL_MASK = 0xfffff diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go index 65b078a638..b3339f2099 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x100 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x80 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + 
IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xfffffff IPV6_FLOWLABEL_MASK = 0xfffff diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go index 5298a3033d..177091d2bc 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x100 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x80 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go index 7bc557c876..c5abf156d0 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x100 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x80 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go index 152399bb04..f1f3fadf57 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x400 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xfffffff IPV6_FLOWLABEL_MASK = 0xfffff diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go index 1a1ce2409c..203ad9c54a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go +++ 
b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x400 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xfffffff IPV6_FLOWLABEL_MASK = 0xfffff diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go index 4231a1fb57..4b9abcb21a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x400 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go index 21c0e95266..f87983037d 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x80044803 + IOCTL_MEI_NOTIFY_SET = 0x40044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x7b9 IPV6_FLOWINFO_MASK = 0xffffff0f IPV6_FLOWLABEL_MASK = 0xffff0f00 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go index f00d1cd7cf..64347eb354 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go @@ -115,6 +115,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x80000 IN_NONBLOCK = 0x800 + IOCTL_MEI_NOTIFY_GET = 0x80044803 + IOCTL_MEI_NOTIFY_SET = 0x40044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x7b9 IPV6_FLOWINFO_MASK = 0xfffffff IPV6_FLOWLABEL_MASK = 0xfffff diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go 
b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go index bc8d539e6a..7d71911718 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go @@ -119,6 +119,8 @@ const ( IEXTEN = 0x8000 IN_CLOEXEC = 0x400000 IN_NONBLOCK = 0x4000 + IOCTL_MEI_NOTIFY_GET = 0x40044803 + IOCTL_MEI_NOTIFY_SET = 0x80044802 IOCTL_VM_SOCKETS_GET_LOCAL_CID = 0x200007b9 IPV6_FLOWINFO_MASK = 0xfffffff IPV6_FLOWLABEL_MASK = 0xfffff diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index 5cc1e8eb2f..8935d10a31 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -2238,3 +2238,13 @@ func Mseal(b []byte, flags uint) (err error) { } return } + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func setMemPolicy(mode int, mask *CPUSet, size int) (err error) { + _, _, e1 := Syscall(SYS_SET_MEMPOLICY, uintptr(mode), uintptr(unsafe.Pointer(mask)), uintptr(size)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} diff --git a/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go index c6545413c4..b4609c20c2 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_solaris_amd64.go @@ -72,7 +72,7 @@ import ( //go:cgo_import_dynamic libc_kill kill "libc.so" //go:cgo_import_dynamic libc_lchown lchown "libc.so" //go:cgo_import_dynamic libc_link link "libc.so" -//go:cgo_import_dynamic libc___xnet_llisten __xnet_llisten "libsocket.so" +//go:cgo_import_dynamic libc___xnet_listen __xnet_listen "libsocket.so" //go:cgo_import_dynamic libc_lstat lstat "libc.so" //go:cgo_import_dynamic libc_madvise madvise "libc.so" //go:cgo_import_dynamic libc_mkdir mkdir "libc.so" @@ -221,7 +221,7 @@ import ( //go:linkname procKill libc_kill //go:linkname procLchown libc_lchown //go:linkname procLink libc_link 
-//go:linkname proc__xnet_llisten libc___xnet_llisten +//go:linkname proc__xnet_listen libc___xnet_listen //go:linkname procLstat libc_lstat //go:linkname procMadvise libc_madvise //go:linkname procMkdir libc_mkdir @@ -371,7 +371,7 @@ var ( procKill, procLchown, procLink, - proc__xnet_llisten, + proc__xnet_listen, procLstat, procMadvise, procMkdir, @@ -1178,7 +1178,7 @@ func Link(path string, link string) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT func Listen(s int, backlog int) (err error) { - _, _, e1 := sysvicall6(uintptr(unsafe.Pointer(&proc__xnet_llisten)), 2, uintptr(s), uintptr(backlog), 0, 0, 0, 0) + _, _, e1 := sysvicall6(uintptr(unsafe.Pointer(&proc__xnet_listen)), 2, uintptr(s), uintptr(backlog), 0, 0, 0, 0) if e1 != 0 { err = errnoErr(e1) } diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index cd236443f6..c1a4670171 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -632,6 +632,8 @@ const ( IFA_FLAGS = 0x8 IFA_RT_PRIORITY = 0x9 IFA_TARGET_NETNSID = 0xa + IFAL_LABEL = 0x2 + IFAL_ADDRESS = 0x1 RT_SCOPE_UNIVERSE = 0x0 RT_SCOPE_SITE = 0xc8 RT_SCOPE_LINK = 0xfd @@ -689,6 +691,7 @@ const ( SizeofRtAttr = 0x4 SizeofIfInfomsg = 0x10 SizeofIfAddrmsg = 0x8 + SizeofIfAddrlblmsg = 0xc SizeofIfaCacheinfo = 0x10 SizeofRtMsg = 0xc SizeofRtNexthop = 0x8 @@ -740,6 +743,15 @@ type IfAddrmsg struct { Index uint32 } +type IfAddrlblmsg struct { + Family uint8 + _ uint8 + Prefixlen uint8 + Flags uint8 + Index uint32 + Seq uint32 +} + type IfaCacheinfo struct { Prefered uint32 Valid uint32 @@ -3052,6 +3064,23 @@ const ( ) const ( + TCA_UNSPEC = 0x0 + TCA_KIND = 0x1 + TCA_OPTIONS = 0x2 + TCA_STATS = 0x3 + TCA_XSTATS = 0x4 + TCA_RATE = 0x5 + TCA_FCNT = 0x6 + TCA_STATS2 = 0x7 + TCA_STAB = 0x8 + TCA_PAD = 0x9 + TCA_DUMP_INVISIBLE = 0xa + TCA_CHAIN = 0xb + TCA_HW_OFFLOAD = 0xc + TCA_INGRESS_BLOCK = 0xd + TCA_EGRESS_BLOCK = 0xe + 
TCA_DUMP_FLAGS = 0xf + TCA_EXT_WARN_MSG = 0x10 RTNLGRP_NONE = 0x0 RTNLGRP_LINK = 0x1 RTNLGRP_NOTIFY = 0x2 @@ -3086,6 +3115,18 @@ const ( RTNLGRP_IPV6_MROUTE_R = 0x1f RTNLGRP_NEXTHOP = 0x20 RTNLGRP_BRVLAN = 0x21 + RTNLGRP_MCTP_IFADDR = 0x22 + RTNLGRP_TUNNEL = 0x23 + RTNLGRP_STATS = 0x24 + RTNLGRP_IPV4_MCADDR = 0x25 + RTNLGRP_IPV6_MCADDR = 0x26 + RTNLGRP_IPV6_ACADDR = 0x27 + TCA_ROOT_UNSPEC = 0x0 + TCA_ROOT_TAB = 0x1 + TCA_ROOT_FLAGS = 0x2 + TCA_ROOT_COUNT = 0x3 + TCA_ROOT_TIME_DELTA = 0x4 + TCA_ROOT_EXT_WARN_MSG = 0x5 ) type CapUserHeader struct { @@ -3549,6 +3590,8 @@ type Nhmsg struct { Flags uint32 } +const SizeofNhmsg = 0x8 + type NexthopGrp struct { Id uint32 Weight uint8 @@ -3556,6 +3599,8 @@ type NexthopGrp struct { Resvd2 uint16 } +const SizeofNexthopGrp = 0x8 + const ( NHA_UNSPEC = 0x0 NHA_ID = 0x1 @@ -6291,3 +6336,30 @@ type SockDiagReq struct { } const RTM_NEWNVLAN = 0x70 + +const ( + MPOL_BIND = 0x2 + MPOL_DEFAULT = 0x0 + MPOL_F_ADDR = 0x2 + MPOL_F_MEMS_ALLOWED = 0x4 + MPOL_F_MOF = 0x8 + MPOL_F_MORON = 0x10 + MPOL_F_NODE = 0x1 + MPOL_F_NUMA_BALANCING = 0x2000 + MPOL_F_RELATIVE_NODES = 0x4000 + MPOL_F_SHARED = 0x1 + MPOL_F_STATIC_NODES = 0x8000 + MPOL_INTERLEAVE = 0x3 + MPOL_LOCAL = 0x4 + MPOL_MAX = 0x7 + MPOL_MF_INTERNAL = 0x10 + MPOL_MF_LAZY = 0x8 + MPOL_MF_MOVE_ALL = 0x4 + MPOL_MF_MOVE = 0x2 + MPOL_MF_STRICT = 0x1 + MPOL_MF_VALID = 0x7 + MPOL_MODE_FLAGS = 0xe000 + MPOL_PREFERRED = 0x1 + MPOL_PREFERRED_MANY = 0x5 + MPOL_WEIGHTED_INTERLEAVE = 0x6 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_netbsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_netbsd_arm.go index 439548ec9a..50e8e64497 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_netbsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_netbsd_arm.go @@ -104,7 +104,7 @@ type Statvfs_t struct { Fsid uint32 Namemax uint32 Owner uint32 - Spare [4]uint32 + Spare [4]uint64 Fstypename [32]byte Mntonname [1024]byte Mntfromname [1024]byte diff --git 
a/vendor/golang.org/x/sys/windows/registry/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/registry/zsyscall_windows.go index fc1835d8a2..bc1ce4360b 100644 --- a/vendor/golang.org/x/sys/windows/registry/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/registry/zsyscall_windows.go @@ -52,7 +52,7 @@ var ( ) func regConnectRegistry(machinename *uint16, key syscall.Handle, result *syscall.Handle) (regerrno error) { - r0, _, _ := syscall.Syscall(procRegConnectRegistryW.Addr(), 3, uintptr(unsafe.Pointer(machinename)), uintptr(key), uintptr(unsafe.Pointer(result))) + r0, _, _ := syscall.SyscallN(procRegConnectRegistryW.Addr(), uintptr(unsafe.Pointer(machinename)), uintptr(key), uintptr(unsafe.Pointer(result))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -60,7 +60,7 @@ func regConnectRegistry(machinename *uint16, key syscall.Handle, result *syscall } func regCreateKeyEx(key syscall.Handle, subkey *uint16, reserved uint32, class *uint16, options uint32, desired uint32, sa *syscall.SecurityAttributes, result *syscall.Handle, disposition *uint32) (regerrno error) { - r0, _, _ := syscall.Syscall9(procRegCreateKeyExW.Addr(), 9, uintptr(key), uintptr(unsafe.Pointer(subkey)), uintptr(reserved), uintptr(unsafe.Pointer(class)), uintptr(options), uintptr(desired), uintptr(unsafe.Pointer(sa)), uintptr(unsafe.Pointer(result)), uintptr(unsafe.Pointer(disposition))) + r0, _, _ := syscall.SyscallN(procRegCreateKeyExW.Addr(), uintptr(key), uintptr(unsafe.Pointer(subkey)), uintptr(reserved), uintptr(unsafe.Pointer(class)), uintptr(options), uintptr(desired), uintptr(unsafe.Pointer(sa)), uintptr(unsafe.Pointer(result)), uintptr(unsafe.Pointer(disposition))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -68,7 +68,7 @@ func regCreateKeyEx(key syscall.Handle, subkey *uint16, reserved uint32, class * } func regDeleteKey(key syscall.Handle, subkey *uint16) (regerrno error) { - r0, _, _ := syscall.Syscall(procRegDeleteKeyW.Addr(), 2, uintptr(key), 
uintptr(unsafe.Pointer(subkey)), 0) + r0, _, _ := syscall.SyscallN(procRegDeleteKeyW.Addr(), uintptr(key), uintptr(unsafe.Pointer(subkey))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -76,7 +76,7 @@ func regDeleteKey(key syscall.Handle, subkey *uint16) (regerrno error) { } func regDeleteValue(key syscall.Handle, name *uint16) (regerrno error) { - r0, _, _ := syscall.Syscall(procRegDeleteValueW.Addr(), 2, uintptr(key), uintptr(unsafe.Pointer(name)), 0) + r0, _, _ := syscall.SyscallN(procRegDeleteValueW.Addr(), uintptr(key), uintptr(unsafe.Pointer(name))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -84,7 +84,7 @@ func regDeleteValue(key syscall.Handle, name *uint16) (regerrno error) { } func regEnumValue(key syscall.Handle, index uint32, name *uint16, nameLen *uint32, reserved *uint32, valtype *uint32, buf *byte, buflen *uint32) (regerrno error) { - r0, _, _ := syscall.Syscall9(procRegEnumValueW.Addr(), 8, uintptr(key), uintptr(index), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameLen)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(valtype)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(buflen)), 0) + r0, _, _ := syscall.SyscallN(procRegEnumValueW.Addr(), uintptr(key), uintptr(index), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameLen)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(valtype)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(buflen))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -92,7 +92,7 @@ func regEnumValue(key syscall.Handle, index uint32, name *uint16, nameLen *uint3 } func regLoadMUIString(key syscall.Handle, name *uint16, buf *uint16, buflen uint32, buflenCopied *uint32, flags uint32, dir *uint16) (regerrno error) { - r0, _, _ := syscall.Syscall9(procRegLoadMUIStringW.Addr(), 7, uintptr(key), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buf)), uintptr(buflen), uintptr(unsafe.Pointer(buflenCopied)), uintptr(flags), uintptr(unsafe.Pointer(dir)), 0, 0) + r0, _, _ := 
syscall.SyscallN(procRegLoadMUIStringW.Addr(), uintptr(key), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buf)), uintptr(buflen), uintptr(unsafe.Pointer(buflenCopied)), uintptr(flags), uintptr(unsafe.Pointer(dir))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -100,7 +100,7 @@ func regLoadMUIString(key syscall.Handle, name *uint16, buf *uint16, buflen uint } func regSetValueEx(key syscall.Handle, valueName *uint16, reserved uint32, vtype uint32, buf *byte, bufsize uint32) (regerrno error) { - r0, _, _ := syscall.Syscall6(procRegSetValueExW.Addr(), 6, uintptr(key), uintptr(unsafe.Pointer(valueName)), uintptr(reserved), uintptr(vtype), uintptr(unsafe.Pointer(buf)), uintptr(bufsize)) + r0, _, _ := syscall.SyscallN(procRegSetValueExW.Addr(), uintptr(key), uintptr(unsafe.Pointer(valueName)), uintptr(reserved), uintptr(vtype), uintptr(unsafe.Pointer(buf)), uintptr(bufsize)) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -108,7 +108,7 @@ func regSetValueEx(key syscall.Handle, valueName *uint16, reserved uint32, vtype } func expandEnvironmentStrings(src *uint16, dst *uint16, size uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procExpandEnvironmentStringsW.Addr(), 3, uintptr(unsafe.Pointer(src)), uintptr(unsafe.Pointer(dst)), uintptr(size)) + r0, _, e1 := syscall.SyscallN(procExpandEnvironmentStringsW.Addr(), uintptr(unsafe.Pointer(src)), uintptr(unsafe.Pointer(dst)), uintptr(size)) n = uint32(r0) if n == 0 { err = errnoErr(e1) diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 640f6b153f..69439df2a4 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -321,6 +321,8 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP //sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = 
kernel32.WriteConsoleW //sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW +//sys GetNumberOfConsoleInputEvents(console Handle, numevents *uint32) (err error) = kernel32.GetNumberOfConsoleInputEvents +//sys FlushConsoleInputBuffer(console Handle) (err error) = kernel32.FlushConsoleInputBuffer //sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole //sys CreateToolhelp32Snapshot(flags uint32, processId uint32) (handle Handle, err error) [failretval==InvalidHandle] = kernel32.CreateToolhelp32Snapshot //sys Module32First(snapshot Handle, moduleEntry *ModuleEntry32) (err error) = kernel32.Module32FirstW @@ -890,8 +892,12 @@ const socket_error = uintptr(^uint32(0)) //sys MultiByteToWideChar(codePage uint32, dwFlags uint32, str *byte, nstr int32, wchar *uint16, nwchar int32) (nwrite int32, err error) = kernel32.MultiByteToWideChar //sys getBestInterfaceEx(sockaddr unsafe.Pointer, pdwBestIfIndex *uint32) (errcode error) = iphlpapi.GetBestInterfaceEx //sys GetIfEntry2Ex(level uint32, row *MibIfRow2) (errcode error) = iphlpapi.GetIfEntry2Ex +//sys GetIpForwardEntry2(row *MibIpForwardRow2) (errcode error) = iphlpapi.GetIpForwardEntry2 +//sys GetIpForwardTable2(family uint16, table **MibIpForwardTable2) (errcode error) = iphlpapi.GetIpForwardTable2 //sys GetUnicastIpAddressEntry(row *MibUnicastIpAddressRow) (errcode error) = iphlpapi.GetUnicastIpAddressEntry +//sys FreeMibTable(memory unsafe.Pointer) = iphlpapi.FreeMibTable //sys NotifyIpInterfaceChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) = iphlpapi.NotifyIpInterfaceChange +//sys NotifyRouteChange2(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) = iphlpapi.NotifyRouteChange2 //sys NotifyUnicastIpAddressChange(family uint16, 
callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) = iphlpapi.NotifyUnicastIpAddressChange //sys CancelMibChangeNotify2(notificationHandle Handle) (errcode error) = iphlpapi.CancelMibChangeNotify2 @@ -914,6 +920,17 @@ type RawSockaddrInet6 struct { Scope_id uint32 } +// RawSockaddrInet is a union that contains an IPv4, an IPv6 address, or an address family. See +// https://learn.microsoft.com/en-us/windows/win32/api/ws2ipdef/ns-ws2ipdef-sockaddr_inet. +// +// A [*RawSockaddrInet] may be converted to a [*RawSockaddrInet4] or [*RawSockaddrInet6] using +// unsafe, depending on the address family. +type RawSockaddrInet struct { + Family uint16 + Port uint16 + Data [6]uint32 +} + type RawSockaddr struct { Family uint16 Data [14]int8 diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index 958bcf47a3..6e4f50eb48 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -65,6 +65,22 @@ var signals = [...]string{ 15: "terminated", } +// File flags for [os.OpenFile]. The O_ prefix is used to indicate +// that these flags are specific to the OpenFile function. 
+const ( + O_FILE_FLAG_OPEN_NO_RECALL = FILE_FLAG_OPEN_NO_RECALL + O_FILE_FLAG_OPEN_REPARSE_POINT = FILE_FLAG_OPEN_REPARSE_POINT + O_FILE_FLAG_SESSION_AWARE = FILE_FLAG_SESSION_AWARE + O_FILE_FLAG_POSIX_SEMANTICS = FILE_FLAG_POSIX_SEMANTICS + O_FILE_FLAG_BACKUP_SEMANTICS = FILE_FLAG_BACKUP_SEMANTICS + O_FILE_FLAG_DELETE_ON_CLOSE = FILE_FLAG_DELETE_ON_CLOSE + O_FILE_FLAG_SEQUENTIAL_SCAN = FILE_FLAG_SEQUENTIAL_SCAN + O_FILE_FLAG_RANDOM_ACCESS = FILE_FLAG_RANDOM_ACCESS + O_FILE_FLAG_NO_BUFFERING = FILE_FLAG_NO_BUFFERING + O_FILE_FLAG_OVERLAPPED = FILE_FLAG_OVERLAPPED + O_FILE_FLAG_WRITE_THROUGH = FILE_FLAG_WRITE_THROUGH +) + const ( FILE_READ_DATA = 0x00000001 FILE_READ_ATTRIBUTES = 0x00000080 @@ -1976,6 +1992,12 @@ const ( SYMBOLIC_LINK_FLAG_DIRECTORY = 0x1 ) +// FILE_ZERO_DATA_INFORMATION from winioctl.h +type FileZeroDataInformation struct { + FileOffset int64 + BeyondFinalZero int64 +} + const ( ComputerNameNetBIOS = 0 ComputerNameDnsHostname = 1 @@ -2298,6 +2320,82 @@ type MibIfRow2 struct { OutQLen uint64 } +// IP_ADDRESS_PREFIX stores an IP address prefix. See +// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-ip_address_prefix. +type IpAddressPrefix struct { + Prefix RawSockaddrInet + PrefixLength uint8 +} + +// NL_ROUTE_ORIGIN enumeration from nldef.h or +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_route_origin. +const ( + NlroManual = 0 + NlroWellKnown = 1 + NlroDHCP = 2 + NlroRouterAdvertisement = 3 + Nlro6to4 = 4 +) + +// NL_ROUTE_ORIGIN enumeration from nldef.h or +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_route_protocol. 
+const ( + MIB_IPPROTO_OTHER = 1 + MIB_IPPROTO_LOCAL = 2 + MIB_IPPROTO_NETMGMT = 3 + MIB_IPPROTO_ICMP = 4 + MIB_IPPROTO_EGP = 5 + MIB_IPPROTO_GGP = 6 + MIB_IPPROTO_HELLO = 7 + MIB_IPPROTO_RIP = 8 + MIB_IPPROTO_IS_IS = 9 + MIB_IPPROTO_ES_IS = 10 + MIB_IPPROTO_CISCO = 11 + MIB_IPPROTO_BBN = 12 + MIB_IPPROTO_OSPF = 13 + MIB_IPPROTO_BGP = 14 + MIB_IPPROTO_IDPR = 15 + MIB_IPPROTO_EIGRP = 16 + MIB_IPPROTO_DVMRP = 17 + MIB_IPPROTO_RPL = 18 + MIB_IPPROTO_DHCP = 19 + MIB_IPPROTO_NT_AUTOSTATIC = 10002 + MIB_IPPROTO_NT_STATIC = 10006 + MIB_IPPROTO_NT_STATIC_NON_DOD = 10007 +) + +// MIB_IPFORWARD_ROW2 stores information about an IP route entry. See +// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_ipforward_row2. +type MibIpForwardRow2 struct { + InterfaceLuid uint64 + InterfaceIndex uint32 + DestinationPrefix IpAddressPrefix + NextHop RawSockaddrInet + SitePrefixLength uint8 + ValidLifetime uint32 + PreferredLifetime uint32 + Metric uint32 + Protocol uint32 + Loopback uint8 + AutoconfigureAddress uint8 + Publish uint8 + Immortal uint8 + Age uint32 + Origin uint32 +} + +// MIB_IPFORWARD_TABLE2 contains a table of IP route entries. See +// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_ipforward_table2. +type MibIpForwardTable2 struct { + NumEntries uint32 + Table [1]MibIpForwardRow2 +} + +// Rows returns the IP route entries in the table. +func (t *MibIpForwardTable2) Rows() []MibIpForwardRow2 { + return unsafe.Slice(&t.Table[0], t.NumEntries) +} + // MIB_UNICASTIPADDRESS_ROW stores information about a unicast IP address. See // https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_unicastipaddress_row. 
type MibUnicastIpAddressRow struct { diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index a58bc48b8e..f25b7308a1 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -182,13 +182,17 @@ var ( procDwmGetWindowAttribute = moddwmapi.NewProc("DwmGetWindowAttribute") procDwmSetWindowAttribute = moddwmapi.NewProc("DwmSetWindowAttribute") procCancelMibChangeNotify2 = modiphlpapi.NewProc("CancelMibChangeNotify2") + procFreeMibTable = modiphlpapi.NewProc("FreeMibTable") procGetAdaptersAddresses = modiphlpapi.NewProc("GetAdaptersAddresses") procGetAdaptersInfo = modiphlpapi.NewProc("GetAdaptersInfo") procGetBestInterfaceEx = modiphlpapi.NewProc("GetBestInterfaceEx") procGetIfEntry = modiphlpapi.NewProc("GetIfEntry") procGetIfEntry2Ex = modiphlpapi.NewProc("GetIfEntry2Ex") + procGetIpForwardEntry2 = modiphlpapi.NewProc("GetIpForwardEntry2") + procGetIpForwardTable2 = modiphlpapi.NewProc("GetIpForwardTable2") procGetUnicastIpAddressEntry = modiphlpapi.NewProc("GetUnicastIpAddressEntry") procNotifyIpInterfaceChange = modiphlpapi.NewProc("NotifyIpInterfaceChange") + procNotifyRouteChange2 = modiphlpapi.NewProc("NotifyRouteChange2") procNotifyUnicastIpAddressChange = modiphlpapi.NewProc("NotifyUnicastIpAddressChange") procAddDllDirectory = modkernel32.NewProc("AddDllDirectory") procAssignProcessToJobObject = modkernel32.NewProc("AssignProcessToJobObject") @@ -238,6 +242,7 @@ var ( procFindResourceW = modkernel32.NewProc("FindResourceW") procFindVolumeClose = modkernel32.NewProc("FindVolumeClose") procFindVolumeMountPointClose = modkernel32.NewProc("FindVolumeMountPointClose") + procFlushConsoleInputBuffer = modkernel32.NewProc("FlushConsoleInputBuffer") procFlushFileBuffers = modkernel32.NewProc("FlushFileBuffers") procFlushViewOfFile = modkernel32.NewProc("FlushViewOfFile") procFormatMessageW = modkernel32.NewProc("FormatMessageW") @@ 
-284,6 +289,7 @@ var ( procGetNamedPipeHandleStateW = modkernel32.NewProc("GetNamedPipeHandleStateW") procGetNamedPipeInfo = modkernel32.NewProc("GetNamedPipeInfo") procGetNamedPipeServerProcessId = modkernel32.NewProc("GetNamedPipeServerProcessId") + procGetNumberOfConsoleInputEvents = modkernel32.NewProc("GetNumberOfConsoleInputEvents") procGetOverlappedResult = modkernel32.NewProc("GetOverlappedResult") procGetPriorityClass = modkernel32.NewProc("GetPriorityClass") procGetProcAddress = modkernel32.NewProc("GetProcAddress") @@ -546,25 +552,25 @@ var ( ) func cm_Get_DevNode_Status(status *uint32, problemNumber *uint32, devInst DEVINST, flags uint32) (ret CONFIGRET) { - r0, _, _ := syscall.Syscall6(procCM_Get_DevNode_Status.Addr(), 4, uintptr(unsafe.Pointer(status)), uintptr(unsafe.Pointer(problemNumber)), uintptr(devInst), uintptr(flags), 0, 0) + r0, _, _ := syscall.SyscallN(procCM_Get_DevNode_Status.Addr(), uintptr(unsafe.Pointer(status)), uintptr(unsafe.Pointer(problemNumber)), uintptr(devInst), uintptr(flags)) ret = CONFIGRET(r0) return } func cm_Get_Device_Interface_List(interfaceClass *GUID, deviceID *uint16, buffer *uint16, bufferLen uint32, flags uint32) (ret CONFIGRET) { - r0, _, _ := syscall.Syscall6(procCM_Get_Device_Interface_ListW.Addr(), 5, uintptr(unsafe.Pointer(interfaceClass)), uintptr(unsafe.Pointer(deviceID)), uintptr(unsafe.Pointer(buffer)), uintptr(bufferLen), uintptr(flags), 0) + r0, _, _ := syscall.SyscallN(procCM_Get_Device_Interface_ListW.Addr(), uintptr(unsafe.Pointer(interfaceClass)), uintptr(unsafe.Pointer(deviceID)), uintptr(unsafe.Pointer(buffer)), uintptr(bufferLen), uintptr(flags)) ret = CONFIGRET(r0) return } func cm_Get_Device_Interface_List_Size(len *uint32, interfaceClass *GUID, deviceID *uint16, flags uint32) (ret CONFIGRET) { - r0, _, _ := syscall.Syscall6(procCM_Get_Device_Interface_List_SizeW.Addr(), 4, uintptr(unsafe.Pointer(len)), uintptr(unsafe.Pointer(interfaceClass)), uintptr(unsafe.Pointer(deviceID)), uintptr(flags), 0, 
0) + r0, _, _ := syscall.SyscallN(procCM_Get_Device_Interface_List_SizeW.Addr(), uintptr(unsafe.Pointer(len)), uintptr(unsafe.Pointer(interfaceClass)), uintptr(unsafe.Pointer(deviceID)), uintptr(flags)) ret = CONFIGRET(r0) return } func cm_MapCrToWin32Err(configRet CONFIGRET, defaultWin32Error Errno) (ret Errno) { - r0, _, _ := syscall.Syscall(procCM_MapCrToWin32Err.Addr(), 2, uintptr(configRet), uintptr(defaultWin32Error), 0) + r0, _, _ := syscall.SyscallN(procCM_MapCrToWin32Err.Addr(), uintptr(configRet), uintptr(defaultWin32Error)) ret = Errno(r0) return } @@ -574,7 +580,7 @@ func AdjustTokenGroups(token Token, resetToDefault bool, newstate *Tokengroups, if resetToDefault { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procAdjustTokenGroups.Addr(), 6, uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(newstate)), uintptr(buflen), uintptr(unsafe.Pointer(prevstate)), uintptr(unsafe.Pointer(returnlen))) + r1, _, e1 := syscall.SyscallN(procAdjustTokenGroups.Addr(), uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(newstate)), uintptr(buflen), uintptr(unsafe.Pointer(prevstate)), uintptr(unsafe.Pointer(returnlen))) if r1 == 0 { err = errnoErr(e1) } @@ -586,7 +592,7 @@ func AdjustTokenPrivileges(token Token, disableAllPrivileges bool, newstate *Tok if disableAllPrivileges { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procAdjustTokenPrivileges.Addr(), 6, uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(newstate)), uintptr(buflen), uintptr(unsafe.Pointer(prevstate)), uintptr(unsafe.Pointer(returnlen))) + r1, _, e1 := syscall.SyscallN(procAdjustTokenPrivileges.Addr(), uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(newstate)), uintptr(buflen), uintptr(unsafe.Pointer(prevstate)), uintptr(unsafe.Pointer(returnlen))) if r1 == 0 { err = errnoErr(e1) } @@ -594,7 +600,7 @@ func AdjustTokenPrivileges(token Token, disableAllPrivileges bool, newstate *Tok } func AllocateAndInitializeSid(identAuth *SidIdentifierAuthority, subAuth byte, subAuth0 uint32, subAuth1 uint32, 
subAuth2 uint32, subAuth3 uint32, subAuth4 uint32, subAuth5 uint32, subAuth6 uint32, subAuth7 uint32, sid **SID) (err error) { - r1, _, e1 := syscall.Syscall12(procAllocateAndInitializeSid.Addr(), 11, uintptr(unsafe.Pointer(identAuth)), uintptr(subAuth), uintptr(subAuth0), uintptr(subAuth1), uintptr(subAuth2), uintptr(subAuth3), uintptr(subAuth4), uintptr(subAuth5), uintptr(subAuth6), uintptr(subAuth7), uintptr(unsafe.Pointer(sid)), 0) + r1, _, e1 := syscall.SyscallN(procAllocateAndInitializeSid.Addr(), uintptr(unsafe.Pointer(identAuth)), uintptr(subAuth), uintptr(subAuth0), uintptr(subAuth1), uintptr(subAuth2), uintptr(subAuth3), uintptr(subAuth4), uintptr(subAuth5), uintptr(subAuth6), uintptr(subAuth7), uintptr(unsafe.Pointer(sid))) if r1 == 0 { err = errnoErr(e1) } @@ -602,7 +608,7 @@ func AllocateAndInitializeSid(identAuth *SidIdentifierAuthority, subAuth byte, s } func buildSecurityDescriptor(owner *TRUSTEE, group *TRUSTEE, countAccessEntries uint32, accessEntries *EXPLICIT_ACCESS, countAuditEntries uint32, auditEntries *EXPLICIT_ACCESS, oldSecurityDescriptor *SECURITY_DESCRIPTOR, sizeNewSecurityDescriptor *uint32, newSecurityDescriptor **SECURITY_DESCRIPTOR) (ret error) { - r0, _, _ := syscall.Syscall9(procBuildSecurityDescriptorW.Addr(), 9, uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(countAccessEntries), uintptr(unsafe.Pointer(accessEntries)), uintptr(countAuditEntries), uintptr(unsafe.Pointer(auditEntries)), uintptr(unsafe.Pointer(oldSecurityDescriptor)), uintptr(unsafe.Pointer(sizeNewSecurityDescriptor)), uintptr(unsafe.Pointer(newSecurityDescriptor))) + r0, _, _ := syscall.SyscallN(procBuildSecurityDescriptorW.Addr(), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(countAccessEntries), uintptr(unsafe.Pointer(accessEntries)), uintptr(countAuditEntries), uintptr(unsafe.Pointer(auditEntries)), uintptr(unsafe.Pointer(oldSecurityDescriptor)), uintptr(unsafe.Pointer(sizeNewSecurityDescriptor)), 
uintptr(unsafe.Pointer(newSecurityDescriptor))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -610,7 +616,7 @@ func buildSecurityDescriptor(owner *TRUSTEE, group *TRUSTEE, countAccessEntries } func ChangeServiceConfig2(service Handle, infoLevel uint32, info *byte) (err error) { - r1, _, e1 := syscall.Syscall(procChangeServiceConfig2W.Addr(), 3, uintptr(service), uintptr(infoLevel), uintptr(unsafe.Pointer(info))) + r1, _, e1 := syscall.SyscallN(procChangeServiceConfig2W.Addr(), uintptr(service), uintptr(infoLevel), uintptr(unsafe.Pointer(info))) if r1 == 0 { err = errnoErr(e1) } @@ -618,7 +624,7 @@ func ChangeServiceConfig2(service Handle, infoLevel uint32, info *byte) (err err } func ChangeServiceConfig(service Handle, serviceType uint32, startType uint32, errorControl uint32, binaryPathName *uint16, loadOrderGroup *uint16, tagId *uint32, dependencies *uint16, serviceStartName *uint16, password *uint16, displayName *uint16) (err error) { - r1, _, e1 := syscall.Syscall12(procChangeServiceConfigW.Addr(), 11, uintptr(service), uintptr(serviceType), uintptr(startType), uintptr(errorControl), uintptr(unsafe.Pointer(binaryPathName)), uintptr(unsafe.Pointer(loadOrderGroup)), uintptr(unsafe.Pointer(tagId)), uintptr(unsafe.Pointer(dependencies)), uintptr(unsafe.Pointer(serviceStartName)), uintptr(unsafe.Pointer(password)), uintptr(unsafe.Pointer(displayName)), 0) + r1, _, e1 := syscall.SyscallN(procChangeServiceConfigW.Addr(), uintptr(service), uintptr(serviceType), uintptr(startType), uintptr(errorControl), uintptr(unsafe.Pointer(binaryPathName)), uintptr(unsafe.Pointer(loadOrderGroup)), uintptr(unsafe.Pointer(tagId)), uintptr(unsafe.Pointer(dependencies)), uintptr(unsafe.Pointer(serviceStartName)), uintptr(unsafe.Pointer(password)), uintptr(unsafe.Pointer(displayName))) if r1 == 0 { err = errnoErr(e1) } @@ -626,7 +632,7 @@ func ChangeServiceConfig(service Handle, serviceType uint32, startType uint32, e } func checkTokenMembership(tokenHandle Token, sidToCheck *SID, isMember 
*int32) (err error) { - r1, _, e1 := syscall.Syscall(procCheckTokenMembership.Addr(), 3, uintptr(tokenHandle), uintptr(unsafe.Pointer(sidToCheck)), uintptr(unsafe.Pointer(isMember))) + r1, _, e1 := syscall.SyscallN(procCheckTokenMembership.Addr(), uintptr(tokenHandle), uintptr(unsafe.Pointer(sidToCheck)), uintptr(unsafe.Pointer(isMember))) if r1 == 0 { err = errnoErr(e1) } @@ -634,7 +640,7 @@ func checkTokenMembership(tokenHandle Token, sidToCheck *SID, isMember *int32) ( } func CloseServiceHandle(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procCloseServiceHandle.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procCloseServiceHandle.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -642,7 +648,7 @@ func CloseServiceHandle(handle Handle) (err error) { } func ControlService(service Handle, control uint32, status *SERVICE_STATUS) (err error) { - r1, _, e1 := syscall.Syscall(procControlService.Addr(), 3, uintptr(service), uintptr(control), uintptr(unsafe.Pointer(status))) + r1, _, e1 := syscall.SyscallN(procControlService.Addr(), uintptr(service), uintptr(control), uintptr(unsafe.Pointer(status))) if r1 == 0 { err = errnoErr(e1) } @@ -650,7 +656,7 @@ func ControlService(service Handle, control uint32, status *SERVICE_STATUS) (err } func convertSecurityDescriptorToStringSecurityDescriptor(sd *SECURITY_DESCRIPTOR, revision uint32, securityInformation SECURITY_INFORMATION, str **uint16, strLen *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procConvertSecurityDescriptorToStringSecurityDescriptorW.Addr(), 5, uintptr(unsafe.Pointer(sd)), uintptr(revision), uintptr(securityInformation), uintptr(unsafe.Pointer(str)), uintptr(unsafe.Pointer(strLen)), 0) + r1, _, e1 := syscall.SyscallN(procConvertSecurityDescriptorToStringSecurityDescriptorW.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(revision), uintptr(securityInformation), uintptr(unsafe.Pointer(str)), uintptr(unsafe.Pointer(strLen))) if r1 == 0 { err = errnoErr(e1) } @@ 
-658,7 +664,7 @@ func convertSecurityDescriptorToStringSecurityDescriptor(sd *SECURITY_DESCRIPTOR } func ConvertSidToStringSid(sid *SID, stringSid **uint16) (err error) { - r1, _, e1 := syscall.Syscall(procConvertSidToStringSidW.Addr(), 2, uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(stringSid)), 0) + r1, _, e1 := syscall.SyscallN(procConvertSidToStringSidW.Addr(), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(stringSid))) if r1 == 0 { err = errnoErr(e1) } @@ -675,7 +681,7 @@ func convertStringSecurityDescriptorToSecurityDescriptor(str string, revision ui } func _convertStringSecurityDescriptorToSecurityDescriptor(str *uint16, revision uint32, sd **SECURITY_DESCRIPTOR, size *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procConvertStringSecurityDescriptorToSecurityDescriptorW.Addr(), 4, uintptr(unsafe.Pointer(str)), uintptr(revision), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(size)), 0, 0) + r1, _, e1 := syscall.SyscallN(procConvertStringSecurityDescriptorToSecurityDescriptorW.Addr(), uintptr(unsafe.Pointer(str)), uintptr(revision), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(size))) if r1 == 0 { err = errnoErr(e1) } @@ -683,7 +689,7 @@ func _convertStringSecurityDescriptorToSecurityDescriptor(str *uint16, revision } func ConvertStringSidToSid(stringSid *uint16, sid **SID) (err error) { - r1, _, e1 := syscall.Syscall(procConvertStringSidToSidW.Addr(), 2, uintptr(unsafe.Pointer(stringSid)), uintptr(unsafe.Pointer(sid)), 0) + r1, _, e1 := syscall.SyscallN(procConvertStringSidToSidW.Addr(), uintptr(unsafe.Pointer(stringSid)), uintptr(unsafe.Pointer(sid))) if r1 == 0 { err = errnoErr(e1) } @@ -691,7 +697,7 @@ func ConvertStringSidToSid(stringSid *uint16, sid **SID) (err error) { } func CopySid(destSidLen uint32, destSid *SID, srcSid *SID) (err error) { - r1, _, e1 := syscall.Syscall(procCopySid.Addr(), 3, uintptr(destSidLen), uintptr(unsafe.Pointer(destSid)), uintptr(unsafe.Pointer(srcSid))) + r1, _, e1 := 
syscall.SyscallN(procCopySid.Addr(), uintptr(destSidLen), uintptr(unsafe.Pointer(destSid)), uintptr(unsafe.Pointer(srcSid))) if r1 == 0 { err = errnoErr(e1) } @@ -703,7 +709,7 @@ func CreateProcessAsUser(token Token, appName *uint16, commandLine *uint16, proc if inheritHandles { _p0 = 1 } - r1, _, e1 := syscall.Syscall12(procCreateProcessAsUserW.Addr(), 11, uintptr(token), uintptr(unsafe.Pointer(appName)), uintptr(unsafe.Pointer(commandLine)), uintptr(unsafe.Pointer(procSecurity)), uintptr(unsafe.Pointer(threadSecurity)), uintptr(_p0), uintptr(creationFlags), uintptr(unsafe.Pointer(env)), uintptr(unsafe.Pointer(currentDir)), uintptr(unsafe.Pointer(startupInfo)), uintptr(unsafe.Pointer(outProcInfo)), 0) + r1, _, e1 := syscall.SyscallN(procCreateProcessAsUserW.Addr(), uintptr(token), uintptr(unsafe.Pointer(appName)), uintptr(unsafe.Pointer(commandLine)), uintptr(unsafe.Pointer(procSecurity)), uintptr(unsafe.Pointer(threadSecurity)), uintptr(_p0), uintptr(creationFlags), uintptr(unsafe.Pointer(env)), uintptr(unsafe.Pointer(currentDir)), uintptr(unsafe.Pointer(startupInfo)), uintptr(unsafe.Pointer(outProcInfo))) if r1 == 0 { err = errnoErr(e1) } @@ -711,7 +717,7 @@ func CreateProcessAsUser(token Token, appName *uint16, commandLine *uint16, proc } func CreateService(mgr Handle, serviceName *uint16, displayName *uint16, access uint32, srvType uint32, startType uint32, errCtl uint32, pathName *uint16, loadOrderGroup *uint16, tagId *uint32, dependencies *uint16, serviceStartName *uint16, password *uint16) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall15(procCreateServiceW.Addr(), 13, uintptr(mgr), uintptr(unsafe.Pointer(serviceName)), uintptr(unsafe.Pointer(displayName)), uintptr(access), uintptr(srvType), uintptr(startType), uintptr(errCtl), uintptr(unsafe.Pointer(pathName)), uintptr(unsafe.Pointer(loadOrderGroup)), uintptr(unsafe.Pointer(tagId)), uintptr(unsafe.Pointer(dependencies)), uintptr(unsafe.Pointer(serviceStartName)), 
uintptr(unsafe.Pointer(password)), 0, 0) + r0, _, e1 := syscall.SyscallN(procCreateServiceW.Addr(), uintptr(mgr), uintptr(unsafe.Pointer(serviceName)), uintptr(unsafe.Pointer(displayName)), uintptr(access), uintptr(srvType), uintptr(startType), uintptr(errCtl), uintptr(unsafe.Pointer(pathName)), uintptr(unsafe.Pointer(loadOrderGroup)), uintptr(unsafe.Pointer(tagId)), uintptr(unsafe.Pointer(dependencies)), uintptr(unsafe.Pointer(serviceStartName)), uintptr(unsafe.Pointer(password))) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -720,7 +726,7 @@ func CreateService(mgr Handle, serviceName *uint16, displayName *uint16, access } func createWellKnownSid(sidType WELL_KNOWN_SID_TYPE, domainSid *SID, sid *SID, sizeSid *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procCreateWellKnownSid.Addr(), 4, uintptr(sidType), uintptr(unsafe.Pointer(domainSid)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(sizeSid)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCreateWellKnownSid.Addr(), uintptr(sidType), uintptr(unsafe.Pointer(domainSid)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(sizeSid))) if r1 == 0 { err = errnoErr(e1) } @@ -728,7 +734,7 @@ func createWellKnownSid(sidType WELL_KNOWN_SID_TYPE, domainSid *SID, sid *SID, s } func CryptAcquireContext(provhandle *Handle, container *uint16, provider *uint16, provtype uint32, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procCryptAcquireContextW.Addr(), 5, uintptr(unsafe.Pointer(provhandle)), uintptr(unsafe.Pointer(container)), uintptr(unsafe.Pointer(provider)), uintptr(provtype), uintptr(flags), 0) + r1, _, e1 := syscall.SyscallN(procCryptAcquireContextW.Addr(), uintptr(unsafe.Pointer(provhandle)), uintptr(unsafe.Pointer(container)), uintptr(unsafe.Pointer(provider)), uintptr(provtype), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -736,7 +742,7 @@ func CryptAcquireContext(provhandle *Handle, container *uint16, provider *uint16 } func CryptGenRandom(provhandle Handle, buflen 
uint32, buf *byte) (err error) { - r1, _, e1 := syscall.Syscall(procCryptGenRandom.Addr(), 3, uintptr(provhandle), uintptr(buflen), uintptr(unsafe.Pointer(buf))) + r1, _, e1 := syscall.SyscallN(procCryptGenRandom.Addr(), uintptr(provhandle), uintptr(buflen), uintptr(unsafe.Pointer(buf))) if r1 == 0 { err = errnoErr(e1) } @@ -744,7 +750,7 @@ func CryptGenRandom(provhandle Handle, buflen uint32, buf *byte) (err error) { } func CryptReleaseContext(provhandle Handle, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall(procCryptReleaseContext.Addr(), 2, uintptr(provhandle), uintptr(flags), 0) + r1, _, e1 := syscall.SyscallN(procCryptReleaseContext.Addr(), uintptr(provhandle), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -752,7 +758,7 @@ func CryptReleaseContext(provhandle Handle, flags uint32) (err error) { } func DeleteService(service Handle) (err error) { - r1, _, e1 := syscall.Syscall(procDeleteService.Addr(), 1, uintptr(service), 0, 0) + r1, _, e1 := syscall.SyscallN(procDeleteService.Addr(), uintptr(service)) if r1 == 0 { err = errnoErr(e1) } @@ -760,7 +766,7 @@ func DeleteService(service Handle) (err error) { } func DeregisterEventSource(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procDeregisterEventSource.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procDeregisterEventSource.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -768,7 +774,7 @@ func DeregisterEventSource(handle Handle) (err error) { } func DuplicateTokenEx(existingToken Token, desiredAccess uint32, tokenAttributes *SecurityAttributes, impersonationLevel uint32, tokenType uint32, newToken *Token) (err error) { - r1, _, e1 := syscall.Syscall6(procDuplicateTokenEx.Addr(), 6, uintptr(existingToken), uintptr(desiredAccess), uintptr(unsafe.Pointer(tokenAttributes)), uintptr(impersonationLevel), uintptr(tokenType), uintptr(unsafe.Pointer(newToken))) + r1, _, e1 := syscall.SyscallN(procDuplicateTokenEx.Addr(), uintptr(existingToken), 
uintptr(desiredAccess), uintptr(unsafe.Pointer(tokenAttributes)), uintptr(impersonationLevel), uintptr(tokenType), uintptr(unsafe.Pointer(newToken))) if r1 == 0 { err = errnoErr(e1) } @@ -776,7 +782,7 @@ func DuplicateTokenEx(existingToken Token, desiredAccess uint32, tokenAttributes } func EnumDependentServices(service Handle, activityState uint32, services *ENUM_SERVICE_STATUS, buffSize uint32, bytesNeeded *uint32, servicesReturned *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procEnumDependentServicesW.Addr(), 6, uintptr(service), uintptr(activityState), uintptr(unsafe.Pointer(services)), uintptr(buffSize), uintptr(unsafe.Pointer(bytesNeeded)), uintptr(unsafe.Pointer(servicesReturned))) + r1, _, e1 := syscall.SyscallN(procEnumDependentServicesW.Addr(), uintptr(service), uintptr(activityState), uintptr(unsafe.Pointer(services)), uintptr(buffSize), uintptr(unsafe.Pointer(bytesNeeded)), uintptr(unsafe.Pointer(servicesReturned))) if r1 == 0 { err = errnoErr(e1) } @@ -784,7 +790,7 @@ func EnumDependentServices(service Handle, activityState uint32, services *ENUM_ } func EnumServicesStatusEx(mgr Handle, infoLevel uint32, serviceType uint32, serviceState uint32, services *byte, bufSize uint32, bytesNeeded *uint32, servicesReturned *uint32, resumeHandle *uint32, groupName *uint16) (err error) { - r1, _, e1 := syscall.Syscall12(procEnumServicesStatusExW.Addr(), 10, uintptr(mgr), uintptr(infoLevel), uintptr(serviceType), uintptr(serviceState), uintptr(unsafe.Pointer(services)), uintptr(bufSize), uintptr(unsafe.Pointer(bytesNeeded)), uintptr(unsafe.Pointer(servicesReturned)), uintptr(unsafe.Pointer(resumeHandle)), uintptr(unsafe.Pointer(groupName)), 0, 0) + r1, _, e1 := syscall.SyscallN(procEnumServicesStatusExW.Addr(), uintptr(mgr), uintptr(infoLevel), uintptr(serviceType), uintptr(serviceState), uintptr(unsafe.Pointer(services)), uintptr(bufSize), uintptr(unsafe.Pointer(bytesNeeded)), uintptr(unsafe.Pointer(servicesReturned)), 
uintptr(unsafe.Pointer(resumeHandle)), uintptr(unsafe.Pointer(groupName))) if r1 == 0 { err = errnoErr(e1) } @@ -792,13 +798,13 @@ func EnumServicesStatusEx(mgr Handle, infoLevel uint32, serviceType uint32, serv } func EqualSid(sid1 *SID, sid2 *SID) (isEqual bool) { - r0, _, _ := syscall.Syscall(procEqualSid.Addr(), 2, uintptr(unsafe.Pointer(sid1)), uintptr(unsafe.Pointer(sid2)), 0) + r0, _, _ := syscall.SyscallN(procEqualSid.Addr(), uintptr(unsafe.Pointer(sid1)), uintptr(unsafe.Pointer(sid2))) isEqual = r0 != 0 return } func FreeSid(sid *SID) (err error) { - r1, _, e1 := syscall.Syscall(procFreeSid.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) + r1, _, e1 := syscall.SyscallN(procFreeSid.Addr(), uintptr(unsafe.Pointer(sid))) if r1 != 0 { err = errnoErr(e1) } @@ -806,7 +812,7 @@ func FreeSid(sid *SID) (err error) { } func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) { - r1, _, e1 := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) + r1, _, e1 := syscall.SyscallN(procGetAce.Addr(), uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) if r1 == 0 { err = errnoErr(e1) } @@ -814,7 +820,7 @@ func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) { } func GetLengthSid(sid *SID) (len uint32) { - r0, _, _ := syscall.Syscall(procGetLengthSid.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetLengthSid.Addr(), uintptr(unsafe.Pointer(sid))) len = uint32(r0) return } @@ -829,7 +835,7 @@ func getNamedSecurityInfo(objectName string, objectType SE_OBJECT_TYPE, security } func _getNamedSecurityInfo(objectName *uint16, objectType SE_OBJECT_TYPE, securityInformation SECURITY_INFORMATION, owner **SID, group **SID, dacl **ACL, sacl **ACL, sd **SECURITY_DESCRIPTOR) (ret error) { - r0, _, _ := syscall.Syscall9(procGetNamedSecurityInfoW.Addr(), 8, uintptr(unsafe.Pointer(objectName)), uintptr(objectType), 
uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl)), uintptr(unsafe.Pointer(sd)), 0) + r0, _, _ := syscall.SyscallN(procGetNamedSecurityInfoW.Addr(), uintptr(unsafe.Pointer(objectName)), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl)), uintptr(unsafe.Pointer(sd))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -837,7 +843,7 @@ func _getNamedSecurityInfo(objectName *uint16, objectType SE_OBJECT_TYPE, securi } func getSecurityDescriptorControl(sd *SECURITY_DESCRIPTOR, control *SECURITY_DESCRIPTOR_CONTROL, revision *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetSecurityDescriptorControl.Addr(), 3, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(control)), uintptr(unsafe.Pointer(revision))) + r1, _, e1 := syscall.SyscallN(procGetSecurityDescriptorControl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(control)), uintptr(unsafe.Pointer(revision))) if r1 == 0 { err = errnoErr(e1) } @@ -853,7 +859,7 @@ func getSecurityDescriptorDacl(sd *SECURITY_DESCRIPTOR, daclPresent *bool, dacl if *daclDefaulted { _p1 = 1 } - r1, _, e1 := syscall.Syscall6(procGetSecurityDescriptorDacl.Addr(), 4, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(&_p0)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(&_p1)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetSecurityDescriptorDacl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(&_p0)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(&_p1))) *daclPresent = _p0 != 0 *daclDefaulted = _p1 != 0 if r1 == 0 { @@ -867,7 +873,7 @@ func getSecurityDescriptorGroup(sd *SECURITY_DESCRIPTOR, group **SID, groupDefau if *groupDefaulted { _p0 = 1 } - r1, _, e1 := syscall.Syscall(procGetSecurityDescriptorGroup.Addr(), 3, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(group)), 
uintptr(unsafe.Pointer(&_p0))) + r1, _, e1 := syscall.SyscallN(procGetSecurityDescriptorGroup.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(&_p0))) *groupDefaulted = _p0 != 0 if r1 == 0 { err = errnoErr(e1) @@ -876,7 +882,7 @@ func getSecurityDescriptorGroup(sd *SECURITY_DESCRIPTOR, group **SID, groupDefau } func getSecurityDescriptorLength(sd *SECURITY_DESCRIPTOR) (len uint32) { - r0, _, _ := syscall.Syscall(procGetSecurityDescriptorLength.Addr(), 1, uintptr(unsafe.Pointer(sd)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetSecurityDescriptorLength.Addr(), uintptr(unsafe.Pointer(sd))) len = uint32(r0) return } @@ -886,7 +892,7 @@ func getSecurityDescriptorOwner(sd *SECURITY_DESCRIPTOR, owner **SID, ownerDefau if *ownerDefaulted { _p0 = 1 } - r1, _, e1 := syscall.Syscall(procGetSecurityDescriptorOwner.Addr(), 3, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(&_p0))) + r1, _, e1 := syscall.SyscallN(procGetSecurityDescriptorOwner.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(&_p0))) *ownerDefaulted = _p0 != 0 if r1 == 0 { err = errnoErr(e1) @@ -895,7 +901,7 @@ func getSecurityDescriptorOwner(sd *SECURITY_DESCRIPTOR, owner **SID, ownerDefau } func getSecurityDescriptorRMControl(sd *SECURITY_DESCRIPTOR, rmControl *uint8) (ret error) { - r0, _, _ := syscall.Syscall(procGetSecurityDescriptorRMControl.Addr(), 2, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(rmControl)), 0) + r0, _, _ := syscall.SyscallN(procGetSecurityDescriptorRMControl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(rmControl))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -911,7 +917,7 @@ func getSecurityDescriptorSacl(sd *SECURITY_DESCRIPTOR, saclPresent *bool, sacl if *saclDefaulted { _p1 = 1 } - r1, _, e1 := syscall.Syscall6(procGetSecurityDescriptorSacl.Addr(), 4, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(&_p0)), uintptr(unsafe.Pointer(sacl)), 
uintptr(unsafe.Pointer(&_p1)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetSecurityDescriptorSacl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(&_p0)), uintptr(unsafe.Pointer(sacl)), uintptr(unsafe.Pointer(&_p1))) *saclPresent = _p0 != 0 *saclDefaulted = _p1 != 0 if r1 == 0 { @@ -921,7 +927,7 @@ func getSecurityDescriptorSacl(sd *SECURITY_DESCRIPTOR, saclPresent *bool, sacl } func getSecurityInfo(handle Handle, objectType SE_OBJECT_TYPE, securityInformation SECURITY_INFORMATION, owner **SID, group **SID, dacl **ACL, sacl **ACL, sd **SECURITY_DESCRIPTOR) (ret error) { - r0, _, _ := syscall.Syscall9(procGetSecurityInfo.Addr(), 8, uintptr(handle), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl)), uintptr(unsafe.Pointer(sd)), 0) + r0, _, _ := syscall.SyscallN(procGetSecurityInfo.Addr(), uintptr(handle), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl)), uintptr(unsafe.Pointer(sd))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -929,25 +935,25 @@ func getSecurityInfo(handle Handle, objectType SE_OBJECT_TYPE, securityInformati } func getSidIdentifierAuthority(sid *SID) (authority *SidIdentifierAuthority) { - r0, _, _ := syscall.Syscall(procGetSidIdentifierAuthority.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetSidIdentifierAuthority.Addr(), uintptr(unsafe.Pointer(sid))) authority = (*SidIdentifierAuthority)(unsafe.Pointer(r0)) return } func getSidSubAuthority(sid *SID, index uint32) (subAuthority *uint32) { - r0, _, _ := syscall.Syscall(procGetSidSubAuthority.Addr(), 2, uintptr(unsafe.Pointer(sid)), uintptr(index), 0) + r0, _, _ := syscall.SyscallN(procGetSidSubAuthority.Addr(), uintptr(unsafe.Pointer(sid)), uintptr(index)) subAuthority = (*uint32)(unsafe.Pointer(r0)) 
return } func getSidSubAuthorityCount(sid *SID) (count *uint8) { - r0, _, _ := syscall.Syscall(procGetSidSubAuthorityCount.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetSidSubAuthorityCount.Addr(), uintptr(unsafe.Pointer(sid))) count = (*uint8)(unsafe.Pointer(r0)) return } func GetTokenInformation(token Token, infoClass uint32, info *byte, infoLen uint32, returnedLen *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetTokenInformation.Addr(), 5, uintptr(token), uintptr(infoClass), uintptr(unsafe.Pointer(info)), uintptr(infoLen), uintptr(unsafe.Pointer(returnedLen)), 0) + r1, _, e1 := syscall.SyscallN(procGetTokenInformation.Addr(), uintptr(token), uintptr(infoClass), uintptr(unsafe.Pointer(info)), uintptr(infoLen), uintptr(unsafe.Pointer(returnedLen))) if r1 == 0 { err = errnoErr(e1) } @@ -955,7 +961,7 @@ func GetTokenInformation(token Token, infoClass uint32, info *byte, infoLen uint } func ImpersonateSelf(impersonationlevel uint32) (err error) { - r1, _, e1 := syscall.Syscall(procImpersonateSelf.Addr(), 1, uintptr(impersonationlevel), 0, 0) + r1, _, e1 := syscall.SyscallN(procImpersonateSelf.Addr(), uintptr(impersonationlevel)) if r1 == 0 { err = errnoErr(e1) } @@ -963,7 +969,7 @@ func ImpersonateSelf(impersonationlevel uint32) (err error) { } func initializeSecurityDescriptor(absoluteSD *SECURITY_DESCRIPTOR, revision uint32) (err error) { - r1, _, e1 := syscall.Syscall(procInitializeSecurityDescriptor.Addr(), 2, uintptr(unsafe.Pointer(absoluteSD)), uintptr(revision), 0) + r1, _, e1 := syscall.SyscallN(procInitializeSecurityDescriptor.Addr(), uintptr(unsafe.Pointer(absoluteSD)), uintptr(revision)) if r1 == 0 { err = errnoErr(e1) } @@ -979,7 +985,7 @@ func InitiateSystemShutdownEx(machineName *uint16, message *uint16, timeout uint if rebootAfterShutdown { _p1 = 1 } - r1, _, e1 := syscall.Syscall6(procInitiateSystemShutdownExW.Addr(), 6, uintptr(unsafe.Pointer(machineName)), uintptr(unsafe.Pointer(message)), 
uintptr(timeout), uintptr(_p0), uintptr(_p1), uintptr(reason)) + r1, _, e1 := syscall.SyscallN(procInitiateSystemShutdownExW.Addr(), uintptr(unsafe.Pointer(machineName)), uintptr(unsafe.Pointer(message)), uintptr(timeout), uintptr(_p0), uintptr(_p1), uintptr(reason)) if r1 == 0 { err = errnoErr(e1) } @@ -987,7 +993,7 @@ func InitiateSystemShutdownEx(machineName *uint16, message *uint16, timeout uint } func isTokenRestricted(tokenHandle Token) (ret bool, err error) { - r0, _, e1 := syscall.Syscall(procIsTokenRestricted.Addr(), 1, uintptr(tokenHandle), 0, 0) + r0, _, e1 := syscall.SyscallN(procIsTokenRestricted.Addr(), uintptr(tokenHandle)) ret = r0 != 0 if !ret { err = errnoErr(e1) @@ -996,25 +1002,25 @@ func isTokenRestricted(tokenHandle Token) (ret bool, err error) { } func isValidSecurityDescriptor(sd *SECURITY_DESCRIPTOR) (isValid bool) { - r0, _, _ := syscall.Syscall(procIsValidSecurityDescriptor.Addr(), 1, uintptr(unsafe.Pointer(sd)), 0, 0) + r0, _, _ := syscall.SyscallN(procIsValidSecurityDescriptor.Addr(), uintptr(unsafe.Pointer(sd))) isValid = r0 != 0 return } func isValidSid(sid *SID) (isValid bool) { - r0, _, _ := syscall.Syscall(procIsValidSid.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) + r0, _, _ := syscall.SyscallN(procIsValidSid.Addr(), uintptr(unsafe.Pointer(sid))) isValid = r0 != 0 return } func isWellKnownSid(sid *SID, sidType WELL_KNOWN_SID_TYPE) (isWellKnown bool) { - r0, _, _ := syscall.Syscall(procIsWellKnownSid.Addr(), 2, uintptr(unsafe.Pointer(sid)), uintptr(sidType), 0) + r0, _, _ := syscall.SyscallN(procIsWellKnownSid.Addr(), uintptr(unsafe.Pointer(sid)), uintptr(sidType)) isWellKnown = r0 != 0 return } func LookupAccountName(systemName *uint16, accountName *uint16, sid *SID, sidLen *uint32, refdDomainName *uint16, refdDomainNameLen *uint32, use *uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procLookupAccountNameW.Addr(), 7, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(accountName)), uintptr(unsafe.Pointer(sid)), 
uintptr(unsafe.Pointer(sidLen)), uintptr(unsafe.Pointer(refdDomainName)), uintptr(unsafe.Pointer(refdDomainNameLen)), uintptr(unsafe.Pointer(use)), 0, 0) + r1, _, e1 := syscall.SyscallN(procLookupAccountNameW.Addr(), uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(accountName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(sidLen)), uintptr(unsafe.Pointer(refdDomainName)), uintptr(unsafe.Pointer(refdDomainNameLen)), uintptr(unsafe.Pointer(use))) if r1 == 0 { err = errnoErr(e1) } @@ -1022,7 +1028,7 @@ func LookupAccountName(systemName *uint16, accountName *uint16, sid *SID, sidLen } func LookupAccountSid(systemName *uint16, sid *SID, name *uint16, nameLen *uint32, refdDomainName *uint16, refdDomainNameLen *uint32, use *uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procLookupAccountSidW.Addr(), 7, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameLen)), uintptr(unsafe.Pointer(refdDomainName)), uintptr(unsafe.Pointer(refdDomainNameLen)), uintptr(unsafe.Pointer(use)), 0, 0) + r1, _, e1 := syscall.SyscallN(procLookupAccountSidW.Addr(), uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameLen)), uintptr(unsafe.Pointer(refdDomainName)), uintptr(unsafe.Pointer(refdDomainNameLen)), uintptr(unsafe.Pointer(use))) if r1 == 0 { err = errnoErr(e1) } @@ -1030,7 +1036,7 @@ func LookupAccountSid(systemName *uint16, sid *SID, name *uint16, nameLen *uint3 } func LookupPrivilegeValue(systemname *uint16, name *uint16, luid *LUID) (err error) { - r1, _, e1 := syscall.Syscall(procLookupPrivilegeValueW.Addr(), 3, uintptr(unsafe.Pointer(systemname)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(luid))) + r1, _, e1 := syscall.SyscallN(procLookupPrivilegeValueW.Addr(), uintptr(unsafe.Pointer(systemname)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(luid))) if r1 == 0 { err = errnoErr(e1) } @@ -1038,7 
+1044,7 @@ func LookupPrivilegeValue(systemname *uint16, name *uint16, luid *LUID) (err err } func makeAbsoluteSD(selfRelativeSD *SECURITY_DESCRIPTOR, absoluteSD *SECURITY_DESCRIPTOR, absoluteSDSize *uint32, dacl *ACL, daclSize *uint32, sacl *ACL, saclSize *uint32, owner *SID, ownerSize *uint32, group *SID, groupSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall12(procMakeAbsoluteSD.Addr(), 11, uintptr(unsafe.Pointer(selfRelativeSD)), uintptr(unsafe.Pointer(absoluteSD)), uintptr(unsafe.Pointer(absoluteSDSize)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(daclSize)), uintptr(unsafe.Pointer(sacl)), uintptr(unsafe.Pointer(saclSize)), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(ownerSize)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(groupSize)), 0) + r1, _, e1 := syscall.SyscallN(procMakeAbsoluteSD.Addr(), uintptr(unsafe.Pointer(selfRelativeSD)), uintptr(unsafe.Pointer(absoluteSD)), uintptr(unsafe.Pointer(absoluteSDSize)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(daclSize)), uintptr(unsafe.Pointer(sacl)), uintptr(unsafe.Pointer(saclSize)), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(ownerSize)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(groupSize))) if r1 == 0 { err = errnoErr(e1) } @@ -1046,7 +1052,7 @@ func makeAbsoluteSD(selfRelativeSD *SECURITY_DESCRIPTOR, absoluteSD *SECURITY_DE } func makeSelfRelativeSD(absoluteSD *SECURITY_DESCRIPTOR, selfRelativeSD *SECURITY_DESCRIPTOR, selfRelativeSDSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procMakeSelfRelativeSD.Addr(), 3, uintptr(unsafe.Pointer(absoluteSD)), uintptr(unsafe.Pointer(selfRelativeSD)), uintptr(unsafe.Pointer(selfRelativeSDSize))) + r1, _, e1 := syscall.SyscallN(procMakeSelfRelativeSD.Addr(), uintptr(unsafe.Pointer(absoluteSD)), uintptr(unsafe.Pointer(selfRelativeSD)), uintptr(unsafe.Pointer(selfRelativeSDSize))) if r1 == 0 { err = errnoErr(e1) } @@ -1054,7 +1060,7 @@ func makeSelfRelativeSD(absoluteSD 
*SECURITY_DESCRIPTOR, selfRelativeSD *SECURIT } func NotifyServiceStatusChange(service Handle, notifyMask uint32, notifier *SERVICE_NOTIFY) (ret error) { - r0, _, _ := syscall.Syscall(procNotifyServiceStatusChangeW.Addr(), 3, uintptr(service), uintptr(notifyMask), uintptr(unsafe.Pointer(notifier))) + r0, _, _ := syscall.SyscallN(procNotifyServiceStatusChangeW.Addr(), uintptr(service), uintptr(notifyMask), uintptr(unsafe.Pointer(notifier))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -1062,7 +1068,7 @@ func NotifyServiceStatusChange(service Handle, notifyMask uint32, notifier *SERV } func OpenProcessToken(process Handle, access uint32, token *Token) (err error) { - r1, _, e1 := syscall.Syscall(procOpenProcessToken.Addr(), 3, uintptr(process), uintptr(access), uintptr(unsafe.Pointer(token))) + r1, _, e1 := syscall.SyscallN(procOpenProcessToken.Addr(), uintptr(process), uintptr(access), uintptr(unsafe.Pointer(token))) if r1 == 0 { err = errnoErr(e1) } @@ -1070,7 +1076,7 @@ func OpenProcessToken(process Handle, access uint32, token *Token) (err error) { } func OpenSCManager(machineName *uint16, databaseName *uint16, access uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procOpenSCManagerW.Addr(), 3, uintptr(unsafe.Pointer(machineName)), uintptr(unsafe.Pointer(databaseName)), uintptr(access)) + r0, _, e1 := syscall.SyscallN(procOpenSCManagerW.Addr(), uintptr(unsafe.Pointer(machineName)), uintptr(unsafe.Pointer(databaseName)), uintptr(access)) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -1079,7 +1085,7 @@ func OpenSCManager(machineName *uint16, databaseName *uint16, access uint32) (ha } func OpenService(mgr Handle, serviceName *uint16, access uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procOpenServiceW.Addr(), 3, uintptr(mgr), uintptr(unsafe.Pointer(serviceName)), uintptr(access)) + r0, _, e1 := syscall.SyscallN(procOpenServiceW.Addr(), uintptr(mgr), uintptr(unsafe.Pointer(serviceName)), uintptr(access)) handle = 
Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -1092,7 +1098,7 @@ func OpenThreadToken(thread Handle, access uint32, openAsSelf bool, token *Token if openAsSelf { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procOpenThreadToken.Addr(), 4, uintptr(thread), uintptr(access), uintptr(_p0), uintptr(unsafe.Pointer(token)), 0, 0) + r1, _, e1 := syscall.SyscallN(procOpenThreadToken.Addr(), uintptr(thread), uintptr(access), uintptr(_p0), uintptr(unsafe.Pointer(token))) if r1 == 0 { err = errnoErr(e1) } @@ -1100,7 +1106,7 @@ func OpenThreadToken(thread Handle, access uint32, openAsSelf bool, token *Token } func QueryServiceConfig2(service Handle, infoLevel uint32, buff *byte, buffSize uint32, bytesNeeded *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procQueryServiceConfig2W.Addr(), 5, uintptr(service), uintptr(infoLevel), uintptr(unsafe.Pointer(buff)), uintptr(buffSize), uintptr(unsafe.Pointer(bytesNeeded)), 0) + r1, _, e1 := syscall.SyscallN(procQueryServiceConfig2W.Addr(), uintptr(service), uintptr(infoLevel), uintptr(unsafe.Pointer(buff)), uintptr(buffSize), uintptr(unsafe.Pointer(bytesNeeded))) if r1 == 0 { err = errnoErr(e1) } @@ -1108,7 +1114,7 @@ func QueryServiceConfig2(service Handle, infoLevel uint32, buff *byte, buffSize } func QueryServiceConfig(service Handle, serviceConfig *QUERY_SERVICE_CONFIG, bufSize uint32, bytesNeeded *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procQueryServiceConfigW.Addr(), 4, uintptr(service), uintptr(unsafe.Pointer(serviceConfig)), uintptr(bufSize), uintptr(unsafe.Pointer(bytesNeeded)), 0, 0) + r1, _, e1 := syscall.SyscallN(procQueryServiceConfigW.Addr(), uintptr(service), uintptr(unsafe.Pointer(serviceConfig)), uintptr(bufSize), uintptr(unsafe.Pointer(bytesNeeded))) if r1 == 0 { err = errnoErr(e1) } @@ -1120,7 +1126,7 @@ func QueryServiceDynamicInformation(service Handle, infoLevel uint32, dynamicInf if err != nil { return } - r1, _, e1 := syscall.Syscall(procQueryServiceDynamicInformation.Addr(), 3, 
uintptr(service), uintptr(infoLevel), uintptr(dynamicInfo)) + r1, _, e1 := syscall.SyscallN(procQueryServiceDynamicInformation.Addr(), uintptr(service), uintptr(infoLevel), uintptr(dynamicInfo)) if r1 == 0 { err = errnoErr(e1) } @@ -1128,7 +1134,7 @@ func QueryServiceDynamicInformation(service Handle, infoLevel uint32, dynamicInf } func QueryServiceLockStatus(mgr Handle, lockStatus *QUERY_SERVICE_LOCK_STATUS, bufSize uint32, bytesNeeded *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procQueryServiceLockStatusW.Addr(), 4, uintptr(mgr), uintptr(unsafe.Pointer(lockStatus)), uintptr(bufSize), uintptr(unsafe.Pointer(bytesNeeded)), 0, 0) + r1, _, e1 := syscall.SyscallN(procQueryServiceLockStatusW.Addr(), uintptr(mgr), uintptr(unsafe.Pointer(lockStatus)), uintptr(bufSize), uintptr(unsafe.Pointer(bytesNeeded))) if r1 == 0 { err = errnoErr(e1) } @@ -1136,7 +1142,7 @@ func QueryServiceLockStatus(mgr Handle, lockStatus *QUERY_SERVICE_LOCK_STATUS, b } func QueryServiceStatus(service Handle, status *SERVICE_STATUS) (err error) { - r1, _, e1 := syscall.Syscall(procQueryServiceStatus.Addr(), 2, uintptr(service), uintptr(unsafe.Pointer(status)), 0) + r1, _, e1 := syscall.SyscallN(procQueryServiceStatus.Addr(), uintptr(service), uintptr(unsafe.Pointer(status))) if r1 == 0 { err = errnoErr(e1) } @@ -1144,7 +1150,7 @@ func QueryServiceStatus(service Handle, status *SERVICE_STATUS) (err error) { } func QueryServiceStatusEx(service Handle, infoLevel uint32, buff *byte, buffSize uint32, bytesNeeded *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procQueryServiceStatusEx.Addr(), 5, uintptr(service), uintptr(infoLevel), uintptr(unsafe.Pointer(buff)), uintptr(buffSize), uintptr(unsafe.Pointer(bytesNeeded)), 0) + r1, _, e1 := syscall.SyscallN(procQueryServiceStatusEx.Addr(), uintptr(service), uintptr(infoLevel), uintptr(unsafe.Pointer(buff)), uintptr(buffSize), uintptr(unsafe.Pointer(bytesNeeded))) if r1 == 0 { err = errnoErr(e1) } @@ -1152,7 +1158,7 @@ func 
QueryServiceStatusEx(service Handle, infoLevel uint32, buff *byte, buffSize } func RegCloseKey(key Handle) (regerrno error) { - r0, _, _ := syscall.Syscall(procRegCloseKey.Addr(), 1, uintptr(key), 0, 0) + r0, _, _ := syscall.SyscallN(procRegCloseKey.Addr(), uintptr(key)) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -1160,7 +1166,7 @@ func RegCloseKey(key Handle) (regerrno error) { } func RegEnumKeyEx(key Handle, index uint32, name *uint16, nameLen *uint32, reserved *uint32, class *uint16, classLen *uint32, lastWriteTime *Filetime) (regerrno error) { - r0, _, _ := syscall.Syscall9(procRegEnumKeyExW.Addr(), 8, uintptr(key), uintptr(index), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameLen)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(class)), uintptr(unsafe.Pointer(classLen)), uintptr(unsafe.Pointer(lastWriteTime)), 0) + r0, _, _ := syscall.SyscallN(procRegEnumKeyExW.Addr(), uintptr(key), uintptr(index), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameLen)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(class)), uintptr(unsafe.Pointer(classLen)), uintptr(unsafe.Pointer(lastWriteTime))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -1176,7 +1182,7 @@ func RegNotifyChangeKeyValue(key Handle, watchSubtree bool, notifyFilter uint32, if asynchronous { _p1 = 1 } - r0, _, _ := syscall.Syscall6(procRegNotifyChangeKeyValue.Addr(), 5, uintptr(key), uintptr(_p0), uintptr(notifyFilter), uintptr(event), uintptr(_p1), 0) + r0, _, _ := syscall.SyscallN(procRegNotifyChangeKeyValue.Addr(), uintptr(key), uintptr(_p0), uintptr(notifyFilter), uintptr(event), uintptr(_p1)) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -1184,7 +1190,7 @@ func RegNotifyChangeKeyValue(key Handle, watchSubtree bool, notifyFilter uint32, } func RegOpenKeyEx(key Handle, subkey *uint16, options uint32, desiredAccess uint32, result *Handle) (regerrno error) { - r0, _, _ := syscall.Syscall6(procRegOpenKeyExW.Addr(), 5, uintptr(key), 
uintptr(unsafe.Pointer(subkey)), uintptr(options), uintptr(desiredAccess), uintptr(unsafe.Pointer(result)), 0) + r0, _, _ := syscall.SyscallN(procRegOpenKeyExW.Addr(), uintptr(key), uintptr(unsafe.Pointer(subkey)), uintptr(options), uintptr(desiredAccess), uintptr(unsafe.Pointer(result))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -1192,7 +1198,7 @@ func RegOpenKeyEx(key Handle, subkey *uint16, options uint32, desiredAccess uint } func RegQueryInfoKey(key Handle, class *uint16, classLen *uint32, reserved *uint32, subkeysLen *uint32, maxSubkeyLen *uint32, maxClassLen *uint32, valuesLen *uint32, maxValueNameLen *uint32, maxValueLen *uint32, saLen *uint32, lastWriteTime *Filetime) (regerrno error) { - r0, _, _ := syscall.Syscall12(procRegQueryInfoKeyW.Addr(), 12, uintptr(key), uintptr(unsafe.Pointer(class)), uintptr(unsafe.Pointer(classLen)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(subkeysLen)), uintptr(unsafe.Pointer(maxSubkeyLen)), uintptr(unsafe.Pointer(maxClassLen)), uintptr(unsafe.Pointer(valuesLen)), uintptr(unsafe.Pointer(maxValueNameLen)), uintptr(unsafe.Pointer(maxValueLen)), uintptr(unsafe.Pointer(saLen)), uintptr(unsafe.Pointer(lastWriteTime))) + r0, _, _ := syscall.SyscallN(procRegQueryInfoKeyW.Addr(), uintptr(key), uintptr(unsafe.Pointer(class)), uintptr(unsafe.Pointer(classLen)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(subkeysLen)), uintptr(unsafe.Pointer(maxSubkeyLen)), uintptr(unsafe.Pointer(maxClassLen)), uintptr(unsafe.Pointer(valuesLen)), uintptr(unsafe.Pointer(maxValueNameLen)), uintptr(unsafe.Pointer(maxValueLen)), uintptr(unsafe.Pointer(saLen)), uintptr(unsafe.Pointer(lastWriteTime))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -1200,7 +1206,7 @@ func RegQueryInfoKey(key Handle, class *uint16, classLen *uint32, reserved *uint } func RegQueryValueEx(key Handle, name *uint16, reserved *uint32, valtype *uint32, buf *byte, buflen *uint32) (regerrno error) { - r0, _, _ := 
syscall.Syscall6(procRegQueryValueExW.Addr(), 6, uintptr(key), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(valtype)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(buflen))) + r0, _, _ := syscall.SyscallN(procRegQueryValueExW.Addr(), uintptr(key), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(valtype)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(buflen))) if r0 != 0 { regerrno = syscall.Errno(r0) } @@ -1208,7 +1214,7 @@ func RegQueryValueEx(key Handle, name *uint16, reserved *uint32, valtype *uint32 } func RegisterEventSource(uncServerName *uint16, sourceName *uint16) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procRegisterEventSourceW.Addr(), 2, uintptr(unsafe.Pointer(uncServerName)), uintptr(unsafe.Pointer(sourceName)), 0) + r0, _, e1 := syscall.SyscallN(procRegisterEventSourceW.Addr(), uintptr(unsafe.Pointer(uncServerName)), uintptr(unsafe.Pointer(sourceName))) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -1217,7 +1223,7 @@ func RegisterEventSource(uncServerName *uint16, sourceName *uint16) (handle Hand } func RegisterServiceCtrlHandlerEx(serviceName *uint16, handlerProc uintptr, context uintptr) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procRegisterServiceCtrlHandlerExW.Addr(), 3, uintptr(unsafe.Pointer(serviceName)), uintptr(handlerProc), uintptr(context)) + r0, _, e1 := syscall.SyscallN(procRegisterServiceCtrlHandlerExW.Addr(), uintptr(unsafe.Pointer(serviceName)), uintptr(handlerProc), uintptr(context)) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -1226,7 +1232,7 @@ func RegisterServiceCtrlHandlerEx(serviceName *uint16, handlerProc uintptr, cont } func ReportEvent(log Handle, etype uint16, category uint16, eventId uint32, usrSId uintptr, numStrings uint16, dataSize uint32, strings **uint16, rawData *byte) (err error) { - r1, _, e1 := syscall.Syscall9(procReportEventW.Addr(), 9, uintptr(log), 
uintptr(etype), uintptr(category), uintptr(eventId), uintptr(usrSId), uintptr(numStrings), uintptr(dataSize), uintptr(unsafe.Pointer(strings)), uintptr(unsafe.Pointer(rawData))) + r1, _, e1 := syscall.SyscallN(procReportEventW.Addr(), uintptr(log), uintptr(etype), uintptr(category), uintptr(eventId), uintptr(usrSId), uintptr(numStrings), uintptr(dataSize), uintptr(unsafe.Pointer(strings)), uintptr(unsafe.Pointer(rawData))) if r1 == 0 { err = errnoErr(e1) } @@ -1234,7 +1240,7 @@ func ReportEvent(log Handle, etype uint16, category uint16, eventId uint32, usrS } func RevertToSelf() (err error) { - r1, _, e1 := syscall.Syscall(procRevertToSelf.Addr(), 0, 0, 0, 0) + r1, _, e1 := syscall.SyscallN(procRevertToSelf.Addr()) if r1 == 0 { err = errnoErr(e1) } @@ -1242,7 +1248,7 @@ func RevertToSelf() (err error) { } func setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCESS, oldACL *ACL, newACL **ACL) (ret error) { - r0, _, _ := syscall.Syscall6(procSetEntriesInAclW.Addr(), 4, uintptr(countExplicitEntries), uintptr(unsafe.Pointer(explicitEntries)), uintptr(unsafe.Pointer(oldACL)), uintptr(unsafe.Pointer(newACL)), 0, 0) + r0, _, _ := syscall.SyscallN(procSetEntriesInAclW.Addr(), uintptr(countExplicitEntries), uintptr(unsafe.Pointer(explicitEntries)), uintptr(unsafe.Pointer(oldACL)), uintptr(unsafe.Pointer(newACL))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -1250,7 +1256,7 @@ func setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCE } func SetKernelObjectSecurity(handle Handle, securityInformation SECURITY_INFORMATION, securityDescriptor *SECURITY_DESCRIPTOR) (err error) { - r1, _, e1 := syscall.Syscall(procSetKernelObjectSecurity.Addr(), 3, uintptr(handle), uintptr(securityInformation), uintptr(unsafe.Pointer(securityDescriptor))) + r1, _, e1 := syscall.SyscallN(procSetKernelObjectSecurity.Addr(), uintptr(handle), uintptr(securityInformation), uintptr(unsafe.Pointer(securityDescriptor))) if r1 == 0 { err = errnoErr(e1) } @@ 
-1267,7 +1273,7 @@ func SetNamedSecurityInfo(objectName string, objectType SE_OBJECT_TYPE, security } func _SetNamedSecurityInfo(objectName *uint16, objectType SE_OBJECT_TYPE, securityInformation SECURITY_INFORMATION, owner *SID, group *SID, dacl *ACL, sacl *ACL) (ret error) { - r0, _, _ := syscall.Syscall9(procSetNamedSecurityInfoW.Addr(), 7, uintptr(unsafe.Pointer(objectName)), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl)), 0, 0) + r0, _, _ := syscall.SyscallN(procSetNamedSecurityInfoW.Addr(), uintptr(unsafe.Pointer(objectName)), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -1275,7 +1281,7 @@ func _SetNamedSecurityInfo(objectName *uint16, objectType SE_OBJECT_TYPE, securi } func setSecurityDescriptorControl(sd *SECURITY_DESCRIPTOR, controlBitsOfInterest SECURITY_DESCRIPTOR_CONTROL, controlBitsToSet SECURITY_DESCRIPTOR_CONTROL) (err error) { - r1, _, e1 := syscall.Syscall(procSetSecurityDescriptorControl.Addr(), 3, uintptr(unsafe.Pointer(sd)), uintptr(controlBitsOfInterest), uintptr(controlBitsToSet)) + r1, _, e1 := syscall.SyscallN(procSetSecurityDescriptorControl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(controlBitsOfInterest), uintptr(controlBitsToSet)) if r1 == 0 { err = errnoErr(e1) } @@ -1291,7 +1297,7 @@ func setSecurityDescriptorDacl(sd *SECURITY_DESCRIPTOR, daclPresent bool, dacl * if daclDefaulted { _p1 = 1 } - r1, _, e1 := syscall.Syscall6(procSetSecurityDescriptorDacl.Addr(), 4, uintptr(unsafe.Pointer(sd)), uintptr(_p0), uintptr(unsafe.Pointer(dacl)), uintptr(_p1), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetSecurityDescriptorDacl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(_p0), uintptr(unsafe.Pointer(dacl)), uintptr(_p1)) if r1 == 0 { err = 
errnoErr(e1) } @@ -1303,7 +1309,7 @@ func setSecurityDescriptorGroup(sd *SECURITY_DESCRIPTOR, group *SID, groupDefaul if groupDefaulted { _p0 = 1 } - r1, _, e1 := syscall.Syscall(procSetSecurityDescriptorGroup.Addr(), 3, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(group)), uintptr(_p0)) + r1, _, e1 := syscall.SyscallN(procSetSecurityDescriptorGroup.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(group)), uintptr(_p0)) if r1 == 0 { err = errnoErr(e1) } @@ -1315,7 +1321,7 @@ func setSecurityDescriptorOwner(sd *SECURITY_DESCRIPTOR, owner *SID, ownerDefaul if ownerDefaulted { _p0 = 1 } - r1, _, e1 := syscall.Syscall(procSetSecurityDescriptorOwner.Addr(), 3, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(owner)), uintptr(_p0)) + r1, _, e1 := syscall.SyscallN(procSetSecurityDescriptorOwner.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(owner)), uintptr(_p0)) if r1 == 0 { err = errnoErr(e1) } @@ -1323,7 +1329,7 @@ func setSecurityDescriptorOwner(sd *SECURITY_DESCRIPTOR, owner *SID, ownerDefaul } func setSecurityDescriptorRMControl(sd *SECURITY_DESCRIPTOR, rmControl *uint8) { - syscall.Syscall(procSetSecurityDescriptorRMControl.Addr(), 2, uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(rmControl)), 0) + syscall.SyscallN(procSetSecurityDescriptorRMControl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(rmControl))) return } @@ -1336,7 +1342,7 @@ func setSecurityDescriptorSacl(sd *SECURITY_DESCRIPTOR, saclPresent bool, sacl * if saclDefaulted { _p1 = 1 } - r1, _, e1 := syscall.Syscall6(procSetSecurityDescriptorSacl.Addr(), 4, uintptr(unsafe.Pointer(sd)), uintptr(_p0), uintptr(unsafe.Pointer(sacl)), uintptr(_p1), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetSecurityDescriptorSacl.Addr(), uintptr(unsafe.Pointer(sd)), uintptr(_p0), uintptr(unsafe.Pointer(sacl)), uintptr(_p1)) if r1 == 0 { err = errnoErr(e1) } @@ -1344,7 +1350,7 @@ func setSecurityDescriptorSacl(sd *SECURITY_DESCRIPTOR, saclPresent bool, sacl * } func 
SetSecurityInfo(handle Handle, objectType SE_OBJECT_TYPE, securityInformation SECURITY_INFORMATION, owner *SID, group *SID, dacl *ACL, sacl *ACL) (ret error) { - r0, _, _ := syscall.Syscall9(procSetSecurityInfo.Addr(), 7, uintptr(handle), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl)), 0, 0) + r0, _, _ := syscall.SyscallN(procSetSecurityInfo.Addr(), uintptr(handle), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(owner)), uintptr(unsafe.Pointer(group)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -1352,7 +1358,7 @@ func SetSecurityInfo(handle Handle, objectType SE_OBJECT_TYPE, securityInformati } func SetServiceStatus(service Handle, serviceStatus *SERVICE_STATUS) (err error) { - r1, _, e1 := syscall.Syscall(procSetServiceStatus.Addr(), 2, uintptr(service), uintptr(unsafe.Pointer(serviceStatus)), 0) + r1, _, e1 := syscall.SyscallN(procSetServiceStatus.Addr(), uintptr(service), uintptr(unsafe.Pointer(serviceStatus))) if r1 == 0 { err = errnoErr(e1) } @@ -1360,7 +1366,7 @@ func SetServiceStatus(service Handle, serviceStatus *SERVICE_STATUS) (err error) } func SetThreadToken(thread *Handle, token Token) (err error) { - r1, _, e1 := syscall.Syscall(procSetThreadToken.Addr(), 2, uintptr(unsafe.Pointer(thread)), uintptr(token), 0) + r1, _, e1 := syscall.SyscallN(procSetThreadToken.Addr(), uintptr(unsafe.Pointer(thread)), uintptr(token)) if r1 == 0 { err = errnoErr(e1) } @@ -1368,7 +1374,7 @@ func SetThreadToken(thread *Handle, token Token) (err error) { } func SetTokenInformation(token Token, infoClass uint32, info *byte, infoLen uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetTokenInformation.Addr(), 4, uintptr(token), uintptr(infoClass), uintptr(unsafe.Pointer(info)), uintptr(infoLen), 0, 0) + r1, _, e1 := 
syscall.SyscallN(procSetTokenInformation.Addr(), uintptr(token), uintptr(infoClass), uintptr(unsafe.Pointer(info)), uintptr(infoLen)) if r1 == 0 { err = errnoErr(e1) } @@ -1376,7 +1382,7 @@ func SetTokenInformation(token Token, infoClass uint32, info *byte, infoLen uint } func StartServiceCtrlDispatcher(serviceTable *SERVICE_TABLE_ENTRY) (err error) { - r1, _, e1 := syscall.Syscall(procStartServiceCtrlDispatcherW.Addr(), 1, uintptr(unsafe.Pointer(serviceTable)), 0, 0) + r1, _, e1 := syscall.SyscallN(procStartServiceCtrlDispatcherW.Addr(), uintptr(unsafe.Pointer(serviceTable))) if r1 == 0 { err = errnoErr(e1) } @@ -1384,7 +1390,7 @@ func StartServiceCtrlDispatcher(serviceTable *SERVICE_TABLE_ENTRY) (err error) { } func StartService(service Handle, numArgs uint32, argVectors **uint16) (err error) { - r1, _, e1 := syscall.Syscall(procStartServiceW.Addr(), 3, uintptr(service), uintptr(numArgs), uintptr(unsafe.Pointer(argVectors))) + r1, _, e1 := syscall.SyscallN(procStartServiceW.Addr(), uintptr(service), uintptr(numArgs), uintptr(unsafe.Pointer(argVectors))) if r1 == 0 { err = errnoErr(e1) } @@ -1392,7 +1398,7 @@ func StartService(service Handle, numArgs uint32, argVectors **uint16) (err erro } func CertAddCertificateContextToStore(store Handle, certContext *CertContext, addDisposition uint32, storeContext **CertContext) (err error) { - r1, _, e1 := syscall.Syscall6(procCertAddCertificateContextToStore.Addr(), 4, uintptr(store), uintptr(unsafe.Pointer(certContext)), uintptr(addDisposition), uintptr(unsafe.Pointer(storeContext)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCertAddCertificateContextToStore.Addr(), uintptr(store), uintptr(unsafe.Pointer(certContext)), uintptr(addDisposition), uintptr(unsafe.Pointer(storeContext))) if r1 == 0 { err = errnoErr(e1) } @@ -1400,7 +1406,7 @@ func CertAddCertificateContextToStore(store Handle, certContext *CertContext, ad } func CertCloseStore(store Handle, flags uint32) (err error) { - r1, _, e1 := 
syscall.Syscall(procCertCloseStore.Addr(), 2, uintptr(store), uintptr(flags), 0) + r1, _, e1 := syscall.SyscallN(procCertCloseStore.Addr(), uintptr(store), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -1408,7 +1414,7 @@ func CertCloseStore(store Handle, flags uint32) (err error) { } func CertCreateCertificateContext(certEncodingType uint32, certEncoded *byte, encodedLen uint32) (context *CertContext, err error) { - r0, _, e1 := syscall.Syscall(procCertCreateCertificateContext.Addr(), 3, uintptr(certEncodingType), uintptr(unsafe.Pointer(certEncoded)), uintptr(encodedLen)) + r0, _, e1 := syscall.SyscallN(procCertCreateCertificateContext.Addr(), uintptr(certEncodingType), uintptr(unsafe.Pointer(certEncoded)), uintptr(encodedLen)) context = (*CertContext)(unsafe.Pointer(r0)) if context == nil { err = errnoErr(e1) @@ -1417,7 +1423,7 @@ func CertCreateCertificateContext(certEncodingType uint32, certEncoded *byte, en } func CertDeleteCertificateFromStore(certContext *CertContext) (err error) { - r1, _, e1 := syscall.Syscall(procCertDeleteCertificateFromStore.Addr(), 1, uintptr(unsafe.Pointer(certContext)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCertDeleteCertificateFromStore.Addr(), uintptr(unsafe.Pointer(certContext))) if r1 == 0 { err = errnoErr(e1) } @@ -1425,13 +1431,13 @@ func CertDeleteCertificateFromStore(certContext *CertContext) (err error) { } func CertDuplicateCertificateContext(certContext *CertContext) (dupContext *CertContext) { - r0, _, _ := syscall.Syscall(procCertDuplicateCertificateContext.Addr(), 1, uintptr(unsafe.Pointer(certContext)), 0, 0) + r0, _, _ := syscall.SyscallN(procCertDuplicateCertificateContext.Addr(), uintptr(unsafe.Pointer(certContext))) dupContext = (*CertContext)(unsafe.Pointer(r0)) return } func CertEnumCertificatesInStore(store Handle, prevContext *CertContext) (context *CertContext, err error) { - r0, _, e1 := syscall.Syscall(procCertEnumCertificatesInStore.Addr(), 2, uintptr(store), uintptr(unsafe.Pointer(prevContext)), 
0) + r0, _, e1 := syscall.SyscallN(procCertEnumCertificatesInStore.Addr(), uintptr(store), uintptr(unsafe.Pointer(prevContext))) context = (*CertContext)(unsafe.Pointer(r0)) if context == nil { err = errnoErr(e1) @@ -1440,7 +1446,7 @@ func CertEnumCertificatesInStore(store Handle, prevContext *CertContext) (contex } func CertFindCertificateInStore(store Handle, certEncodingType uint32, findFlags uint32, findType uint32, findPara unsafe.Pointer, prevCertContext *CertContext) (cert *CertContext, err error) { - r0, _, e1 := syscall.Syscall6(procCertFindCertificateInStore.Addr(), 6, uintptr(store), uintptr(certEncodingType), uintptr(findFlags), uintptr(findType), uintptr(findPara), uintptr(unsafe.Pointer(prevCertContext))) + r0, _, e1 := syscall.SyscallN(procCertFindCertificateInStore.Addr(), uintptr(store), uintptr(certEncodingType), uintptr(findFlags), uintptr(findType), uintptr(findPara), uintptr(unsafe.Pointer(prevCertContext))) cert = (*CertContext)(unsafe.Pointer(r0)) if cert == nil { err = errnoErr(e1) @@ -1449,7 +1455,7 @@ func CertFindCertificateInStore(store Handle, certEncodingType uint32, findFlags } func CertFindChainInStore(store Handle, certEncodingType uint32, findFlags uint32, findType uint32, findPara unsafe.Pointer, prevChainContext *CertChainContext) (certchain *CertChainContext, err error) { - r0, _, e1 := syscall.Syscall6(procCertFindChainInStore.Addr(), 6, uintptr(store), uintptr(certEncodingType), uintptr(findFlags), uintptr(findType), uintptr(findPara), uintptr(unsafe.Pointer(prevChainContext))) + r0, _, e1 := syscall.SyscallN(procCertFindChainInStore.Addr(), uintptr(store), uintptr(certEncodingType), uintptr(findFlags), uintptr(findType), uintptr(findPara), uintptr(unsafe.Pointer(prevChainContext))) certchain = (*CertChainContext)(unsafe.Pointer(r0)) if certchain == nil { err = errnoErr(e1) @@ -1458,18 +1464,18 @@ func CertFindChainInStore(store Handle, certEncodingType uint32, findFlags uint3 } func CertFindExtension(objId *byte, 
countExtensions uint32, extensions *CertExtension) (ret *CertExtension) { - r0, _, _ := syscall.Syscall(procCertFindExtension.Addr(), 3, uintptr(unsafe.Pointer(objId)), uintptr(countExtensions), uintptr(unsafe.Pointer(extensions))) + r0, _, _ := syscall.SyscallN(procCertFindExtension.Addr(), uintptr(unsafe.Pointer(objId)), uintptr(countExtensions), uintptr(unsafe.Pointer(extensions))) ret = (*CertExtension)(unsafe.Pointer(r0)) return } func CertFreeCertificateChain(ctx *CertChainContext) { - syscall.Syscall(procCertFreeCertificateChain.Addr(), 1, uintptr(unsafe.Pointer(ctx)), 0, 0) + syscall.SyscallN(procCertFreeCertificateChain.Addr(), uintptr(unsafe.Pointer(ctx))) return } func CertFreeCertificateContext(ctx *CertContext) (err error) { - r1, _, e1 := syscall.Syscall(procCertFreeCertificateContext.Addr(), 1, uintptr(unsafe.Pointer(ctx)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCertFreeCertificateContext.Addr(), uintptr(unsafe.Pointer(ctx))) if r1 == 0 { err = errnoErr(e1) } @@ -1477,7 +1483,7 @@ func CertFreeCertificateContext(ctx *CertContext) (err error) { } func CertGetCertificateChain(engine Handle, leaf *CertContext, time *Filetime, additionalStore Handle, para *CertChainPara, flags uint32, reserved uintptr, chainCtx **CertChainContext) (err error) { - r1, _, e1 := syscall.Syscall9(procCertGetCertificateChain.Addr(), 8, uintptr(engine), uintptr(unsafe.Pointer(leaf)), uintptr(unsafe.Pointer(time)), uintptr(additionalStore), uintptr(unsafe.Pointer(para)), uintptr(flags), uintptr(reserved), uintptr(unsafe.Pointer(chainCtx)), 0) + r1, _, e1 := syscall.SyscallN(procCertGetCertificateChain.Addr(), uintptr(engine), uintptr(unsafe.Pointer(leaf)), uintptr(unsafe.Pointer(time)), uintptr(additionalStore), uintptr(unsafe.Pointer(para)), uintptr(flags), uintptr(reserved), uintptr(unsafe.Pointer(chainCtx))) if r1 == 0 { err = errnoErr(e1) } @@ -1485,13 +1491,13 @@ func CertGetCertificateChain(engine Handle, leaf *CertContext, time *Filetime, a } func 
CertGetNameString(certContext *CertContext, nameType uint32, flags uint32, typePara unsafe.Pointer, name *uint16, size uint32) (chars uint32) { - r0, _, _ := syscall.Syscall6(procCertGetNameStringW.Addr(), 6, uintptr(unsafe.Pointer(certContext)), uintptr(nameType), uintptr(flags), uintptr(typePara), uintptr(unsafe.Pointer(name)), uintptr(size)) + r0, _, _ := syscall.SyscallN(procCertGetNameStringW.Addr(), uintptr(unsafe.Pointer(certContext)), uintptr(nameType), uintptr(flags), uintptr(typePara), uintptr(unsafe.Pointer(name)), uintptr(size)) chars = uint32(r0) return } func CertOpenStore(storeProvider uintptr, msgAndCertEncodingType uint32, cryptProv uintptr, flags uint32, para uintptr) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall6(procCertOpenStore.Addr(), 5, uintptr(storeProvider), uintptr(msgAndCertEncodingType), uintptr(cryptProv), uintptr(flags), uintptr(para), 0) + r0, _, e1 := syscall.SyscallN(procCertOpenStore.Addr(), uintptr(storeProvider), uintptr(msgAndCertEncodingType), uintptr(cryptProv), uintptr(flags), uintptr(para)) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -1500,7 +1506,7 @@ func CertOpenStore(storeProvider uintptr, msgAndCertEncodingType uint32, cryptPr } func CertOpenSystemStore(hprov Handle, name *uint16) (store Handle, err error) { - r0, _, e1 := syscall.Syscall(procCertOpenSystemStoreW.Addr(), 2, uintptr(hprov), uintptr(unsafe.Pointer(name)), 0) + r0, _, e1 := syscall.SyscallN(procCertOpenSystemStoreW.Addr(), uintptr(hprov), uintptr(unsafe.Pointer(name))) store = Handle(r0) if store == 0 { err = errnoErr(e1) @@ -1509,7 +1515,7 @@ func CertOpenSystemStore(hprov Handle, name *uint16) (store Handle, err error) { } func CertVerifyCertificateChainPolicy(policyOID uintptr, chain *CertChainContext, para *CertChainPolicyPara, status *CertChainPolicyStatus) (err error) { - r1, _, e1 := syscall.Syscall6(procCertVerifyCertificateChainPolicy.Addr(), 4, uintptr(policyOID), uintptr(unsafe.Pointer(chain)), 
uintptr(unsafe.Pointer(para)), uintptr(unsafe.Pointer(status)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCertVerifyCertificateChainPolicy.Addr(), uintptr(policyOID), uintptr(unsafe.Pointer(chain)), uintptr(unsafe.Pointer(para)), uintptr(unsafe.Pointer(status))) if r1 == 0 { err = errnoErr(e1) } @@ -1521,7 +1527,7 @@ func CryptAcquireCertificatePrivateKey(cert *CertContext, flags uint32, paramete if *callerFreeProvOrNCryptKey { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procCryptAcquireCertificatePrivateKey.Addr(), 6, uintptr(unsafe.Pointer(cert)), uintptr(flags), uintptr(parameters), uintptr(unsafe.Pointer(cryptProvOrNCryptKey)), uintptr(unsafe.Pointer(keySpec)), uintptr(unsafe.Pointer(&_p0))) + r1, _, e1 := syscall.SyscallN(procCryptAcquireCertificatePrivateKey.Addr(), uintptr(unsafe.Pointer(cert)), uintptr(flags), uintptr(parameters), uintptr(unsafe.Pointer(cryptProvOrNCryptKey)), uintptr(unsafe.Pointer(keySpec)), uintptr(unsafe.Pointer(&_p0))) *callerFreeProvOrNCryptKey = _p0 != 0 if r1 == 0 { err = errnoErr(e1) @@ -1530,7 +1536,7 @@ func CryptAcquireCertificatePrivateKey(cert *CertContext, flags uint32, paramete } func CryptDecodeObject(encodingType uint32, structType *byte, encodedBytes *byte, lenEncodedBytes uint32, flags uint32, decoded unsafe.Pointer, decodedLen *uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procCryptDecodeObject.Addr(), 7, uintptr(encodingType), uintptr(unsafe.Pointer(structType)), uintptr(unsafe.Pointer(encodedBytes)), uintptr(lenEncodedBytes), uintptr(flags), uintptr(decoded), uintptr(unsafe.Pointer(decodedLen)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCryptDecodeObject.Addr(), uintptr(encodingType), uintptr(unsafe.Pointer(structType)), uintptr(unsafe.Pointer(encodedBytes)), uintptr(lenEncodedBytes), uintptr(flags), uintptr(decoded), uintptr(unsafe.Pointer(decodedLen))) if r1 == 0 { err = errnoErr(e1) } @@ -1538,7 +1544,7 @@ func CryptDecodeObject(encodingType uint32, structType *byte, encodedBytes *byte } func 
CryptProtectData(dataIn *DataBlob, name *uint16, optionalEntropy *DataBlob, reserved uintptr, promptStruct *CryptProtectPromptStruct, flags uint32, dataOut *DataBlob) (err error) { - r1, _, e1 := syscall.Syscall9(procCryptProtectData.Addr(), 7, uintptr(unsafe.Pointer(dataIn)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(optionalEntropy)), uintptr(reserved), uintptr(unsafe.Pointer(promptStruct)), uintptr(flags), uintptr(unsafe.Pointer(dataOut)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCryptProtectData.Addr(), uintptr(unsafe.Pointer(dataIn)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(optionalEntropy)), uintptr(reserved), uintptr(unsafe.Pointer(promptStruct)), uintptr(flags), uintptr(unsafe.Pointer(dataOut))) if r1 == 0 { err = errnoErr(e1) } @@ -1546,7 +1552,7 @@ func CryptProtectData(dataIn *DataBlob, name *uint16, optionalEntropy *DataBlob, } func CryptQueryObject(objectType uint32, object unsafe.Pointer, expectedContentTypeFlags uint32, expectedFormatTypeFlags uint32, flags uint32, msgAndCertEncodingType *uint32, contentType *uint32, formatType *uint32, certStore *Handle, msg *Handle, context *unsafe.Pointer) (err error) { - r1, _, e1 := syscall.Syscall12(procCryptQueryObject.Addr(), 11, uintptr(objectType), uintptr(object), uintptr(expectedContentTypeFlags), uintptr(expectedFormatTypeFlags), uintptr(flags), uintptr(unsafe.Pointer(msgAndCertEncodingType)), uintptr(unsafe.Pointer(contentType)), uintptr(unsafe.Pointer(formatType)), uintptr(unsafe.Pointer(certStore)), uintptr(unsafe.Pointer(msg)), uintptr(unsafe.Pointer(context)), 0) + r1, _, e1 := syscall.SyscallN(procCryptQueryObject.Addr(), uintptr(objectType), uintptr(object), uintptr(expectedContentTypeFlags), uintptr(expectedFormatTypeFlags), uintptr(flags), uintptr(unsafe.Pointer(msgAndCertEncodingType)), uintptr(unsafe.Pointer(contentType)), uintptr(unsafe.Pointer(formatType)), uintptr(unsafe.Pointer(certStore)), uintptr(unsafe.Pointer(msg)), uintptr(unsafe.Pointer(context))) if r1 == 
0 { err = errnoErr(e1) } @@ -1554,7 +1560,7 @@ func CryptQueryObject(objectType uint32, object unsafe.Pointer, expectedContentT } func CryptUnprotectData(dataIn *DataBlob, name **uint16, optionalEntropy *DataBlob, reserved uintptr, promptStruct *CryptProtectPromptStruct, flags uint32, dataOut *DataBlob) (err error) { - r1, _, e1 := syscall.Syscall9(procCryptUnprotectData.Addr(), 7, uintptr(unsafe.Pointer(dataIn)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(optionalEntropy)), uintptr(reserved), uintptr(unsafe.Pointer(promptStruct)), uintptr(flags), uintptr(unsafe.Pointer(dataOut)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCryptUnprotectData.Addr(), uintptr(unsafe.Pointer(dataIn)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(optionalEntropy)), uintptr(reserved), uintptr(unsafe.Pointer(promptStruct)), uintptr(flags), uintptr(unsafe.Pointer(dataOut))) if r1 == 0 { err = errnoErr(e1) } @@ -1562,7 +1568,7 @@ func CryptUnprotectData(dataIn *DataBlob, name **uint16, optionalEntropy *DataBl } func PFXImportCertStore(pfx *CryptDataBlob, password *uint16, flags uint32) (store Handle, err error) { - r0, _, e1 := syscall.Syscall(procPFXImportCertStore.Addr(), 3, uintptr(unsafe.Pointer(pfx)), uintptr(unsafe.Pointer(password)), uintptr(flags)) + r0, _, e1 := syscall.SyscallN(procPFXImportCertStore.Addr(), uintptr(unsafe.Pointer(pfx)), uintptr(unsafe.Pointer(password)), uintptr(flags)) store = Handle(r0) if store == 0 { err = errnoErr(e1) @@ -1571,7 +1577,7 @@ func PFXImportCertStore(pfx *CryptDataBlob, password *uint16, flags uint32) (sto } func DnsNameCompare(name1 *uint16, name2 *uint16) (same bool) { - r0, _, _ := syscall.Syscall(procDnsNameCompare_W.Addr(), 2, uintptr(unsafe.Pointer(name1)), uintptr(unsafe.Pointer(name2)), 0) + r0, _, _ := syscall.SyscallN(procDnsNameCompare_W.Addr(), uintptr(unsafe.Pointer(name1)), uintptr(unsafe.Pointer(name2))) same = r0 != 0 return } @@ -1586,7 +1592,7 @@ func DnsQuery(name string, qtype uint16, options uint32, extra 
*byte, qrs **DNSR } func _DnsQuery(name *uint16, qtype uint16, options uint32, extra *byte, qrs **DNSRecord, pr *byte) (status error) { - r0, _, _ := syscall.Syscall6(procDnsQuery_W.Addr(), 6, uintptr(unsafe.Pointer(name)), uintptr(qtype), uintptr(options), uintptr(unsafe.Pointer(extra)), uintptr(unsafe.Pointer(qrs)), uintptr(unsafe.Pointer(pr))) + r0, _, _ := syscall.SyscallN(procDnsQuery_W.Addr(), uintptr(unsafe.Pointer(name)), uintptr(qtype), uintptr(options), uintptr(unsafe.Pointer(extra)), uintptr(unsafe.Pointer(qrs)), uintptr(unsafe.Pointer(pr))) if r0 != 0 { status = syscall.Errno(r0) } @@ -1594,12 +1600,12 @@ func _DnsQuery(name *uint16, qtype uint16, options uint32, extra *byte, qrs **DN } func DnsRecordListFree(rl *DNSRecord, freetype uint32) { - syscall.Syscall(procDnsRecordListFree.Addr(), 2, uintptr(unsafe.Pointer(rl)), uintptr(freetype), 0) + syscall.SyscallN(procDnsRecordListFree.Addr(), uintptr(unsafe.Pointer(rl)), uintptr(freetype)) return } func DwmGetWindowAttribute(hwnd HWND, attribute uint32, value unsafe.Pointer, size uint32) (ret error) { - r0, _, _ := syscall.Syscall6(procDwmGetWindowAttribute.Addr(), 4, uintptr(hwnd), uintptr(attribute), uintptr(value), uintptr(size), 0, 0) + r0, _, _ := syscall.SyscallN(procDwmGetWindowAttribute.Addr(), uintptr(hwnd), uintptr(attribute), uintptr(value), uintptr(size)) if r0 != 0 { ret = syscall.Errno(r0) } @@ -1607,7 +1613,7 @@ func DwmGetWindowAttribute(hwnd HWND, attribute uint32, value unsafe.Pointer, si } func DwmSetWindowAttribute(hwnd HWND, attribute uint32, value unsafe.Pointer, size uint32) (ret error) { - r0, _, _ := syscall.Syscall6(procDwmSetWindowAttribute.Addr(), 4, uintptr(hwnd), uintptr(attribute), uintptr(value), uintptr(size), 0, 0) + r0, _, _ := syscall.SyscallN(procDwmSetWindowAttribute.Addr(), uintptr(hwnd), uintptr(attribute), uintptr(value), uintptr(size)) if r0 != 0 { ret = syscall.Errno(r0) } @@ -1615,15 +1621,20 @@ func DwmSetWindowAttribute(hwnd HWND, attribute uint32, value 
unsafe.Pointer, si } func CancelMibChangeNotify2(notificationHandle Handle) (errcode error) { - r0, _, _ := syscall.Syscall(procCancelMibChangeNotify2.Addr(), 1, uintptr(notificationHandle), 0, 0) + r0, _, _ := syscall.SyscallN(procCancelMibChangeNotify2.Addr(), uintptr(notificationHandle)) if r0 != 0 { errcode = syscall.Errno(r0) } return } +func FreeMibTable(memory unsafe.Pointer) { + syscall.SyscallN(procFreeMibTable.Addr(), uintptr(memory)) + return +} + func GetAdaptersAddresses(family uint32, flags uint32, reserved uintptr, adapterAddresses *IpAdapterAddresses, sizePointer *uint32) (errcode error) { - r0, _, _ := syscall.Syscall6(procGetAdaptersAddresses.Addr(), 5, uintptr(family), uintptr(flags), uintptr(reserved), uintptr(unsafe.Pointer(adapterAddresses)), uintptr(unsafe.Pointer(sizePointer)), 0) + r0, _, _ := syscall.SyscallN(procGetAdaptersAddresses.Addr(), uintptr(family), uintptr(flags), uintptr(reserved), uintptr(unsafe.Pointer(adapterAddresses)), uintptr(unsafe.Pointer(sizePointer))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ -1631,7 +1642,7 @@ func GetAdaptersAddresses(family uint32, flags uint32, reserved uintptr, adapter } func GetAdaptersInfo(ai *IpAdapterInfo, ol *uint32) (errcode error) { - r0, _, _ := syscall.Syscall(procGetAdaptersInfo.Addr(), 2, uintptr(unsafe.Pointer(ai)), uintptr(unsafe.Pointer(ol)), 0) + r0, _, _ := syscall.SyscallN(procGetAdaptersInfo.Addr(), uintptr(unsafe.Pointer(ai)), uintptr(unsafe.Pointer(ol))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ -1639,7 +1650,7 @@ func GetAdaptersInfo(ai *IpAdapterInfo, ol *uint32) (errcode error) { } func getBestInterfaceEx(sockaddr unsafe.Pointer, pdwBestIfIndex *uint32) (errcode error) { - r0, _, _ := syscall.Syscall(procGetBestInterfaceEx.Addr(), 2, uintptr(sockaddr), uintptr(unsafe.Pointer(pdwBestIfIndex)), 0) + r0, _, _ := syscall.SyscallN(procGetBestInterfaceEx.Addr(), uintptr(sockaddr), uintptr(unsafe.Pointer(pdwBestIfIndex))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ 
-1647,7 +1658,7 @@ func getBestInterfaceEx(sockaddr unsafe.Pointer, pdwBestIfIndex *uint32) (errcod } func GetIfEntry(pIfRow *MibIfRow) (errcode error) { - r0, _, _ := syscall.Syscall(procGetIfEntry.Addr(), 1, uintptr(unsafe.Pointer(pIfRow)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetIfEntry.Addr(), uintptr(unsafe.Pointer(pIfRow))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ -1655,7 +1666,23 @@ func GetIfEntry(pIfRow *MibIfRow) (errcode error) { } func GetIfEntry2Ex(level uint32, row *MibIfRow2) (errcode error) { - r0, _, _ := syscall.Syscall(procGetIfEntry2Ex.Addr(), 2, uintptr(level), uintptr(unsafe.Pointer(row)), 0) + r0, _, _ := syscall.SyscallN(procGetIfEntry2Ex.Addr(), uintptr(level), uintptr(unsafe.Pointer(row))) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + +func GetIpForwardEntry2(row *MibIpForwardRow2) (errcode error) { + r0, _, _ := syscall.SyscallN(procGetIpForwardEntry2.Addr(), uintptr(unsafe.Pointer(row))) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + +func GetIpForwardTable2(family uint16, table **MibIpForwardTable2) (errcode error) { + r0, _, _ := syscall.SyscallN(procGetIpForwardTable2.Addr(), uintptr(family), uintptr(unsafe.Pointer(table))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ -1663,7 +1690,7 @@ func GetIfEntry2Ex(level uint32, row *MibIfRow2) (errcode error) { } func GetUnicastIpAddressEntry(row *MibUnicastIpAddressRow) (errcode error) { - r0, _, _ := syscall.Syscall(procGetUnicastIpAddressEntry.Addr(), 1, uintptr(unsafe.Pointer(row)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetUnicastIpAddressEntry.Addr(), uintptr(unsafe.Pointer(row))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ -1675,7 +1702,19 @@ func NotifyIpInterfaceChange(family uint16, callback uintptr, callerContext unsa if initialNotification { _p0 = 1 } - r0, _, _ := syscall.Syscall6(procNotifyIpInterfaceChange.Addr(), 5, uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), 
uintptr(unsafe.Pointer(notificationHandle)), 0) + r0, _, _ := syscall.SyscallN(procNotifyIpInterfaceChange.Addr(), uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle))) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + +func NotifyRouteChange2(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) { + var _p0 uint32 + if initialNotification { + _p0 = 1 + } + r0, _, _ := syscall.SyscallN(procNotifyRouteChange2.Addr(), uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ -1687,7 +1726,7 @@ func NotifyUnicastIpAddressChange(family uint16, callback uintptr, callerContext if initialNotification { _p0 = 1 } - r0, _, _ := syscall.Syscall6(procNotifyUnicastIpAddressChange.Addr(), 5, uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle)), 0) + r0, _, _ := syscall.SyscallN(procNotifyUnicastIpAddressChange.Addr(), uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle))) if r0 != 0 { errcode = syscall.Errno(r0) } @@ -1695,7 +1734,7 @@ func NotifyUnicastIpAddressChange(family uint16, callback uintptr, callerContext } func AddDllDirectory(path *uint16) (cookie uintptr, err error) { - r0, _, e1 := syscall.Syscall(procAddDllDirectory.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) + r0, _, e1 := syscall.SyscallN(procAddDllDirectory.Addr(), uintptr(unsafe.Pointer(path))) cookie = uintptr(r0) if cookie == 0 { err = errnoErr(e1) @@ -1704,7 +1743,7 @@ func AddDllDirectory(path *uint16) (cookie uintptr, err error) { } func AssignProcessToJobObject(job Handle, process Handle) (err error) { - r1, _, e1 := syscall.Syscall(procAssignProcessToJobObject.Addr(), 2, uintptr(job), uintptr(process), 0) + r1, 
_, e1 := syscall.SyscallN(procAssignProcessToJobObject.Addr(), uintptr(job), uintptr(process)) if r1 == 0 { err = errnoErr(e1) } @@ -1712,7 +1751,7 @@ func AssignProcessToJobObject(job Handle, process Handle) (err error) { } func CancelIo(s Handle) (err error) { - r1, _, e1 := syscall.Syscall(procCancelIo.Addr(), 1, uintptr(s), 0, 0) + r1, _, e1 := syscall.SyscallN(procCancelIo.Addr(), uintptr(s)) if r1 == 0 { err = errnoErr(e1) } @@ -1720,7 +1759,7 @@ func CancelIo(s Handle) (err error) { } func CancelIoEx(s Handle, o *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall(procCancelIoEx.Addr(), 2, uintptr(s), uintptr(unsafe.Pointer(o)), 0) + r1, _, e1 := syscall.SyscallN(procCancelIoEx.Addr(), uintptr(s), uintptr(unsafe.Pointer(o))) if r1 == 0 { err = errnoErr(e1) } @@ -1728,7 +1767,7 @@ func CancelIoEx(s Handle, o *Overlapped) (err error) { } func ClearCommBreak(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procClearCommBreak.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procClearCommBreak.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -1736,7 +1775,7 @@ func ClearCommBreak(handle Handle) (err error) { } func ClearCommError(handle Handle, lpErrors *uint32, lpStat *ComStat) (err error) { - r1, _, e1 := syscall.Syscall(procClearCommError.Addr(), 3, uintptr(handle), uintptr(unsafe.Pointer(lpErrors)), uintptr(unsafe.Pointer(lpStat))) + r1, _, e1 := syscall.SyscallN(procClearCommError.Addr(), uintptr(handle), uintptr(unsafe.Pointer(lpErrors)), uintptr(unsafe.Pointer(lpStat))) if r1 == 0 { err = errnoErr(e1) } @@ -1744,7 +1783,7 @@ func ClearCommError(handle Handle, lpErrors *uint32, lpStat *ComStat) (err error } func CloseHandle(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procCloseHandle.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procCloseHandle.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -1752,12 +1791,12 @@ func CloseHandle(handle Handle) (err error) { } func 
ClosePseudoConsole(console Handle) { - syscall.Syscall(procClosePseudoConsole.Addr(), 1, uintptr(console), 0, 0) + syscall.SyscallN(procClosePseudoConsole.Addr(), uintptr(console)) return } func ConnectNamedPipe(pipe Handle, overlapped *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall(procConnectNamedPipe.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(overlapped)), 0) + r1, _, e1 := syscall.SyscallN(procConnectNamedPipe.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -1765,7 +1804,7 @@ func ConnectNamedPipe(pipe Handle, overlapped *Overlapped) (err error) { } func CreateDirectory(path *uint16, sa *SecurityAttributes) (err error) { - r1, _, e1 := syscall.Syscall(procCreateDirectoryW.Addr(), 2, uintptr(unsafe.Pointer(path)), uintptr(unsafe.Pointer(sa)), 0) + r1, _, e1 := syscall.SyscallN(procCreateDirectoryW.Addr(), uintptr(unsafe.Pointer(path)), uintptr(unsafe.Pointer(sa))) if r1 == 0 { err = errnoErr(e1) } @@ -1773,7 +1812,7 @@ func CreateDirectory(path *uint16, sa *SecurityAttributes) (err error) { } func CreateEventEx(eventAttrs *SecurityAttributes, name *uint16, flags uint32, desiredAccess uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall6(procCreateEventExW.Addr(), 4, uintptr(unsafe.Pointer(eventAttrs)), uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(desiredAccess), 0, 0) + r0, _, e1 := syscall.SyscallN(procCreateEventExW.Addr(), uintptr(unsafe.Pointer(eventAttrs)), uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(desiredAccess)) handle = Handle(r0) if handle == 0 || e1 == ERROR_ALREADY_EXISTS { err = errnoErr(e1) @@ -1782,7 +1821,7 @@ func CreateEventEx(eventAttrs *SecurityAttributes, name *uint16, flags uint32, d } func CreateEvent(eventAttrs *SecurityAttributes, manualReset uint32, initialState uint32, name *uint16) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall6(procCreateEventW.Addr(), 4, uintptr(unsafe.Pointer(eventAttrs)), uintptr(manualReset), 
uintptr(initialState), uintptr(unsafe.Pointer(name)), 0, 0) + r0, _, e1 := syscall.SyscallN(procCreateEventW.Addr(), uintptr(unsafe.Pointer(eventAttrs)), uintptr(manualReset), uintptr(initialState), uintptr(unsafe.Pointer(name))) handle = Handle(r0) if handle == 0 || e1 == ERROR_ALREADY_EXISTS { err = errnoErr(e1) @@ -1791,7 +1830,7 @@ func CreateEvent(eventAttrs *SecurityAttributes, manualReset uint32, initialStat } func CreateFileMapping(fhandle Handle, sa *SecurityAttributes, prot uint32, maxSizeHigh uint32, maxSizeLow uint32, name *uint16) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall6(procCreateFileMappingW.Addr(), 6, uintptr(fhandle), uintptr(unsafe.Pointer(sa)), uintptr(prot), uintptr(maxSizeHigh), uintptr(maxSizeLow), uintptr(unsafe.Pointer(name))) + r0, _, e1 := syscall.SyscallN(procCreateFileMappingW.Addr(), uintptr(fhandle), uintptr(unsafe.Pointer(sa)), uintptr(prot), uintptr(maxSizeHigh), uintptr(maxSizeLow), uintptr(unsafe.Pointer(name))) handle = Handle(r0) if handle == 0 || e1 == ERROR_ALREADY_EXISTS { err = errnoErr(e1) @@ -1800,7 +1839,7 @@ func CreateFileMapping(fhandle Handle, sa *SecurityAttributes, prot uint32, maxS } func CreateFile(name *uint16, access uint32, mode uint32, sa *SecurityAttributes, createmode uint32, attrs uint32, templatefile Handle) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall9(procCreateFileW.Addr(), 7, uintptr(unsafe.Pointer(name)), uintptr(access), uintptr(mode), uintptr(unsafe.Pointer(sa)), uintptr(createmode), uintptr(attrs), uintptr(templatefile), 0, 0) + r0, _, e1 := syscall.SyscallN(procCreateFileW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(access), uintptr(mode), uintptr(unsafe.Pointer(sa)), uintptr(createmode), uintptr(attrs), uintptr(templatefile)) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -1809,7 +1848,7 @@ func CreateFile(name *uint16, access uint32, mode uint32, sa *SecurityAttributes } func CreateHardLink(filename *uint16, existingfilename *uint16, 
reserved uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procCreateHardLinkW.Addr(), 3, uintptr(unsafe.Pointer(filename)), uintptr(unsafe.Pointer(existingfilename)), uintptr(reserved)) + r1, _, e1 := syscall.SyscallN(procCreateHardLinkW.Addr(), uintptr(unsafe.Pointer(filename)), uintptr(unsafe.Pointer(existingfilename)), uintptr(reserved)) if r1&0xff == 0 { err = errnoErr(e1) } @@ -1817,7 +1856,7 @@ func CreateHardLink(filename *uint16, existingfilename *uint16, reserved uintptr } func CreateIoCompletionPort(filehandle Handle, cphandle Handle, key uintptr, threadcnt uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall6(procCreateIoCompletionPort.Addr(), 4, uintptr(filehandle), uintptr(cphandle), uintptr(key), uintptr(threadcnt), 0, 0) + r0, _, e1 := syscall.SyscallN(procCreateIoCompletionPort.Addr(), uintptr(filehandle), uintptr(cphandle), uintptr(key), uintptr(threadcnt)) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -1826,7 +1865,7 @@ func CreateIoCompletionPort(filehandle Handle, cphandle Handle, key uintptr, thr } func CreateJobObject(jobAttr *SecurityAttributes, name *uint16) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procCreateJobObjectW.Addr(), 2, uintptr(unsafe.Pointer(jobAttr)), uintptr(unsafe.Pointer(name)), 0) + r0, _, e1 := syscall.SyscallN(procCreateJobObjectW.Addr(), uintptr(unsafe.Pointer(jobAttr)), uintptr(unsafe.Pointer(name))) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -1835,7 +1874,7 @@ func CreateJobObject(jobAttr *SecurityAttributes, name *uint16) (handle Handle, } func CreateMutexEx(mutexAttrs *SecurityAttributes, name *uint16, flags uint32, desiredAccess uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall6(procCreateMutexExW.Addr(), 4, uintptr(unsafe.Pointer(mutexAttrs)), uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(desiredAccess), 0, 0) + r0, _, e1 := syscall.SyscallN(procCreateMutexExW.Addr(), uintptr(unsafe.Pointer(mutexAttrs)), 
uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(desiredAccess)) handle = Handle(r0) if handle == 0 || e1 == ERROR_ALREADY_EXISTS { err = errnoErr(e1) @@ -1848,7 +1887,7 @@ func CreateMutex(mutexAttrs *SecurityAttributes, initialOwner bool, name *uint16 if initialOwner { _p0 = 1 } - r0, _, e1 := syscall.Syscall(procCreateMutexW.Addr(), 3, uintptr(unsafe.Pointer(mutexAttrs)), uintptr(_p0), uintptr(unsafe.Pointer(name))) + r0, _, e1 := syscall.SyscallN(procCreateMutexW.Addr(), uintptr(unsafe.Pointer(mutexAttrs)), uintptr(_p0), uintptr(unsafe.Pointer(name))) handle = Handle(r0) if handle == 0 || e1 == ERROR_ALREADY_EXISTS { err = errnoErr(e1) @@ -1857,7 +1896,7 @@ func CreateMutex(mutexAttrs *SecurityAttributes, initialOwner bool, name *uint16 } func CreateNamedPipe(name *uint16, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *SecurityAttributes) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall9(procCreateNamedPipeW.Addr(), 8, uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(pipeMode), uintptr(maxInstances), uintptr(outSize), uintptr(inSize), uintptr(defaultTimeout), uintptr(unsafe.Pointer(sa)), 0) + r0, _, e1 := syscall.SyscallN(procCreateNamedPipeW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(pipeMode), uintptr(maxInstances), uintptr(outSize), uintptr(inSize), uintptr(defaultTimeout), uintptr(unsafe.Pointer(sa))) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -1866,7 +1905,7 @@ func CreateNamedPipe(name *uint16, flags uint32, pipeMode uint32, maxInstances u } func CreatePipe(readhandle *Handle, writehandle *Handle, sa *SecurityAttributes, size uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procCreatePipe.Addr(), 4, uintptr(unsafe.Pointer(readhandle)), uintptr(unsafe.Pointer(writehandle)), uintptr(unsafe.Pointer(sa)), uintptr(size), 0, 0) + r1, _, e1 := syscall.SyscallN(procCreatePipe.Addr(), uintptr(unsafe.Pointer(readhandle)), 
uintptr(unsafe.Pointer(writehandle)), uintptr(unsafe.Pointer(sa)), uintptr(size)) if r1 == 0 { err = errnoErr(e1) } @@ -1878,7 +1917,7 @@ func CreateProcess(appName *uint16, commandLine *uint16, procSecurity *SecurityA if inheritHandles { _p0 = 1 } - r1, _, e1 := syscall.Syscall12(procCreateProcessW.Addr(), 10, uintptr(unsafe.Pointer(appName)), uintptr(unsafe.Pointer(commandLine)), uintptr(unsafe.Pointer(procSecurity)), uintptr(unsafe.Pointer(threadSecurity)), uintptr(_p0), uintptr(creationFlags), uintptr(unsafe.Pointer(env)), uintptr(unsafe.Pointer(currentDir)), uintptr(unsafe.Pointer(startupInfo)), uintptr(unsafe.Pointer(outProcInfo)), 0, 0) + r1, _, e1 := syscall.SyscallN(procCreateProcessW.Addr(), uintptr(unsafe.Pointer(appName)), uintptr(unsafe.Pointer(commandLine)), uintptr(unsafe.Pointer(procSecurity)), uintptr(unsafe.Pointer(threadSecurity)), uintptr(_p0), uintptr(creationFlags), uintptr(unsafe.Pointer(env)), uintptr(unsafe.Pointer(currentDir)), uintptr(unsafe.Pointer(startupInfo)), uintptr(unsafe.Pointer(outProcInfo))) if r1 == 0 { err = errnoErr(e1) } @@ -1886,7 +1925,7 @@ func CreateProcess(appName *uint16, commandLine *uint16, procSecurity *SecurityA } func createPseudoConsole(size uint32, in Handle, out Handle, flags uint32, pconsole *Handle) (hr error) { - r0, _, _ := syscall.Syscall6(procCreatePseudoConsole.Addr(), 5, uintptr(size), uintptr(in), uintptr(out), uintptr(flags), uintptr(unsafe.Pointer(pconsole)), 0) + r0, _, _ := syscall.SyscallN(procCreatePseudoConsole.Addr(), uintptr(size), uintptr(in), uintptr(out), uintptr(flags), uintptr(unsafe.Pointer(pconsole))) if r0 != 0 { hr = syscall.Errno(r0) } @@ -1894,7 +1933,7 @@ func createPseudoConsole(size uint32, in Handle, out Handle, flags uint32, pcons } func CreateSymbolicLink(symlinkfilename *uint16, targetfilename *uint16, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall(procCreateSymbolicLinkW.Addr(), 3, uintptr(unsafe.Pointer(symlinkfilename)), uintptr(unsafe.Pointer(targetfilename)), 
uintptr(flags)) + r1, _, e1 := syscall.SyscallN(procCreateSymbolicLinkW.Addr(), uintptr(unsafe.Pointer(symlinkfilename)), uintptr(unsafe.Pointer(targetfilename)), uintptr(flags)) if r1&0xff == 0 { err = errnoErr(e1) } @@ -1902,7 +1941,7 @@ func CreateSymbolicLink(symlinkfilename *uint16, targetfilename *uint16, flags u } func CreateToolhelp32Snapshot(flags uint32, processId uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procCreateToolhelp32Snapshot.Addr(), 2, uintptr(flags), uintptr(processId), 0) + r0, _, e1 := syscall.SyscallN(procCreateToolhelp32Snapshot.Addr(), uintptr(flags), uintptr(processId)) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -1911,7 +1950,7 @@ func CreateToolhelp32Snapshot(flags uint32, processId uint32) (handle Handle, er } func DefineDosDevice(flags uint32, deviceName *uint16, targetPath *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procDefineDosDeviceW.Addr(), 3, uintptr(flags), uintptr(unsafe.Pointer(deviceName)), uintptr(unsafe.Pointer(targetPath))) + r1, _, e1 := syscall.SyscallN(procDefineDosDeviceW.Addr(), uintptr(flags), uintptr(unsafe.Pointer(deviceName)), uintptr(unsafe.Pointer(targetPath))) if r1 == 0 { err = errnoErr(e1) } @@ -1919,7 +1958,7 @@ func DefineDosDevice(flags uint32, deviceName *uint16, targetPath *uint16) (err } func DeleteFile(path *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procDeleteFileW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) + r1, _, e1 := syscall.SyscallN(procDeleteFileW.Addr(), uintptr(unsafe.Pointer(path))) if r1 == 0 { err = errnoErr(e1) } @@ -1927,12 +1966,12 @@ func DeleteFile(path *uint16) (err error) { } func deleteProcThreadAttributeList(attrlist *ProcThreadAttributeList) { - syscall.Syscall(procDeleteProcThreadAttributeList.Addr(), 1, uintptr(unsafe.Pointer(attrlist)), 0, 0) + syscall.SyscallN(procDeleteProcThreadAttributeList.Addr(), uintptr(unsafe.Pointer(attrlist))) return } func DeleteVolumeMountPoint(volumeMountPoint *uint16) 
(err error) { - r1, _, e1 := syscall.Syscall(procDeleteVolumeMountPointW.Addr(), 1, uintptr(unsafe.Pointer(volumeMountPoint)), 0, 0) + r1, _, e1 := syscall.SyscallN(procDeleteVolumeMountPointW.Addr(), uintptr(unsafe.Pointer(volumeMountPoint))) if r1 == 0 { err = errnoErr(e1) } @@ -1940,7 +1979,7 @@ func DeleteVolumeMountPoint(volumeMountPoint *uint16) (err error) { } func DeviceIoControl(handle Handle, ioControlCode uint32, inBuffer *byte, inBufferSize uint32, outBuffer *byte, outBufferSize uint32, bytesReturned *uint32, overlapped *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall9(procDeviceIoControl.Addr(), 8, uintptr(handle), uintptr(ioControlCode), uintptr(unsafe.Pointer(inBuffer)), uintptr(inBufferSize), uintptr(unsafe.Pointer(outBuffer)), uintptr(outBufferSize), uintptr(unsafe.Pointer(bytesReturned)), uintptr(unsafe.Pointer(overlapped)), 0) + r1, _, e1 := syscall.SyscallN(procDeviceIoControl.Addr(), uintptr(handle), uintptr(ioControlCode), uintptr(unsafe.Pointer(inBuffer)), uintptr(inBufferSize), uintptr(unsafe.Pointer(outBuffer)), uintptr(outBufferSize), uintptr(unsafe.Pointer(bytesReturned)), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -1948,7 +1987,7 @@ func DeviceIoControl(handle Handle, ioControlCode uint32, inBuffer *byte, inBuff } func DisconnectNamedPipe(pipe Handle) (err error) { - r1, _, e1 := syscall.Syscall(procDisconnectNamedPipe.Addr(), 1, uintptr(pipe), 0, 0) + r1, _, e1 := syscall.SyscallN(procDisconnectNamedPipe.Addr(), uintptr(pipe)) if r1 == 0 { err = errnoErr(e1) } @@ -1960,7 +1999,7 @@ func DuplicateHandle(hSourceProcessHandle Handle, hSourceHandle Handle, hTargetP if bInheritHandle { _p0 = 1 } - r1, _, e1 := syscall.Syscall9(procDuplicateHandle.Addr(), 7, uintptr(hSourceProcessHandle), uintptr(hSourceHandle), uintptr(hTargetProcessHandle), uintptr(unsafe.Pointer(lpTargetHandle)), uintptr(dwDesiredAccess), uintptr(_p0), uintptr(dwOptions), 0, 0) + r1, _, e1 := 
syscall.SyscallN(procDuplicateHandle.Addr(), uintptr(hSourceProcessHandle), uintptr(hSourceHandle), uintptr(hTargetProcessHandle), uintptr(unsafe.Pointer(lpTargetHandle)), uintptr(dwDesiredAccess), uintptr(_p0), uintptr(dwOptions)) if r1 == 0 { err = errnoErr(e1) } @@ -1968,7 +2007,7 @@ func DuplicateHandle(hSourceProcessHandle Handle, hSourceHandle Handle, hTargetP } func EscapeCommFunction(handle Handle, dwFunc uint32) (err error) { - r1, _, e1 := syscall.Syscall(procEscapeCommFunction.Addr(), 2, uintptr(handle), uintptr(dwFunc), 0) + r1, _, e1 := syscall.SyscallN(procEscapeCommFunction.Addr(), uintptr(handle), uintptr(dwFunc)) if r1 == 0 { err = errnoErr(e1) } @@ -1976,12 +2015,12 @@ func EscapeCommFunction(handle Handle, dwFunc uint32) (err error) { } func ExitProcess(exitcode uint32) { - syscall.Syscall(procExitProcess.Addr(), 1, uintptr(exitcode), 0, 0) + syscall.SyscallN(procExitProcess.Addr(), uintptr(exitcode)) return } func ExpandEnvironmentStrings(src *uint16, dst *uint16, size uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procExpandEnvironmentStringsW.Addr(), 3, uintptr(unsafe.Pointer(src)), uintptr(unsafe.Pointer(dst)), uintptr(size)) + r0, _, e1 := syscall.SyscallN(procExpandEnvironmentStringsW.Addr(), uintptr(unsafe.Pointer(src)), uintptr(unsafe.Pointer(dst)), uintptr(size)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -1990,7 +2029,7 @@ func ExpandEnvironmentStrings(src *uint16, dst *uint16, size uint32) (n uint32, } func FindClose(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procFindClose.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procFindClose.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -1998,7 +2037,7 @@ func FindClose(handle Handle) (err error) { } func FindCloseChangeNotification(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procFindCloseChangeNotification.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := 
syscall.SyscallN(procFindCloseChangeNotification.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -2019,7 +2058,7 @@ func _FindFirstChangeNotification(path *uint16, watchSubtree bool, notifyFilter if watchSubtree { _p1 = 1 } - r0, _, e1 := syscall.Syscall(procFindFirstChangeNotificationW.Addr(), 3, uintptr(unsafe.Pointer(path)), uintptr(_p1), uintptr(notifyFilter)) + r0, _, e1 := syscall.SyscallN(procFindFirstChangeNotificationW.Addr(), uintptr(unsafe.Pointer(path)), uintptr(_p1), uintptr(notifyFilter)) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -2028,7 +2067,7 @@ func _FindFirstChangeNotification(path *uint16, watchSubtree bool, notifyFilter } func findFirstFile1(name *uint16, data *win32finddata1) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procFindFirstFileW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(data)), 0) + r0, _, e1 := syscall.SyscallN(procFindFirstFileW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(data))) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -2037,7 +2076,7 @@ func findFirstFile1(name *uint16, data *win32finddata1) (handle Handle, err erro } func FindFirstVolumeMountPoint(rootPathName *uint16, volumeMountPoint *uint16, bufferLength uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procFindFirstVolumeMountPointW.Addr(), 3, uintptr(unsafe.Pointer(rootPathName)), uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(bufferLength)) + r0, _, e1 := syscall.SyscallN(procFindFirstVolumeMountPointW.Addr(), uintptr(unsafe.Pointer(rootPathName)), uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(bufferLength)) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -2046,7 +2085,7 @@ func FindFirstVolumeMountPoint(rootPathName *uint16, volumeMountPoint *uint16, b } func FindFirstVolume(volumeName *uint16, bufferLength uint32) (handle Handle, err error) { - r0, _, e1 := 
syscall.Syscall(procFindFirstVolumeW.Addr(), 2, uintptr(unsafe.Pointer(volumeName)), uintptr(bufferLength), 0) + r0, _, e1 := syscall.SyscallN(procFindFirstVolumeW.Addr(), uintptr(unsafe.Pointer(volumeName)), uintptr(bufferLength)) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -2055,7 +2094,7 @@ func FindFirstVolume(volumeName *uint16, bufferLength uint32) (handle Handle, er } func FindNextChangeNotification(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procFindNextChangeNotification.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procFindNextChangeNotification.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -2063,7 +2102,7 @@ func FindNextChangeNotification(handle Handle) (err error) { } func findNextFile1(handle Handle, data *win32finddata1) (err error) { - r1, _, e1 := syscall.Syscall(procFindNextFileW.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(data)), 0) + r1, _, e1 := syscall.SyscallN(procFindNextFileW.Addr(), uintptr(handle), uintptr(unsafe.Pointer(data))) if r1 == 0 { err = errnoErr(e1) } @@ -2071,7 +2110,7 @@ func findNextFile1(handle Handle, data *win32finddata1) (err error) { } func FindNextVolumeMountPoint(findVolumeMountPoint Handle, volumeMountPoint *uint16, bufferLength uint32) (err error) { - r1, _, e1 := syscall.Syscall(procFindNextVolumeMountPointW.Addr(), 3, uintptr(findVolumeMountPoint), uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(bufferLength)) + r1, _, e1 := syscall.SyscallN(procFindNextVolumeMountPointW.Addr(), uintptr(findVolumeMountPoint), uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(bufferLength)) if r1 == 0 { err = errnoErr(e1) } @@ -2079,7 +2118,7 @@ func FindNextVolumeMountPoint(findVolumeMountPoint Handle, volumeMountPoint *uin } func FindNextVolume(findVolume Handle, volumeName *uint16, bufferLength uint32) (err error) { - r1, _, e1 := syscall.Syscall(procFindNextVolumeW.Addr(), 3, uintptr(findVolume), uintptr(unsafe.Pointer(volumeName)), 
uintptr(bufferLength)) + r1, _, e1 := syscall.SyscallN(procFindNextVolumeW.Addr(), uintptr(findVolume), uintptr(unsafe.Pointer(volumeName)), uintptr(bufferLength)) if r1 == 0 { err = errnoErr(e1) } @@ -2087,7 +2126,7 @@ func FindNextVolume(findVolume Handle, volumeName *uint16, bufferLength uint32) } func findResource(module Handle, name uintptr, resType uintptr) (resInfo Handle, err error) { - r0, _, e1 := syscall.Syscall(procFindResourceW.Addr(), 3, uintptr(module), uintptr(name), uintptr(resType)) + r0, _, e1 := syscall.SyscallN(procFindResourceW.Addr(), uintptr(module), uintptr(name), uintptr(resType)) resInfo = Handle(r0) if resInfo == 0 { err = errnoErr(e1) @@ -2096,7 +2135,7 @@ func findResource(module Handle, name uintptr, resType uintptr) (resInfo Handle, } func FindVolumeClose(findVolume Handle) (err error) { - r1, _, e1 := syscall.Syscall(procFindVolumeClose.Addr(), 1, uintptr(findVolume), 0, 0) + r1, _, e1 := syscall.SyscallN(procFindVolumeClose.Addr(), uintptr(findVolume)) if r1 == 0 { err = errnoErr(e1) } @@ -2104,7 +2143,15 @@ func FindVolumeClose(findVolume Handle) (err error) { } func FindVolumeMountPointClose(findVolumeMountPoint Handle) (err error) { - r1, _, e1 := syscall.Syscall(procFindVolumeMountPointClose.Addr(), 1, uintptr(findVolumeMountPoint), 0, 0) + r1, _, e1 := syscall.SyscallN(procFindVolumeMountPointClose.Addr(), uintptr(findVolumeMountPoint)) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + +func FlushConsoleInputBuffer(console Handle) (err error) { + r1, _, e1 := syscall.SyscallN(procFlushConsoleInputBuffer.Addr(), uintptr(console)) if r1 == 0 { err = errnoErr(e1) } @@ -2112,7 +2159,7 @@ func FindVolumeMountPointClose(findVolumeMountPoint Handle) (err error) { } func FlushFileBuffers(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procFlushFileBuffers.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procFlushFileBuffers.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -2120,7 +2167,7 
@@ func FlushFileBuffers(handle Handle) (err error) { } func FlushViewOfFile(addr uintptr, length uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procFlushViewOfFile.Addr(), 2, uintptr(addr), uintptr(length), 0) + r1, _, e1 := syscall.SyscallN(procFlushViewOfFile.Addr(), uintptr(addr), uintptr(length)) if r1 == 0 { err = errnoErr(e1) } @@ -2132,7 +2179,7 @@ func FormatMessage(flags uint32, msgsrc uintptr, msgid uint32, langid uint32, bu if len(buf) > 0 { _p0 = &buf[0] } - r0, _, e1 := syscall.Syscall9(procFormatMessageW.Addr(), 7, uintptr(flags), uintptr(msgsrc), uintptr(msgid), uintptr(langid), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(unsafe.Pointer(args)), 0, 0) + r0, _, e1 := syscall.SyscallN(procFormatMessageW.Addr(), uintptr(flags), uintptr(msgsrc), uintptr(msgid), uintptr(langid), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(unsafe.Pointer(args))) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2141,7 +2188,7 @@ func FormatMessage(flags uint32, msgsrc uintptr, msgid uint32, langid uint32, bu } func FreeEnvironmentStrings(envs *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procFreeEnvironmentStringsW.Addr(), 1, uintptr(unsafe.Pointer(envs)), 0, 0) + r1, _, e1 := syscall.SyscallN(procFreeEnvironmentStringsW.Addr(), uintptr(unsafe.Pointer(envs))) if r1 == 0 { err = errnoErr(e1) } @@ -2149,7 +2196,7 @@ func FreeEnvironmentStrings(envs *uint16) (err error) { } func FreeLibrary(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procFreeLibrary.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procFreeLibrary.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -2157,7 +2204,7 @@ func FreeLibrary(handle Handle) (err error) { } func GenerateConsoleCtrlEvent(ctrlEvent uint32, processGroupID uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGenerateConsoleCtrlEvent.Addr(), 2, uintptr(ctrlEvent), uintptr(processGroupID), 0) + r1, _, e1 := 
syscall.SyscallN(procGenerateConsoleCtrlEvent.Addr(), uintptr(ctrlEvent), uintptr(processGroupID)) if r1 == 0 { err = errnoErr(e1) } @@ -2165,19 +2212,19 @@ func GenerateConsoleCtrlEvent(ctrlEvent uint32, processGroupID uint32) (err erro } func GetACP() (acp uint32) { - r0, _, _ := syscall.Syscall(procGetACP.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetACP.Addr()) acp = uint32(r0) return } func GetActiveProcessorCount(groupNumber uint16) (ret uint32) { - r0, _, _ := syscall.Syscall(procGetActiveProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0) + r0, _, _ := syscall.SyscallN(procGetActiveProcessorCount.Addr(), uintptr(groupNumber)) ret = uint32(r0) return } func GetCommModemStatus(handle Handle, lpModemStat *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetCommModemStatus.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(lpModemStat)), 0) + r1, _, e1 := syscall.SyscallN(procGetCommModemStatus.Addr(), uintptr(handle), uintptr(unsafe.Pointer(lpModemStat))) if r1 == 0 { err = errnoErr(e1) } @@ -2185,7 +2232,7 @@ func GetCommModemStatus(handle Handle, lpModemStat *uint32) (err error) { } func GetCommState(handle Handle, lpDCB *DCB) (err error) { - r1, _, e1 := syscall.Syscall(procGetCommState.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(lpDCB)), 0) + r1, _, e1 := syscall.SyscallN(procGetCommState.Addr(), uintptr(handle), uintptr(unsafe.Pointer(lpDCB))) if r1 == 0 { err = errnoErr(e1) } @@ -2193,7 +2240,7 @@ func GetCommState(handle Handle, lpDCB *DCB) (err error) { } func GetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { - r1, _, e1 := syscall.Syscall(procGetCommTimeouts.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(timeouts)), 0) + r1, _, e1 := syscall.SyscallN(procGetCommTimeouts.Addr(), uintptr(handle), uintptr(unsafe.Pointer(timeouts))) if r1 == 0 { err = errnoErr(e1) } @@ -2201,13 +2248,13 @@ func GetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { } func GetCommandLine() (cmd *uint16) { - 
r0, _, _ := syscall.Syscall(procGetCommandLineW.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetCommandLineW.Addr()) cmd = (*uint16)(unsafe.Pointer(r0)) return } func GetComputerNameEx(nametype uint32, buf *uint16, n *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetComputerNameExW.Addr(), 3, uintptr(nametype), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(n))) + r1, _, e1 := syscall.SyscallN(procGetComputerNameExW.Addr(), uintptr(nametype), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(n))) if r1 == 0 { err = errnoErr(e1) } @@ -2215,7 +2262,7 @@ func GetComputerNameEx(nametype uint32, buf *uint16, n *uint32) (err error) { } func GetComputerName(buf *uint16, n *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetComputerNameW.Addr(), 2, uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(n)), 0) + r1, _, e1 := syscall.SyscallN(procGetComputerNameW.Addr(), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(n))) if r1 == 0 { err = errnoErr(e1) } @@ -2223,7 +2270,7 @@ func GetComputerName(buf *uint16, n *uint32) (err error) { } func GetConsoleCP() (cp uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0) + r0, _, e1 := syscall.SyscallN(procGetConsoleCP.Addr()) cp = uint32(r0) if cp == 0 { err = errnoErr(e1) @@ -2232,7 +2279,7 @@ func GetConsoleCP() (cp uint32, err error) { } func GetConsoleMode(console Handle, mode *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0) + r1, _, e1 := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(console), uintptr(unsafe.Pointer(mode))) if r1 == 0 { err = errnoErr(e1) } @@ -2240,7 +2287,7 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) { } func GetConsoleOutputCP() (cp uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0) + r0, _, e1 := syscall.SyscallN(procGetConsoleOutputCP.Addr()) cp = uint32(r0) if cp == 0 { 
err = errnoErr(e1) @@ -2249,7 +2296,7 @@ func GetConsoleOutputCP() (cp uint32, err error) { } func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) { - r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0) + r1, _, e1 := syscall.SyscallN(procGetConsoleScreenBufferInfo.Addr(), uintptr(console), uintptr(unsafe.Pointer(info))) if r1 == 0 { err = errnoErr(e1) } @@ -2257,7 +2304,7 @@ func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) ( } func GetCurrentDirectory(buflen uint32, buf *uint16) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetCurrentDirectoryW.Addr(), 2, uintptr(buflen), uintptr(unsafe.Pointer(buf)), 0) + r0, _, e1 := syscall.SyscallN(procGetCurrentDirectoryW.Addr(), uintptr(buflen), uintptr(unsafe.Pointer(buf))) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2266,19 +2313,19 @@ func GetCurrentDirectory(buflen uint32, buf *uint16) (n uint32, err error) { } func GetCurrentProcessId() (pid uint32) { - r0, _, _ := syscall.Syscall(procGetCurrentProcessId.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetCurrentProcessId.Addr()) pid = uint32(r0) return } func GetCurrentThreadId() (id uint32) { - r0, _, _ := syscall.Syscall(procGetCurrentThreadId.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetCurrentThreadId.Addr()) id = uint32(r0) return } func GetDiskFreeSpaceEx(directoryName *uint16, freeBytesAvailableToCaller *uint64, totalNumberOfBytes *uint64, totalNumberOfFreeBytes *uint64) (err error) { - r1, _, e1 := syscall.Syscall6(procGetDiskFreeSpaceExW.Addr(), 4, uintptr(unsafe.Pointer(directoryName)), uintptr(unsafe.Pointer(freeBytesAvailableToCaller)), uintptr(unsafe.Pointer(totalNumberOfBytes)), uintptr(unsafe.Pointer(totalNumberOfFreeBytes)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetDiskFreeSpaceExW.Addr(), uintptr(unsafe.Pointer(directoryName)), 
uintptr(unsafe.Pointer(freeBytesAvailableToCaller)), uintptr(unsafe.Pointer(totalNumberOfBytes)), uintptr(unsafe.Pointer(totalNumberOfFreeBytes))) if r1 == 0 { err = errnoErr(e1) } @@ -2286,13 +2333,13 @@ func GetDiskFreeSpaceEx(directoryName *uint16, freeBytesAvailableToCaller *uint6 } func GetDriveType(rootPathName *uint16) (driveType uint32) { - r0, _, _ := syscall.Syscall(procGetDriveTypeW.Addr(), 1, uintptr(unsafe.Pointer(rootPathName)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetDriveTypeW.Addr(), uintptr(unsafe.Pointer(rootPathName))) driveType = uint32(r0) return } func GetEnvironmentStrings() (envs *uint16, err error) { - r0, _, e1 := syscall.Syscall(procGetEnvironmentStringsW.Addr(), 0, 0, 0, 0) + r0, _, e1 := syscall.SyscallN(procGetEnvironmentStringsW.Addr()) envs = (*uint16)(unsafe.Pointer(r0)) if envs == nil { err = errnoErr(e1) @@ -2301,7 +2348,7 @@ func GetEnvironmentStrings() (envs *uint16, err error) { } func GetEnvironmentVariable(name *uint16, buffer *uint16, size uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetEnvironmentVariableW.Addr(), 3, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buffer)), uintptr(size)) + r0, _, e1 := syscall.SyscallN(procGetEnvironmentVariableW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buffer)), uintptr(size)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2310,7 +2357,7 @@ func GetEnvironmentVariable(name *uint16, buffer *uint16, size uint32) (n uint32 } func GetExitCodeProcess(handle Handle, exitcode *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetExitCodeProcess.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(exitcode)), 0) + r1, _, e1 := syscall.SyscallN(procGetExitCodeProcess.Addr(), uintptr(handle), uintptr(unsafe.Pointer(exitcode))) if r1 == 0 { err = errnoErr(e1) } @@ -2318,7 +2365,7 @@ func GetExitCodeProcess(handle Handle, exitcode *uint32) (err error) { } func GetFileAttributesEx(name *uint16, level uint32, info *byte) (err error) { - r1, 
_, e1 := syscall.Syscall(procGetFileAttributesExW.Addr(), 3, uintptr(unsafe.Pointer(name)), uintptr(level), uintptr(unsafe.Pointer(info))) + r1, _, e1 := syscall.SyscallN(procGetFileAttributesExW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(level), uintptr(unsafe.Pointer(info))) if r1 == 0 { err = errnoErr(e1) } @@ -2326,7 +2373,7 @@ func GetFileAttributesEx(name *uint16, level uint32, info *byte) (err error) { } func GetFileAttributes(name *uint16) (attrs uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetFileAttributesW.Addr(), 1, uintptr(unsafe.Pointer(name)), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetFileAttributesW.Addr(), uintptr(unsafe.Pointer(name))) attrs = uint32(r0) if attrs == INVALID_FILE_ATTRIBUTES { err = errnoErr(e1) @@ -2335,7 +2382,7 @@ func GetFileAttributes(name *uint16) (attrs uint32, err error) { } func GetFileInformationByHandle(handle Handle, data *ByHandleFileInformation) (err error) { - r1, _, e1 := syscall.Syscall(procGetFileInformationByHandle.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(data)), 0) + r1, _, e1 := syscall.SyscallN(procGetFileInformationByHandle.Addr(), uintptr(handle), uintptr(unsafe.Pointer(data))) if r1 == 0 { err = errnoErr(e1) } @@ -2343,7 +2390,7 @@ func GetFileInformationByHandle(handle Handle, data *ByHandleFileInformation) (e } func GetFileInformationByHandleEx(handle Handle, class uint32, outBuffer *byte, outBufferLen uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetFileInformationByHandleEx.Addr(), 4, uintptr(handle), uintptr(class), uintptr(unsafe.Pointer(outBuffer)), uintptr(outBufferLen), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetFileInformationByHandleEx.Addr(), uintptr(handle), uintptr(class), uintptr(unsafe.Pointer(outBuffer)), uintptr(outBufferLen)) if r1 == 0 { err = errnoErr(e1) } @@ -2351,7 +2398,7 @@ func GetFileInformationByHandleEx(handle Handle, class uint32, outBuffer *byte, } func GetFileTime(handle Handle, ctime *Filetime, atime *Filetime, wtime *Filetime) (err 
error) { - r1, _, e1 := syscall.Syscall6(procGetFileTime.Addr(), 4, uintptr(handle), uintptr(unsafe.Pointer(ctime)), uintptr(unsafe.Pointer(atime)), uintptr(unsafe.Pointer(wtime)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetFileTime.Addr(), uintptr(handle), uintptr(unsafe.Pointer(ctime)), uintptr(unsafe.Pointer(atime)), uintptr(unsafe.Pointer(wtime))) if r1 == 0 { err = errnoErr(e1) } @@ -2359,7 +2406,7 @@ func GetFileTime(handle Handle, ctime *Filetime, atime *Filetime, wtime *Filetim } func GetFileType(filehandle Handle) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetFileType.Addr(), 1, uintptr(filehandle), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetFileType.Addr(), uintptr(filehandle)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2368,7 +2415,7 @@ func GetFileType(filehandle Handle) (n uint32, err error) { } func GetFinalPathNameByHandle(file Handle, filePath *uint16, filePathSize uint32, flags uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall6(procGetFinalPathNameByHandleW.Addr(), 4, uintptr(file), uintptr(unsafe.Pointer(filePath)), uintptr(filePathSize), uintptr(flags), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetFinalPathNameByHandleW.Addr(), uintptr(file), uintptr(unsafe.Pointer(filePath)), uintptr(filePathSize), uintptr(flags)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2377,7 +2424,7 @@ func GetFinalPathNameByHandle(file Handle, filePath *uint16, filePathSize uint32 } func GetFullPathName(path *uint16, buflen uint32, buf *uint16, fname **uint16) (n uint32, err error) { - r0, _, e1 := syscall.Syscall6(procGetFullPathNameW.Addr(), 4, uintptr(unsafe.Pointer(path)), uintptr(buflen), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(fname)), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetFullPathNameW.Addr(), uintptr(unsafe.Pointer(path)), uintptr(buflen), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(fname))) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2386,13 +2433,13 @@ func GetFullPathName(path 
*uint16, buflen uint32, buf *uint16, fname **uint16) ( } func GetLargePageMinimum() (size uintptr) { - r0, _, _ := syscall.Syscall(procGetLargePageMinimum.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetLargePageMinimum.Addr()) size = uintptr(r0) return } func GetLastError() (lasterr error) { - r0, _, _ := syscall.Syscall(procGetLastError.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetLastError.Addr()) if r0 != 0 { lasterr = syscall.Errno(r0) } @@ -2400,7 +2447,7 @@ func GetLastError() (lasterr error) { } func GetLogicalDriveStrings(bufferLength uint32, buffer *uint16) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetLogicalDriveStringsW.Addr(), 2, uintptr(bufferLength), uintptr(unsafe.Pointer(buffer)), 0) + r0, _, e1 := syscall.SyscallN(procGetLogicalDriveStringsW.Addr(), uintptr(bufferLength), uintptr(unsafe.Pointer(buffer))) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2409,7 +2456,7 @@ func GetLogicalDriveStrings(bufferLength uint32, buffer *uint16) (n uint32, err } func GetLogicalDrives() (drivesBitMask uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetLogicalDrives.Addr(), 0, 0, 0, 0) + r0, _, e1 := syscall.SyscallN(procGetLogicalDrives.Addr()) drivesBitMask = uint32(r0) if drivesBitMask == 0 { err = errnoErr(e1) @@ -2418,7 +2465,7 @@ func GetLogicalDrives() (drivesBitMask uint32, err error) { } func GetLongPathName(path *uint16, buf *uint16, buflen uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetLongPathNameW.Addr(), 3, uintptr(unsafe.Pointer(path)), uintptr(unsafe.Pointer(buf)), uintptr(buflen)) + r0, _, e1 := syscall.SyscallN(procGetLongPathNameW.Addr(), uintptr(unsafe.Pointer(path)), uintptr(unsafe.Pointer(buf)), uintptr(buflen)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2427,13 +2474,13 @@ func GetLongPathName(path *uint16, buf *uint16, buflen uint32) (n uint32, err er } func GetMaximumProcessorCount(groupNumber uint16) (ret uint32) { - r0, _, _ := 
syscall.Syscall(procGetMaximumProcessorCount.Addr(), 1, uintptr(groupNumber), 0, 0) + r0, _, _ := syscall.SyscallN(procGetMaximumProcessorCount.Addr(), uintptr(groupNumber)) ret = uint32(r0) return } func GetModuleFileName(module Handle, filename *uint16, size uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetModuleFileNameW.Addr(), 3, uintptr(module), uintptr(unsafe.Pointer(filename)), uintptr(size)) + r0, _, e1 := syscall.SyscallN(procGetModuleFileNameW.Addr(), uintptr(module), uintptr(unsafe.Pointer(filename)), uintptr(size)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2442,7 +2489,7 @@ func GetModuleFileName(module Handle, filename *uint16, size uint32) (n uint32, } func GetModuleHandleEx(flags uint32, moduleName *uint16, module *Handle) (err error) { - r1, _, e1 := syscall.Syscall(procGetModuleHandleExW.Addr(), 3, uintptr(flags), uintptr(unsafe.Pointer(moduleName)), uintptr(unsafe.Pointer(module))) + r1, _, e1 := syscall.SyscallN(procGetModuleHandleExW.Addr(), uintptr(flags), uintptr(unsafe.Pointer(moduleName)), uintptr(unsafe.Pointer(module))) if r1 == 0 { err = errnoErr(e1) } @@ -2450,7 +2497,7 @@ func GetModuleHandleEx(flags uint32, moduleName *uint16, module *Handle) (err er } func GetNamedPipeClientProcessId(pipe Handle, clientProcessID *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetNamedPipeClientProcessId.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(clientProcessID)), 0) + r1, _, e1 := syscall.SyscallN(procGetNamedPipeClientProcessId.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(clientProcessID))) if r1 == 0 { err = errnoErr(e1) } @@ -2458,7 +2505,7 @@ func GetNamedPipeClientProcessId(pipe Handle, clientProcessID *uint32) (err erro } func GetNamedPipeHandleState(pipe Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procGetNamedPipeHandleStateW.Addr(), 7, uintptr(pipe), 
uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(curInstances)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), uintptr(unsafe.Pointer(userName)), uintptr(maxUserNameSize), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetNamedPipeHandleStateW.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(curInstances)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), uintptr(unsafe.Pointer(userName)), uintptr(maxUserNameSize)) if r1 == 0 { err = errnoErr(e1) } @@ -2466,7 +2513,7 @@ func GetNamedPipeHandleState(pipe Handle, state *uint32, curInstances *uint32, m } func GetNamedPipeInfo(pipe Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetNamedPipeInfo.Addr(), 5, uintptr(pipe), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(outSize)), uintptr(unsafe.Pointer(inSize)), uintptr(unsafe.Pointer(maxInstances)), 0) + r1, _, e1 := syscall.SyscallN(procGetNamedPipeInfo.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(outSize)), uintptr(unsafe.Pointer(inSize)), uintptr(unsafe.Pointer(maxInstances))) if r1 == 0 { err = errnoErr(e1) } @@ -2474,7 +2521,15 @@ func GetNamedPipeInfo(pipe Handle, flags *uint32, outSize *uint32, inSize *uint3 } func GetNamedPipeServerProcessId(pipe Handle, serverProcessID *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetNamedPipeServerProcessId.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(serverProcessID)), 0) + r1, _, e1 := syscall.SyscallN(procGetNamedPipeServerProcessId.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(serverProcessID))) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + +func GetNumberOfConsoleInputEvents(console Handle, numevents *uint32) (err error) { + r1, _, e1 := syscall.SyscallN(procGetNumberOfConsoleInputEvents.Addr(), uintptr(console), uintptr(unsafe.Pointer(numevents))) if r1 == 0 { err = 
errnoErr(e1) } @@ -2486,7 +2541,7 @@ func GetOverlappedResult(handle Handle, overlapped *Overlapped, done *uint32, wa if wait { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procGetOverlappedResult.Addr(), 4, uintptr(handle), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(done)), uintptr(_p0), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetOverlappedResult.Addr(), uintptr(handle), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(done)), uintptr(_p0)) if r1 == 0 { err = errnoErr(e1) } @@ -2494,7 +2549,7 @@ func GetOverlappedResult(handle Handle, overlapped *Overlapped, done *uint32, wa } func GetPriorityClass(process Handle) (ret uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetPriorityClass.Addr(), 1, uintptr(process), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetPriorityClass.Addr(), uintptr(process)) ret = uint32(r0) if ret == 0 { err = errnoErr(e1) @@ -2512,7 +2567,7 @@ func GetProcAddress(module Handle, procname string) (proc uintptr, err error) { } func _GetProcAddress(module Handle, procname *byte) (proc uintptr, err error) { - r0, _, e1 := syscall.Syscall(procGetProcAddress.Addr(), 2, uintptr(module), uintptr(unsafe.Pointer(procname)), 0) + r0, _, e1 := syscall.SyscallN(procGetProcAddress.Addr(), uintptr(module), uintptr(unsafe.Pointer(procname))) proc = uintptr(r0) if proc == 0 { err = errnoErr(e1) @@ -2521,7 +2576,7 @@ func _GetProcAddress(module Handle, procname *byte) (proc uintptr, err error) { } func GetProcessId(process Handle) (id uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetProcessId.Addr(), 1, uintptr(process), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetProcessId.Addr(), uintptr(process)) id = uint32(r0) if id == 0 { err = errnoErr(e1) @@ -2530,7 +2585,7 @@ func GetProcessId(process Handle) (id uint32, err error) { } func getProcessPreferredUILanguages(flags uint32, numLanguages *uint32, buf *uint16, bufSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetProcessPreferredUILanguages.Addr(), 
4, uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetProcessPreferredUILanguages.Addr(), uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize))) if r1 == 0 { err = errnoErr(e1) } @@ -2538,7 +2593,7 @@ func getProcessPreferredUILanguages(flags uint32, numLanguages *uint32, buf *uin } func GetProcessShutdownParameters(level *uint32, flags *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetProcessShutdownParameters.Addr(), 2, uintptr(unsafe.Pointer(level)), uintptr(unsafe.Pointer(flags)), 0) + r1, _, e1 := syscall.SyscallN(procGetProcessShutdownParameters.Addr(), uintptr(unsafe.Pointer(level)), uintptr(unsafe.Pointer(flags))) if r1 == 0 { err = errnoErr(e1) } @@ -2546,7 +2601,7 @@ func GetProcessShutdownParameters(level *uint32, flags *uint32) (err error) { } func GetProcessTimes(handle Handle, creationTime *Filetime, exitTime *Filetime, kernelTime *Filetime, userTime *Filetime) (err error) { - r1, _, e1 := syscall.Syscall6(procGetProcessTimes.Addr(), 5, uintptr(handle), uintptr(unsafe.Pointer(creationTime)), uintptr(unsafe.Pointer(exitTime)), uintptr(unsafe.Pointer(kernelTime)), uintptr(unsafe.Pointer(userTime)), 0) + r1, _, e1 := syscall.SyscallN(procGetProcessTimes.Addr(), uintptr(handle), uintptr(unsafe.Pointer(creationTime)), uintptr(unsafe.Pointer(exitTime)), uintptr(unsafe.Pointer(kernelTime)), uintptr(unsafe.Pointer(userTime))) if r1 == 0 { err = errnoErr(e1) } @@ -2554,12 +2609,12 @@ func GetProcessTimes(handle Handle, creationTime *Filetime, exitTime *Filetime, } func GetProcessWorkingSetSizeEx(hProcess Handle, lpMinimumWorkingSetSize *uintptr, lpMaximumWorkingSetSize *uintptr, flags *uint32) { - syscall.Syscall6(procGetProcessWorkingSetSizeEx.Addr(), 4, uintptr(hProcess), uintptr(unsafe.Pointer(lpMinimumWorkingSetSize)), uintptr(unsafe.Pointer(lpMaximumWorkingSetSize)), 
uintptr(unsafe.Pointer(flags)), 0, 0) + syscall.SyscallN(procGetProcessWorkingSetSizeEx.Addr(), uintptr(hProcess), uintptr(unsafe.Pointer(lpMinimumWorkingSetSize)), uintptr(unsafe.Pointer(lpMaximumWorkingSetSize)), uintptr(unsafe.Pointer(flags))) return } func GetQueuedCompletionStatus(cphandle Handle, qty *uint32, key *uintptr, overlapped **Overlapped, timeout uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(cphandle), uintptr(unsafe.Pointer(qty)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(overlapped)), uintptr(timeout), 0) + r1, _, e1 := syscall.SyscallN(procGetQueuedCompletionStatus.Addr(), uintptr(cphandle), uintptr(unsafe.Pointer(qty)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(overlapped)), uintptr(timeout)) if r1 == 0 { err = errnoErr(e1) } @@ -2567,7 +2622,7 @@ func GetQueuedCompletionStatus(cphandle Handle, qty *uint32, key *uintptr, overl } func GetShortPathName(longpath *uint16, shortpath *uint16, buflen uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetShortPathNameW.Addr(), 3, uintptr(unsafe.Pointer(longpath)), uintptr(unsafe.Pointer(shortpath)), uintptr(buflen)) + r0, _, e1 := syscall.SyscallN(procGetShortPathNameW.Addr(), uintptr(unsafe.Pointer(longpath)), uintptr(unsafe.Pointer(shortpath)), uintptr(buflen)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2576,12 +2631,12 @@ func GetShortPathName(longpath *uint16, shortpath *uint16, buflen uint32) (n uin } func getStartupInfo(startupInfo *StartupInfo) { - syscall.Syscall(procGetStartupInfoW.Addr(), 1, uintptr(unsafe.Pointer(startupInfo)), 0, 0) + syscall.SyscallN(procGetStartupInfoW.Addr(), uintptr(unsafe.Pointer(startupInfo))) return } func GetStdHandle(stdhandle uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procGetStdHandle.Addr(), 1, uintptr(stdhandle), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetStdHandle.Addr(), uintptr(stdhandle)) handle = Handle(r0) if handle == 
InvalidHandle { err = errnoErr(e1) @@ -2590,7 +2645,7 @@ func GetStdHandle(stdhandle uint32) (handle Handle, err error) { } func getSystemDirectory(dir *uint16, dirLen uint32) (len uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetSystemDirectoryW.Addr(), 2, uintptr(unsafe.Pointer(dir)), uintptr(dirLen), 0) + r0, _, e1 := syscall.SyscallN(procGetSystemDirectoryW.Addr(), uintptr(unsafe.Pointer(dir)), uintptr(dirLen)) len = uint32(r0) if len == 0 { err = errnoErr(e1) @@ -2599,7 +2654,7 @@ func getSystemDirectory(dir *uint16, dirLen uint32) (len uint32, err error) { } func getSystemPreferredUILanguages(flags uint32, numLanguages *uint32, buf *uint16, bufSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetSystemPreferredUILanguages.Addr(), 4, uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetSystemPreferredUILanguages.Addr(), uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize))) if r1 == 0 { err = errnoErr(e1) } @@ -2607,17 +2662,17 @@ func getSystemPreferredUILanguages(flags uint32, numLanguages *uint32, buf *uint } func GetSystemTimeAsFileTime(time *Filetime) { - syscall.Syscall(procGetSystemTimeAsFileTime.Addr(), 1, uintptr(unsafe.Pointer(time)), 0, 0) + syscall.SyscallN(procGetSystemTimeAsFileTime.Addr(), uintptr(unsafe.Pointer(time))) return } func GetSystemTimePreciseAsFileTime(time *Filetime) { - syscall.Syscall(procGetSystemTimePreciseAsFileTime.Addr(), 1, uintptr(unsafe.Pointer(time)), 0, 0) + syscall.SyscallN(procGetSystemTimePreciseAsFileTime.Addr(), uintptr(unsafe.Pointer(time))) return } func getSystemWindowsDirectory(dir *uint16, dirLen uint32) (len uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetSystemWindowsDirectoryW.Addr(), 2, uintptr(unsafe.Pointer(dir)), uintptr(dirLen), 0) + r0, _, e1 := 
syscall.SyscallN(procGetSystemWindowsDirectoryW.Addr(), uintptr(unsafe.Pointer(dir)), uintptr(dirLen)) len = uint32(r0) if len == 0 { err = errnoErr(e1) @@ -2626,7 +2681,7 @@ func getSystemWindowsDirectory(dir *uint16, dirLen uint32) (len uint32, err erro } func GetTempPath(buflen uint32, buf *uint16) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetTempPathW.Addr(), 2, uintptr(buflen), uintptr(unsafe.Pointer(buf)), 0) + r0, _, e1 := syscall.SyscallN(procGetTempPathW.Addr(), uintptr(buflen), uintptr(unsafe.Pointer(buf))) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2635,7 +2690,7 @@ func GetTempPath(buflen uint32, buf *uint16) (n uint32, err error) { } func getThreadPreferredUILanguages(flags uint32, numLanguages *uint32, buf *uint16, bufSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetThreadPreferredUILanguages.Addr(), 4, uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetThreadPreferredUILanguages.Addr(), uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize))) if r1 == 0 { err = errnoErr(e1) } @@ -2643,13 +2698,13 @@ func getThreadPreferredUILanguages(flags uint32, numLanguages *uint32, buf *uint } func getTickCount64() (ms uint64) { - r0, _, _ := syscall.Syscall(procGetTickCount64.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetTickCount64.Addr()) ms = uint64(r0) return } func GetTimeZoneInformation(tzi *Timezoneinformation) (rc uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetTimeZoneInformation.Addr(), 1, uintptr(unsafe.Pointer(tzi)), 0, 0) + r0, _, e1 := syscall.SyscallN(procGetTimeZoneInformation.Addr(), uintptr(unsafe.Pointer(tzi))) rc = uint32(r0) if rc == 0xffffffff { err = errnoErr(e1) @@ -2658,7 +2713,7 @@ func GetTimeZoneInformation(tzi *Timezoneinformation) (rc uint32, err error) { } func getUserPreferredUILanguages(flags 
uint32, numLanguages *uint32, buf *uint16, bufSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetUserPreferredUILanguages.Addr(), 4, uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetUserPreferredUILanguages.Addr(), uintptr(flags), uintptr(unsafe.Pointer(numLanguages)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(bufSize))) if r1 == 0 { err = errnoErr(e1) } @@ -2666,7 +2721,7 @@ func getUserPreferredUILanguages(flags uint32, numLanguages *uint32, buf *uint16 } func GetVersion() (ver uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetVersion.Addr(), 0, 0, 0, 0) + r0, _, e1 := syscall.SyscallN(procGetVersion.Addr()) ver = uint32(r0) if ver == 0 { err = errnoErr(e1) @@ -2675,7 +2730,7 @@ func GetVersion() (ver uint32, err error) { } func GetVolumeInformationByHandle(file Handle, volumeNameBuffer *uint16, volumeNameSize uint32, volumeNameSerialNumber *uint32, maximumComponentLength *uint32, fileSystemFlags *uint32, fileSystemNameBuffer *uint16, fileSystemNameSize uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procGetVolumeInformationByHandleW.Addr(), 8, uintptr(file), uintptr(unsafe.Pointer(volumeNameBuffer)), uintptr(volumeNameSize), uintptr(unsafe.Pointer(volumeNameSerialNumber)), uintptr(unsafe.Pointer(maximumComponentLength)), uintptr(unsafe.Pointer(fileSystemFlags)), uintptr(unsafe.Pointer(fileSystemNameBuffer)), uintptr(fileSystemNameSize), 0) + r1, _, e1 := syscall.SyscallN(procGetVolumeInformationByHandleW.Addr(), uintptr(file), uintptr(unsafe.Pointer(volumeNameBuffer)), uintptr(volumeNameSize), uintptr(unsafe.Pointer(volumeNameSerialNumber)), uintptr(unsafe.Pointer(maximumComponentLength)), uintptr(unsafe.Pointer(fileSystemFlags)), uintptr(unsafe.Pointer(fileSystemNameBuffer)), uintptr(fileSystemNameSize)) if r1 == 0 { err = errnoErr(e1) } @@ -2683,7 +2738,7 @@ func GetVolumeInformationByHandle(file Handle, 
volumeNameBuffer *uint16, volumeN } func GetVolumeInformation(rootPathName *uint16, volumeNameBuffer *uint16, volumeNameSize uint32, volumeNameSerialNumber *uint32, maximumComponentLength *uint32, fileSystemFlags *uint32, fileSystemNameBuffer *uint16, fileSystemNameSize uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procGetVolumeInformationW.Addr(), 8, uintptr(unsafe.Pointer(rootPathName)), uintptr(unsafe.Pointer(volumeNameBuffer)), uintptr(volumeNameSize), uintptr(unsafe.Pointer(volumeNameSerialNumber)), uintptr(unsafe.Pointer(maximumComponentLength)), uintptr(unsafe.Pointer(fileSystemFlags)), uintptr(unsafe.Pointer(fileSystemNameBuffer)), uintptr(fileSystemNameSize), 0) + r1, _, e1 := syscall.SyscallN(procGetVolumeInformationW.Addr(), uintptr(unsafe.Pointer(rootPathName)), uintptr(unsafe.Pointer(volumeNameBuffer)), uintptr(volumeNameSize), uintptr(unsafe.Pointer(volumeNameSerialNumber)), uintptr(unsafe.Pointer(maximumComponentLength)), uintptr(unsafe.Pointer(fileSystemFlags)), uintptr(unsafe.Pointer(fileSystemNameBuffer)), uintptr(fileSystemNameSize)) if r1 == 0 { err = errnoErr(e1) } @@ -2691,7 +2746,7 @@ func GetVolumeInformation(rootPathName *uint16, volumeNameBuffer *uint16, volume } func GetVolumeNameForVolumeMountPoint(volumeMountPoint *uint16, volumeName *uint16, bufferlength uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetVolumeNameForVolumeMountPointW.Addr(), 3, uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(unsafe.Pointer(volumeName)), uintptr(bufferlength)) + r1, _, e1 := syscall.SyscallN(procGetVolumeNameForVolumeMountPointW.Addr(), uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(unsafe.Pointer(volumeName)), uintptr(bufferlength)) if r1 == 0 { err = errnoErr(e1) } @@ -2699,7 +2754,7 @@ func GetVolumeNameForVolumeMountPoint(volumeMountPoint *uint16, volumeName *uint } func GetVolumePathName(fileName *uint16, volumePathName *uint16, bufferLength uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetVolumePathNameW.Addr(), 
3, uintptr(unsafe.Pointer(fileName)), uintptr(unsafe.Pointer(volumePathName)), uintptr(bufferLength)) + r1, _, e1 := syscall.SyscallN(procGetVolumePathNameW.Addr(), uintptr(unsafe.Pointer(fileName)), uintptr(unsafe.Pointer(volumePathName)), uintptr(bufferLength)) if r1 == 0 { err = errnoErr(e1) } @@ -2707,7 +2762,7 @@ func GetVolumePathName(fileName *uint16, volumePathName *uint16, bufferLength ui } func GetVolumePathNamesForVolumeName(volumeName *uint16, volumePathNames *uint16, bufferLength uint32, returnLength *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetVolumePathNamesForVolumeNameW.Addr(), 4, uintptr(unsafe.Pointer(volumeName)), uintptr(unsafe.Pointer(volumePathNames)), uintptr(bufferLength), uintptr(unsafe.Pointer(returnLength)), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetVolumePathNamesForVolumeNameW.Addr(), uintptr(unsafe.Pointer(volumeName)), uintptr(unsafe.Pointer(volumePathNames)), uintptr(bufferLength), uintptr(unsafe.Pointer(returnLength))) if r1 == 0 { err = errnoErr(e1) } @@ -2715,7 +2770,7 @@ func GetVolumePathNamesForVolumeName(volumeName *uint16, volumePathNames *uint16 } func getWindowsDirectory(dir *uint16, dirLen uint32) (len uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetWindowsDirectoryW.Addr(), 2, uintptr(unsafe.Pointer(dir)), uintptr(dirLen), 0) + r0, _, e1 := syscall.SyscallN(procGetWindowsDirectoryW.Addr(), uintptr(unsafe.Pointer(dir)), uintptr(dirLen)) len = uint32(r0) if len == 0 { err = errnoErr(e1) @@ -2724,7 +2779,7 @@ func getWindowsDirectory(dir *uint16, dirLen uint32) (len uint32, err error) { } func initializeProcThreadAttributeList(attrlist *ProcThreadAttributeList, attrcount uint32, flags uint32, size *uintptr) (err error) { - r1, _, e1 := syscall.Syscall6(procInitializeProcThreadAttributeList.Addr(), 4, uintptr(unsafe.Pointer(attrlist)), uintptr(attrcount), uintptr(flags), uintptr(unsafe.Pointer(size)), 0, 0) + r1, _, e1 := syscall.SyscallN(procInitializeProcThreadAttributeList.Addr(), 
uintptr(unsafe.Pointer(attrlist)), uintptr(attrcount), uintptr(flags), uintptr(unsafe.Pointer(size))) if r1 == 0 { err = errnoErr(e1) } @@ -2736,7 +2791,7 @@ func IsWow64Process(handle Handle, isWow64 *bool) (err error) { if *isWow64 { _p0 = 1 } - r1, _, e1 := syscall.Syscall(procIsWow64Process.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(&_p0)), 0) + r1, _, e1 := syscall.SyscallN(procIsWow64Process.Addr(), uintptr(handle), uintptr(unsafe.Pointer(&_p0))) *isWow64 = _p0 != 0 if r1 == 0 { err = errnoErr(e1) @@ -2749,7 +2804,7 @@ func IsWow64Process2(handle Handle, processMachine *uint16, nativeMachine *uint1 if err != nil { return } - r1, _, e1 := syscall.Syscall(procIsWow64Process2.Addr(), 3, uintptr(handle), uintptr(unsafe.Pointer(processMachine)), uintptr(unsafe.Pointer(nativeMachine))) + r1, _, e1 := syscall.SyscallN(procIsWow64Process2.Addr(), uintptr(handle), uintptr(unsafe.Pointer(processMachine)), uintptr(unsafe.Pointer(nativeMachine))) if r1 == 0 { err = errnoErr(e1) } @@ -2766,7 +2821,7 @@ func LoadLibraryEx(libname string, zero Handle, flags uintptr) (handle Handle, e } func _LoadLibraryEx(libname *uint16, zero Handle, flags uintptr) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procLoadLibraryExW.Addr(), 3, uintptr(unsafe.Pointer(libname)), uintptr(zero), uintptr(flags)) + r0, _, e1 := syscall.SyscallN(procLoadLibraryExW.Addr(), uintptr(unsafe.Pointer(libname)), uintptr(zero), uintptr(flags)) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -2784,7 +2839,7 @@ func LoadLibrary(libname string) (handle Handle, err error) { } func _LoadLibrary(libname *uint16) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procLoadLibraryW.Addr(), 1, uintptr(unsafe.Pointer(libname)), 0, 0) + r0, _, e1 := syscall.SyscallN(procLoadLibraryW.Addr(), uintptr(unsafe.Pointer(libname))) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -2793,7 +2848,7 @@ func _LoadLibrary(libname *uint16) (handle Handle, err error) { } func 
LoadResource(module Handle, resInfo Handle) (resData Handle, err error) { - r0, _, e1 := syscall.Syscall(procLoadResource.Addr(), 2, uintptr(module), uintptr(resInfo), 0) + r0, _, e1 := syscall.SyscallN(procLoadResource.Addr(), uintptr(module), uintptr(resInfo)) resData = Handle(r0) if resData == 0 { err = errnoErr(e1) @@ -2802,7 +2857,7 @@ func LoadResource(module Handle, resInfo Handle) (resData Handle, err error) { } func LocalAlloc(flags uint32, length uint32) (ptr uintptr, err error) { - r0, _, e1 := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(flags), uintptr(length), 0) + r0, _, e1 := syscall.SyscallN(procLocalAlloc.Addr(), uintptr(flags), uintptr(length)) ptr = uintptr(r0) if ptr == 0 { err = errnoErr(e1) @@ -2811,7 +2866,7 @@ func LocalAlloc(flags uint32, length uint32) (ptr uintptr, err error) { } func LocalFree(hmem Handle) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procLocalFree.Addr(), 1, uintptr(hmem), 0, 0) + r0, _, e1 := syscall.SyscallN(procLocalFree.Addr(), uintptr(hmem)) handle = Handle(r0) if handle != 0 { err = errnoErr(e1) @@ -2820,7 +2875,7 @@ func LocalFree(hmem Handle) (handle Handle, err error) { } func LockFileEx(file Handle, flags uint32, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall6(procLockFileEx.Addr(), 6, uintptr(file), uintptr(flags), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped))) + r1, _, e1 := syscall.SyscallN(procLockFileEx.Addr(), uintptr(file), uintptr(flags), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -2828,7 +2883,7 @@ func LockFileEx(file Handle, flags uint32, reserved uint32, bytesLow uint32, byt } func LockResource(resData Handle) (addr uintptr, err error) { - r0, _, e1 := syscall.Syscall(procLockResource.Addr(), 1, uintptr(resData), 0, 0) + r0, _, e1 := 
syscall.SyscallN(procLockResource.Addr(), uintptr(resData)) addr = uintptr(r0) if addr == 0 { err = errnoErr(e1) @@ -2837,7 +2892,7 @@ func LockResource(resData Handle) (addr uintptr, err error) { } func MapViewOfFile(handle Handle, access uint32, offsetHigh uint32, offsetLow uint32, length uintptr) (addr uintptr, err error) { - r0, _, e1 := syscall.Syscall6(procMapViewOfFile.Addr(), 5, uintptr(handle), uintptr(access), uintptr(offsetHigh), uintptr(offsetLow), uintptr(length), 0) + r0, _, e1 := syscall.SyscallN(procMapViewOfFile.Addr(), uintptr(handle), uintptr(access), uintptr(offsetHigh), uintptr(offsetLow), uintptr(length)) addr = uintptr(r0) if addr == 0 { err = errnoErr(e1) @@ -2846,7 +2901,7 @@ func MapViewOfFile(handle Handle, access uint32, offsetHigh uint32, offsetLow ui } func Module32First(snapshot Handle, moduleEntry *ModuleEntry32) (err error) { - r1, _, e1 := syscall.Syscall(procModule32FirstW.Addr(), 2, uintptr(snapshot), uintptr(unsafe.Pointer(moduleEntry)), 0) + r1, _, e1 := syscall.SyscallN(procModule32FirstW.Addr(), uintptr(snapshot), uintptr(unsafe.Pointer(moduleEntry))) if r1 == 0 { err = errnoErr(e1) } @@ -2854,7 +2909,7 @@ func Module32First(snapshot Handle, moduleEntry *ModuleEntry32) (err error) { } func Module32Next(snapshot Handle, moduleEntry *ModuleEntry32) (err error) { - r1, _, e1 := syscall.Syscall(procModule32NextW.Addr(), 2, uintptr(snapshot), uintptr(unsafe.Pointer(moduleEntry)), 0) + r1, _, e1 := syscall.SyscallN(procModule32NextW.Addr(), uintptr(snapshot), uintptr(unsafe.Pointer(moduleEntry))) if r1 == 0 { err = errnoErr(e1) } @@ -2862,7 +2917,7 @@ func Module32Next(snapshot Handle, moduleEntry *ModuleEntry32) (err error) { } func MoveFileEx(from *uint16, to *uint16, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags)) + r1, _, e1 := syscall.SyscallN(procMoveFileExW.Addr(), uintptr(unsafe.Pointer(from)), 
uintptr(unsafe.Pointer(to)), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -2870,7 +2925,7 @@ func MoveFileEx(from *uint16, to *uint16, flags uint32) (err error) { } func MoveFile(from *uint16, to *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procMoveFileW.Addr(), 2, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), 0) + r1, _, e1 := syscall.SyscallN(procMoveFileW.Addr(), uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to))) if r1 == 0 { err = errnoErr(e1) } @@ -2878,7 +2933,7 @@ func MoveFile(from *uint16, to *uint16) (err error) { } func MultiByteToWideChar(codePage uint32, dwFlags uint32, str *byte, nstr int32, wchar *uint16, nwchar int32) (nwrite int32, err error) { - r0, _, e1 := syscall.Syscall6(procMultiByteToWideChar.Addr(), 6, uintptr(codePage), uintptr(dwFlags), uintptr(unsafe.Pointer(str)), uintptr(nstr), uintptr(unsafe.Pointer(wchar)), uintptr(nwchar)) + r0, _, e1 := syscall.SyscallN(procMultiByteToWideChar.Addr(), uintptr(codePage), uintptr(dwFlags), uintptr(unsafe.Pointer(str)), uintptr(nstr), uintptr(unsafe.Pointer(wchar)), uintptr(nwchar)) nwrite = int32(r0) if nwrite == 0 { err = errnoErr(e1) @@ -2891,7 +2946,7 @@ func OpenEvent(desiredAccess uint32, inheritHandle bool, name *uint16) (handle H if inheritHandle { _p0 = 1 } - r0, _, e1 := syscall.Syscall(procOpenEventW.Addr(), 3, uintptr(desiredAccess), uintptr(_p0), uintptr(unsafe.Pointer(name))) + r0, _, e1 := syscall.SyscallN(procOpenEventW.Addr(), uintptr(desiredAccess), uintptr(_p0), uintptr(unsafe.Pointer(name))) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -2904,7 +2959,7 @@ func OpenMutex(desiredAccess uint32, inheritHandle bool, name *uint16) (handle H if inheritHandle { _p0 = 1 } - r0, _, e1 := syscall.Syscall(procOpenMutexW.Addr(), 3, uintptr(desiredAccess), uintptr(_p0), uintptr(unsafe.Pointer(name))) + r0, _, e1 := syscall.SyscallN(procOpenMutexW.Addr(), uintptr(desiredAccess), uintptr(_p0), uintptr(unsafe.Pointer(name))) handle = Handle(r0) 
if handle == 0 { err = errnoErr(e1) @@ -2917,7 +2972,7 @@ func OpenProcess(desiredAccess uint32, inheritHandle bool, processId uint32) (ha if inheritHandle { _p0 = 1 } - r0, _, e1 := syscall.Syscall(procOpenProcess.Addr(), 3, uintptr(desiredAccess), uintptr(_p0), uintptr(processId)) + r0, _, e1 := syscall.SyscallN(procOpenProcess.Addr(), uintptr(desiredAccess), uintptr(_p0), uintptr(processId)) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -2930,7 +2985,7 @@ func OpenThread(desiredAccess uint32, inheritHandle bool, threadId uint32) (hand if inheritHandle { _p0 = 1 } - r0, _, e1 := syscall.Syscall(procOpenThread.Addr(), 3, uintptr(desiredAccess), uintptr(_p0), uintptr(threadId)) + r0, _, e1 := syscall.SyscallN(procOpenThread.Addr(), uintptr(desiredAccess), uintptr(_p0), uintptr(threadId)) handle = Handle(r0) if handle == 0 { err = errnoErr(e1) @@ -2939,7 +2994,7 @@ func OpenThread(desiredAccess uint32, inheritHandle bool, threadId uint32) (hand } func PostQueuedCompletionStatus(cphandle Handle, qty uint32, key uintptr, overlapped *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall6(procPostQueuedCompletionStatus.Addr(), 4, uintptr(cphandle), uintptr(qty), uintptr(key), uintptr(unsafe.Pointer(overlapped)), 0, 0) + r1, _, e1 := syscall.SyscallN(procPostQueuedCompletionStatus.Addr(), uintptr(cphandle), uintptr(qty), uintptr(key), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -2947,7 +3002,7 @@ func PostQueuedCompletionStatus(cphandle Handle, qty uint32, key uintptr, overla } func Process32First(snapshot Handle, procEntry *ProcessEntry32) (err error) { - r1, _, e1 := syscall.Syscall(procProcess32FirstW.Addr(), 2, uintptr(snapshot), uintptr(unsafe.Pointer(procEntry)), 0) + r1, _, e1 := syscall.SyscallN(procProcess32FirstW.Addr(), uintptr(snapshot), uintptr(unsafe.Pointer(procEntry))) if r1 == 0 { err = errnoErr(e1) } @@ -2955,7 +3010,7 @@ func Process32First(snapshot Handle, procEntry *ProcessEntry32) (err error) { } func 
Process32Next(snapshot Handle, procEntry *ProcessEntry32) (err error) { - r1, _, e1 := syscall.Syscall(procProcess32NextW.Addr(), 2, uintptr(snapshot), uintptr(unsafe.Pointer(procEntry)), 0) + r1, _, e1 := syscall.SyscallN(procProcess32NextW.Addr(), uintptr(snapshot), uintptr(unsafe.Pointer(procEntry))) if r1 == 0 { err = errnoErr(e1) } @@ -2963,7 +3018,7 @@ func Process32Next(snapshot Handle, procEntry *ProcessEntry32) (err error) { } func ProcessIdToSessionId(pid uint32, sessionid *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procProcessIdToSessionId.Addr(), 2, uintptr(pid), uintptr(unsafe.Pointer(sessionid)), 0) + r1, _, e1 := syscall.SyscallN(procProcessIdToSessionId.Addr(), uintptr(pid), uintptr(unsafe.Pointer(sessionid))) if r1 == 0 { err = errnoErr(e1) } @@ -2971,7 +3026,7 @@ func ProcessIdToSessionId(pid uint32, sessionid *uint32) (err error) { } func PulseEvent(event Handle) (err error) { - r1, _, e1 := syscall.Syscall(procPulseEvent.Addr(), 1, uintptr(event), 0, 0) + r1, _, e1 := syscall.SyscallN(procPulseEvent.Addr(), uintptr(event)) if r1 == 0 { err = errnoErr(e1) } @@ -2979,7 +3034,7 @@ func PulseEvent(event Handle) (err error) { } func PurgeComm(handle Handle, dwFlags uint32) (err error) { - r1, _, e1 := syscall.Syscall(procPurgeComm.Addr(), 2, uintptr(handle), uintptr(dwFlags), 0) + r1, _, e1 := syscall.SyscallN(procPurgeComm.Addr(), uintptr(handle), uintptr(dwFlags)) if r1 == 0 { err = errnoErr(e1) } @@ -2987,7 +3042,7 @@ func PurgeComm(handle Handle, dwFlags uint32) (err error) { } func QueryDosDevice(deviceName *uint16, targetPath *uint16, max uint32) (n uint32, err error) { - r0, _, e1 := syscall.Syscall(procQueryDosDeviceW.Addr(), 3, uintptr(unsafe.Pointer(deviceName)), uintptr(unsafe.Pointer(targetPath)), uintptr(max)) + r0, _, e1 := syscall.SyscallN(procQueryDosDeviceW.Addr(), uintptr(unsafe.Pointer(deviceName)), uintptr(unsafe.Pointer(targetPath)), uintptr(max)) n = uint32(r0) if n == 0 { err = errnoErr(e1) @@ -2996,7 +3051,7 @@ func 
QueryDosDevice(deviceName *uint16, targetPath *uint16, max uint32) (n uint3 } func QueryFullProcessImageName(proc Handle, flags uint32, exeName *uint16, size *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procQueryFullProcessImageNameW.Addr(), 4, uintptr(proc), uintptr(flags), uintptr(unsafe.Pointer(exeName)), uintptr(unsafe.Pointer(size)), 0, 0) + r1, _, e1 := syscall.SyscallN(procQueryFullProcessImageNameW.Addr(), uintptr(proc), uintptr(flags), uintptr(unsafe.Pointer(exeName)), uintptr(unsafe.Pointer(size))) if r1 == 0 { err = errnoErr(e1) } @@ -3004,7 +3059,7 @@ func QueryFullProcessImageName(proc Handle, flags uint32, exeName *uint16, size } func QueryInformationJobObject(job Handle, JobObjectInformationClass int32, JobObjectInformation uintptr, JobObjectInformationLength uint32, retlen *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procQueryInformationJobObject.Addr(), 5, uintptr(job), uintptr(JobObjectInformationClass), uintptr(JobObjectInformation), uintptr(JobObjectInformationLength), uintptr(unsafe.Pointer(retlen)), 0) + r1, _, e1 := syscall.SyscallN(procQueryInformationJobObject.Addr(), uintptr(job), uintptr(JobObjectInformationClass), uintptr(JobObjectInformation), uintptr(JobObjectInformationLength), uintptr(unsafe.Pointer(retlen))) if r1 == 0 { err = errnoErr(e1) } @@ -3012,7 +3067,7 @@ func QueryInformationJobObject(job Handle, JobObjectInformationClass int32, JobO } func ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) { - r1, _, e1 := syscall.Syscall6(procReadConsoleW.Addr(), 5, uintptr(console), uintptr(unsafe.Pointer(buf)), uintptr(toread), uintptr(unsafe.Pointer(read)), uintptr(unsafe.Pointer(inputControl)), 0) + r1, _, e1 := syscall.SyscallN(procReadConsoleW.Addr(), uintptr(console), uintptr(unsafe.Pointer(buf)), uintptr(toread), uintptr(unsafe.Pointer(read)), uintptr(unsafe.Pointer(inputControl))) if r1 == 0 { err = errnoErr(e1) } @@ -3024,7 +3079,7 @@ func 
ReadDirectoryChanges(handle Handle, buf *byte, buflen uint32, watchSubTree if watchSubTree { _p0 = 1 } - r1, _, e1 := syscall.Syscall9(procReadDirectoryChangesW.Addr(), 8, uintptr(handle), uintptr(unsafe.Pointer(buf)), uintptr(buflen), uintptr(_p0), uintptr(mask), uintptr(unsafe.Pointer(retlen)), uintptr(unsafe.Pointer(overlapped)), uintptr(completionRoutine), 0) + r1, _, e1 := syscall.SyscallN(procReadDirectoryChangesW.Addr(), uintptr(handle), uintptr(unsafe.Pointer(buf)), uintptr(buflen), uintptr(_p0), uintptr(mask), uintptr(unsafe.Pointer(retlen)), uintptr(unsafe.Pointer(overlapped)), uintptr(completionRoutine)) if r1 == 0 { err = errnoErr(e1) } @@ -3036,7 +3091,7 @@ func readFile(handle Handle, buf []byte, done *uint32, overlapped *Overlapped) ( if len(buf) > 0 { _p0 = &buf[0] } - r1, _, e1 := syscall.Syscall6(procReadFile.Addr(), 5, uintptr(handle), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(unsafe.Pointer(done)), uintptr(unsafe.Pointer(overlapped)), 0) + r1, _, e1 := syscall.SyscallN(procReadFile.Addr(), uintptr(handle), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(unsafe.Pointer(done)), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -3044,7 +3099,7 @@ func readFile(handle Handle, buf []byte, done *uint32, overlapped *Overlapped) ( } func ReadProcessMemory(process Handle, baseAddress uintptr, buffer *byte, size uintptr, numberOfBytesRead *uintptr) (err error) { - r1, _, e1 := syscall.Syscall6(procReadProcessMemory.Addr(), 5, uintptr(process), uintptr(baseAddress), uintptr(unsafe.Pointer(buffer)), uintptr(size), uintptr(unsafe.Pointer(numberOfBytesRead)), 0) + r1, _, e1 := syscall.SyscallN(procReadProcessMemory.Addr(), uintptr(process), uintptr(baseAddress), uintptr(unsafe.Pointer(buffer)), uintptr(size), uintptr(unsafe.Pointer(numberOfBytesRead))) if r1 == 0 { err = errnoErr(e1) } @@ -3052,7 +3107,7 @@ func ReadProcessMemory(process Handle, baseAddress uintptr, buffer *byte, size u } func ReleaseMutex(mutex 
Handle) (err error) { - r1, _, e1 := syscall.Syscall(procReleaseMutex.Addr(), 1, uintptr(mutex), 0, 0) + r1, _, e1 := syscall.SyscallN(procReleaseMutex.Addr(), uintptr(mutex)) if r1 == 0 { err = errnoErr(e1) } @@ -3060,7 +3115,7 @@ func ReleaseMutex(mutex Handle) (err error) { } func RemoveDirectory(path *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procRemoveDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) + r1, _, e1 := syscall.SyscallN(procRemoveDirectoryW.Addr(), uintptr(unsafe.Pointer(path))) if r1 == 0 { err = errnoErr(e1) } @@ -3068,7 +3123,7 @@ func RemoveDirectory(path *uint16) (err error) { } func RemoveDllDirectory(cookie uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procRemoveDllDirectory.Addr(), 1, uintptr(cookie), 0, 0) + r1, _, e1 := syscall.SyscallN(procRemoveDllDirectory.Addr(), uintptr(cookie)) if r1 == 0 { err = errnoErr(e1) } @@ -3076,7 +3131,7 @@ func RemoveDllDirectory(cookie uintptr) (err error) { } func ResetEvent(event Handle) (err error) { - r1, _, e1 := syscall.Syscall(procResetEvent.Addr(), 1, uintptr(event), 0, 0) + r1, _, e1 := syscall.SyscallN(procResetEvent.Addr(), uintptr(event)) if r1 == 0 { err = errnoErr(e1) } @@ -3084,7 +3139,7 @@ func ResetEvent(event Handle) (err error) { } func resizePseudoConsole(pconsole Handle, size uint32) (hr error) { - r0, _, _ := syscall.Syscall(procResizePseudoConsole.Addr(), 2, uintptr(pconsole), uintptr(size), 0) + r0, _, _ := syscall.SyscallN(procResizePseudoConsole.Addr(), uintptr(pconsole), uintptr(size)) if r0 != 0 { hr = syscall.Errno(r0) } @@ -3092,7 +3147,7 @@ func resizePseudoConsole(pconsole Handle, size uint32) (hr error) { } func ResumeThread(thread Handle) (ret uint32, err error) { - r0, _, e1 := syscall.Syscall(procResumeThread.Addr(), 1, uintptr(thread), 0, 0) + r0, _, e1 := syscall.SyscallN(procResumeThread.Addr(), uintptr(thread)) ret = uint32(r0) if ret == 0xffffffff { err = errnoErr(e1) @@ -3101,7 +3156,7 @@ func ResumeThread(thread Handle) (ret uint32, 
err error) { } func SetCommBreak(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procSetCommBreak.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetCommBreak.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -3109,7 +3164,7 @@ func SetCommBreak(handle Handle) (err error) { } func SetCommMask(handle Handle, dwEvtMask uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetCommMask.Addr(), 2, uintptr(handle), uintptr(dwEvtMask), 0) + r1, _, e1 := syscall.SyscallN(procSetCommMask.Addr(), uintptr(handle), uintptr(dwEvtMask)) if r1 == 0 { err = errnoErr(e1) } @@ -3117,7 +3172,7 @@ func SetCommMask(handle Handle, dwEvtMask uint32) (err error) { } func SetCommState(handle Handle, lpDCB *DCB) (err error) { - r1, _, e1 := syscall.Syscall(procSetCommState.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(lpDCB)), 0) + r1, _, e1 := syscall.SyscallN(procSetCommState.Addr(), uintptr(handle), uintptr(unsafe.Pointer(lpDCB))) if r1 == 0 { err = errnoErr(e1) } @@ -3125,7 +3180,7 @@ func SetCommState(handle Handle, lpDCB *DCB) (err error) { } func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { - r1, _, e1 := syscall.Syscall(procSetCommTimeouts.Addr(), 2, uintptr(handle), uintptr(unsafe.Pointer(timeouts)), 0) + r1, _, e1 := syscall.SyscallN(procSetCommTimeouts.Addr(), uintptr(handle), uintptr(unsafe.Pointer(timeouts))) if r1 == 0 { err = errnoErr(e1) } @@ -3133,7 +3188,7 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { } func SetConsoleCP(cp uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetConsoleCP.Addr(), uintptr(cp)) if r1 == 0 { err = errnoErr(e1) } @@ -3141,7 +3196,7 @@ func SetConsoleCP(cp uint32) (err error) { } func setConsoleCursorPosition(console Handle, position uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), 
uintptr(position), 0) + r1, _, e1 := syscall.SyscallN(procSetConsoleCursorPosition.Addr(), uintptr(console), uintptr(position)) if r1 == 0 { err = errnoErr(e1) } @@ -3149,7 +3204,7 @@ func setConsoleCursorPosition(console Handle, position uint32) (err error) { } func SetConsoleMode(console Handle, mode uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetConsoleMode.Addr(), 2, uintptr(console), uintptr(mode), 0) + r1, _, e1 := syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(console), uintptr(mode)) if r1 == 0 { err = errnoErr(e1) } @@ -3157,7 +3212,7 @@ func SetConsoleMode(console Handle, mode uint32) (err error) { } func SetConsoleOutputCP(cp uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetConsoleOutputCP.Addr(), uintptr(cp)) if r1 == 0 { err = errnoErr(e1) } @@ -3165,7 +3220,7 @@ func SetConsoleOutputCP(cp uint32) (err error) { } func SetCurrentDirectory(path *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetCurrentDirectoryW.Addr(), uintptr(unsafe.Pointer(path))) if r1 == 0 { err = errnoErr(e1) } @@ -3173,7 +3228,7 @@ func SetCurrentDirectory(path *uint16) (err error) { } func SetDefaultDllDirectories(directoryFlags uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetDefaultDllDirectories.Addr(), 1, uintptr(directoryFlags), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetDefaultDllDirectories.Addr(), uintptr(directoryFlags)) if r1 == 0 { err = errnoErr(e1) } @@ -3190,7 +3245,7 @@ func SetDllDirectory(path string) (err error) { } func _SetDllDirectory(path *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procSetDllDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetDllDirectoryW.Addr(), uintptr(unsafe.Pointer(path))) if r1 == 0 { err = errnoErr(e1) } @@ -3198,7 +3253,7 @@ func 
_SetDllDirectory(path *uint16) (err error) { } func SetEndOfFile(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procSetEndOfFile.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetEndOfFile.Addr(), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -3206,7 +3261,7 @@ func SetEndOfFile(handle Handle) (err error) { } func SetEnvironmentVariable(name *uint16, value *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procSetEnvironmentVariableW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(value)), 0) + r1, _, e1 := syscall.SyscallN(procSetEnvironmentVariableW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(value))) if r1 == 0 { err = errnoErr(e1) } @@ -3214,13 +3269,13 @@ func SetEnvironmentVariable(name *uint16, value *uint16) (err error) { } func SetErrorMode(mode uint32) (ret uint32) { - r0, _, _ := syscall.Syscall(procSetErrorMode.Addr(), 1, uintptr(mode), 0, 0) + r0, _, _ := syscall.SyscallN(procSetErrorMode.Addr(), uintptr(mode)) ret = uint32(r0) return } func SetEvent(event Handle) (err error) { - r1, _, e1 := syscall.Syscall(procSetEvent.Addr(), 1, uintptr(event), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetEvent.Addr(), uintptr(event)) if r1 == 0 { err = errnoErr(e1) } @@ -3228,7 +3283,7 @@ func SetEvent(event Handle) (err error) { } func SetFileAttributes(name *uint16, attrs uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetFileAttributesW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(attrs), 0) + r1, _, e1 := syscall.SyscallN(procSetFileAttributesW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(attrs)) if r1 == 0 { err = errnoErr(e1) } @@ -3236,7 +3291,7 @@ func SetFileAttributes(name *uint16, attrs uint32) (err error) { } func SetFileCompletionNotificationModes(handle Handle, flags uint8) (err error) { - r1, _, e1 := syscall.Syscall(procSetFileCompletionNotificationModes.Addr(), 2, uintptr(handle), uintptr(flags), 0) + r1, _, e1 := 
syscall.SyscallN(procSetFileCompletionNotificationModes.Addr(), uintptr(handle), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -3244,7 +3299,7 @@ func SetFileCompletionNotificationModes(handle Handle, flags uint8) (err error) } func SetFileInformationByHandle(handle Handle, class uint32, inBuffer *byte, inBufferLen uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetFileInformationByHandle.Addr(), 4, uintptr(handle), uintptr(class), uintptr(unsafe.Pointer(inBuffer)), uintptr(inBufferLen), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetFileInformationByHandle.Addr(), uintptr(handle), uintptr(class), uintptr(unsafe.Pointer(inBuffer)), uintptr(inBufferLen)) if r1 == 0 { err = errnoErr(e1) } @@ -3252,7 +3307,7 @@ func SetFileInformationByHandle(handle Handle, class uint32, inBuffer *byte, inB } func SetFilePointer(handle Handle, lowoffset int32, highoffsetptr *int32, whence uint32) (newlowoffset uint32, err error) { - r0, _, e1 := syscall.Syscall6(procSetFilePointer.Addr(), 4, uintptr(handle), uintptr(lowoffset), uintptr(unsafe.Pointer(highoffsetptr)), uintptr(whence), 0, 0) + r0, _, e1 := syscall.SyscallN(procSetFilePointer.Addr(), uintptr(handle), uintptr(lowoffset), uintptr(unsafe.Pointer(highoffsetptr)), uintptr(whence)) newlowoffset = uint32(r0) if newlowoffset == 0xffffffff { err = errnoErr(e1) @@ -3261,7 +3316,7 @@ func SetFilePointer(handle Handle, lowoffset int32, highoffsetptr *int32, whence } func SetFileTime(handle Handle, ctime *Filetime, atime *Filetime, wtime *Filetime) (err error) { - r1, _, e1 := syscall.Syscall6(procSetFileTime.Addr(), 4, uintptr(handle), uintptr(unsafe.Pointer(ctime)), uintptr(unsafe.Pointer(atime)), uintptr(unsafe.Pointer(wtime)), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetFileTime.Addr(), uintptr(handle), uintptr(unsafe.Pointer(ctime)), uintptr(unsafe.Pointer(atime)), uintptr(unsafe.Pointer(wtime))) if r1 == 0 { err = errnoErr(e1) } @@ -3269,7 +3324,7 @@ func SetFileTime(handle Handle, ctime *Filetime, atime 
*Filetime, wtime *Filetim } func SetFileValidData(handle Handle, validDataLength int64) (err error) { - r1, _, e1 := syscall.Syscall(procSetFileValidData.Addr(), 2, uintptr(handle), uintptr(validDataLength), 0) + r1, _, e1 := syscall.SyscallN(procSetFileValidData.Addr(), uintptr(handle), uintptr(validDataLength)) if r1 == 0 { err = errnoErr(e1) } @@ -3277,7 +3332,7 @@ func SetFileValidData(handle Handle, validDataLength int64) (err error) { } func SetHandleInformation(handle Handle, mask uint32, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetHandleInformation.Addr(), 3, uintptr(handle), uintptr(mask), uintptr(flags)) + r1, _, e1 := syscall.SyscallN(procSetHandleInformation.Addr(), uintptr(handle), uintptr(mask), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -3285,7 +3340,7 @@ func SetHandleInformation(handle Handle, mask uint32, flags uint32) (err error) } func SetInformationJobObject(job Handle, JobObjectInformationClass uint32, JobObjectInformation uintptr, JobObjectInformationLength uint32) (ret int, err error) { - r0, _, e1 := syscall.Syscall6(procSetInformationJobObject.Addr(), 4, uintptr(job), uintptr(JobObjectInformationClass), uintptr(JobObjectInformation), uintptr(JobObjectInformationLength), 0, 0) + r0, _, e1 := syscall.SyscallN(procSetInformationJobObject.Addr(), uintptr(job), uintptr(JobObjectInformationClass), uintptr(JobObjectInformation), uintptr(JobObjectInformationLength)) ret = int(r0) if ret == 0 { err = errnoErr(e1) @@ -3294,7 +3349,7 @@ func SetInformationJobObject(job Handle, JobObjectInformationClass uint32, JobOb } func SetNamedPipeHandleState(pipe Handle, state *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetNamedPipeHandleState.Addr(), 4, uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetNamedPipeHandleState.Addr(), 
uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout))) if r1 == 0 { err = errnoErr(e1) } @@ -3302,7 +3357,7 @@ func SetNamedPipeHandleState(pipe Handle, state *uint32, maxCollectionCount *uin } func SetPriorityClass(process Handle, priorityClass uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetPriorityClass.Addr(), 2, uintptr(process), uintptr(priorityClass), 0) + r1, _, e1 := syscall.SyscallN(procSetPriorityClass.Addr(), uintptr(process), uintptr(priorityClass)) if r1 == 0 { err = errnoErr(e1) } @@ -3314,7 +3369,7 @@ func SetProcessPriorityBoost(process Handle, disable bool) (err error) { if disable { _p0 = 1 } - r1, _, e1 := syscall.Syscall(procSetProcessPriorityBoost.Addr(), 2, uintptr(process), uintptr(_p0), 0) + r1, _, e1 := syscall.SyscallN(procSetProcessPriorityBoost.Addr(), uintptr(process), uintptr(_p0)) if r1 == 0 { err = errnoErr(e1) } @@ -3322,7 +3377,7 @@ func SetProcessPriorityBoost(process Handle, disable bool) (err error) { } func SetProcessShutdownParameters(level uint32, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetProcessShutdownParameters.Addr(), 2, uintptr(level), uintptr(flags), 0) + r1, _, e1 := syscall.SyscallN(procSetProcessShutdownParameters.Addr(), uintptr(level), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -3330,7 +3385,7 @@ func SetProcessShutdownParameters(level uint32, flags uint32) (err error) { } func SetProcessWorkingSetSizeEx(hProcess Handle, dwMinimumWorkingSetSize uintptr, dwMaximumWorkingSetSize uintptr, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetProcessWorkingSetSizeEx.Addr(), 4, uintptr(hProcess), uintptr(dwMinimumWorkingSetSize), uintptr(dwMaximumWorkingSetSize), uintptr(flags), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetProcessWorkingSetSizeEx.Addr(), uintptr(hProcess), uintptr(dwMinimumWorkingSetSize), uintptr(dwMaximumWorkingSetSize), uintptr(flags)) if r1 == 0 { err = 
errnoErr(e1) } @@ -3338,7 +3393,7 @@ func SetProcessWorkingSetSizeEx(hProcess Handle, dwMinimumWorkingSetSize uintptr } func SetStdHandle(stdhandle uint32, handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procSetStdHandle.Addr(), 2, uintptr(stdhandle), uintptr(handle), 0) + r1, _, e1 := syscall.SyscallN(procSetStdHandle.Addr(), uintptr(stdhandle), uintptr(handle)) if r1 == 0 { err = errnoErr(e1) } @@ -3346,7 +3401,7 @@ func SetStdHandle(stdhandle uint32, handle Handle) (err error) { } func SetVolumeLabel(rootPathName *uint16, volumeName *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procSetVolumeLabelW.Addr(), 2, uintptr(unsafe.Pointer(rootPathName)), uintptr(unsafe.Pointer(volumeName)), 0) + r1, _, e1 := syscall.SyscallN(procSetVolumeLabelW.Addr(), uintptr(unsafe.Pointer(rootPathName)), uintptr(unsafe.Pointer(volumeName))) if r1 == 0 { err = errnoErr(e1) } @@ -3354,7 +3409,7 @@ func SetVolumeLabel(rootPathName *uint16, volumeName *uint16) (err error) { } func SetVolumeMountPoint(volumeMountPoint *uint16, volumeName *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procSetVolumeMountPointW.Addr(), 2, uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(unsafe.Pointer(volumeName)), 0) + r1, _, e1 := syscall.SyscallN(procSetVolumeMountPointW.Addr(), uintptr(unsafe.Pointer(volumeMountPoint)), uintptr(unsafe.Pointer(volumeName))) if r1 == 0 { err = errnoErr(e1) } @@ -3362,7 +3417,7 @@ func SetVolumeMountPoint(volumeMountPoint *uint16, volumeName *uint16) (err erro } func SetupComm(handle Handle, dwInQueue uint32, dwOutQueue uint32) (err error) { - r1, _, e1 := syscall.Syscall(procSetupComm.Addr(), 3, uintptr(handle), uintptr(dwInQueue), uintptr(dwOutQueue)) + r1, _, e1 := syscall.SyscallN(procSetupComm.Addr(), uintptr(handle), uintptr(dwInQueue), uintptr(dwOutQueue)) if r1 == 0 { err = errnoErr(e1) } @@ -3370,7 +3425,7 @@ func SetupComm(handle Handle, dwInQueue uint32, dwOutQueue uint32) (err error) { } func SizeofResource(module Handle, resInfo 
Handle) (size uint32, err error) { - r0, _, e1 := syscall.Syscall(procSizeofResource.Addr(), 2, uintptr(module), uintptr(resInfo), 0) + r0, _, e1 := syscall.SyscallN(procSizeofResource.Addr(), uintptr(module), uintptr(resInfo)) size = uint32(r0) if size == 0 { err = errnoErr(e1) @@ -3383,13 +3438,13 @@ func SleepEx(milliseconds uint32, alertable bool) (ret uint32) { if alertable { _p0 = 1 } - r0, _, _ := syscall.Syscall(procSleepEx.Addr(), 2, uintptr(milliseconds), uintptr(_p0), 0) + r0, _, _ := syscall.SyscallN(procSleepEx.Addr(), uintptr(milliseconds), uintptr(_p0)) ret = uint32(r0) return } func TerminateJobObject(job Handle, exitCode uint32) (err error) { - r1, _, e1 := syscall.Syscall(procTerminateJobObject.Addr(), 2, uintptr(job), uintptr(exitCode), 0) + r1, _, e1 := syscall.SyscallN(procTerminateJobObject.Addr(), uintptr(job), uintptr(exitCode)) if r1 == 0 { err = errnoErr(e1) } @@ -3397,7 +3452,7 @@ func TerminateJobObject(job Handle, exitCode uint32) (err error) { } func TerminateProcess(handle Handle, exitcode uint32) (err error) { - r1, _, e1 := syscall.Syscall(procTerminateProcess.Addr(), 2, uintptr(handle), uintptr(exitcode), 0) + r1, _, e1 := syscall.SyscallN(procTerminateProcess.Addr(), uintptr(handle), uintptr(exitcode)) if r1 == 0 { err = errnoErr(e1) } @@ -3405,7 +3460,7 @@ func TerminateProcess(handle Handle, exitcode uint32) (err error) { } func Thread32First(snapshot Handle, threadEntry *ThreadEntry32) (err error) { - r1, _, e1 := syscall.Syscall(procThread32First.Addr(), 2, uintptr(snapshot), uintptr(unsafe.Pointer(threadEntry)), 0) + r1, _, e1 := syscall.SyscallN(procThread32First.Addr(), uintptr(snapshot), uintptr(unsafe.Pointer(threadEntry))) if r1 == 0 { err = errnoErr(e1) } @@ -3413,7 +3468,7 @@ func Thread32First(snapshot Handle, threadEntry *ThreadEntry32) (err error) { } func Thread32Next(snapshot Handle, threadEntry *ThreadEntry32) (err error) { - r1, _, e1 := syscall.Syscall(procThread32Next.Addr(), 2, uintptr(snapshot), 
uintptr(unsafe.Pointer(threadEntry)), 0) + r1, _, e1 := syscall.SyscallN(procThread32Next.Addr(), uintptr(snapshot), uintptr(unsafe.Pointer(threadEntry))) if r1 == 0 { err = errnoErr(e1) } @@ -3421,7 +3476,7 @@ func Thread32Next(snapshot Handle, threadEntry *ThreadEntry32) (err error) { } func UnlockFileEx(file Handle, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall6(procUnlockFileEx.Addr(), 5, uintptr(file), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped)), 0) + r1, _, e1 := syscall.SyscallN(procUnlockFileEx.Addr(), uintptr(file), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -3429,7 +3484,7 @@ func UnlockFileEx(file Handle, reserved uint32, bytesLow uint32, bytesHigh uint3 } func UnmapViewOfFile(addr uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procUnmapViewOfFile.Addr(), 1, uintptr(addr), 0, 0) + r1, _, e1 := syscall.SyscallN(procUnmapViewOfFile.Addr(), uintptr(addr)) if r1 == 0 { err = errnoErr(e1) } @@ -3437,7 +3492,7 @@ func UnmapViewOfFile(addr uintptr) (err error) { } func updateProcThreadAttribute(attrlist *ProcThreadAttributeList, flags uint32, attr uintptr, value unsafe.Pointer, size uintptr, prevvalue unsafe.Pointer, returnedsize *uintptr) (err error) { - r1, _, e1 := syscall.Syscall9(procUpdateProcThreadAttribute.Addr(), 7, uintptr(unsafe.Pointer(attrlist)), uintptr(flags), uintptr(attr), uintptr(value), uintptr(size), uintptr(prevvalue), uintptr(unsafe.Pointer(returnedsize)), 0, 0) + r1, _, e1 := syscall.SyscallN(procUpdateProcThreadAttribute.Addr(), uintptr(unsafe.Pointer(attrlist)), uintptr(flags), uintptr(attr), uintptr(value), uintptr(size), uintptr(prevvalue), uintptr(unsafe.Pointer(returnedsize))) if r1 == 0 { err = errnoErr(e1) } @@ -3445,7 +3500,7 @@ func updateProcThreadAttribute(attrlist *ProcThreadAttributeList, flags uint32, } 
func VirtualAlloc(address uintptr, size uintptr, alloctype uint32, protect uint32) (value uintptr, err error) { - r0, _, e1 := syscall.Syscall6(procVirtualAlloc.Addr(), 4, uintptr(address), uintptr(size), uintptr(alloctype), uintptr(protect), 0, 0) + r0, _, e1 := syscall.SyscallN(procVirtualAlloc.Addr(), uintptr(address), uintptr(size), uintptr(alloctype), uintptr(protect)) value = uintptr(r0) if value == 0 { err = errnoErr(e1) @@ -3454,7 +3509,7 @@ func VirtualAlloc(address uintptr, size uintptr, alloctype uint32, protect uint3 } func VirtualFree(address uintptr, size uintptr, freetype uint32) (err error) { - r1, _, e1 := syscall.Syscall(procVirtualFree.Addr(), 3, uintptr(address), uintptr(size), uintptr(freetype)) + r1, _, e1 := syscall.SyscallN(procVirtualFree.Addr(), uintptr(address), uintptr(size), uintptr(freetype)) if r1 == 0 { err = errnoErr(e1) } @@ -3462,7 +3517,7 @@ func VirtualFree(address uintptr, size uintptr, freetype uint32) (err error) { } func VirtualLock(addr uintptr, length uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procVirtualLock.Addr(), 2, uintptr(addr), uintptr(length), 0) + r1, _, e1 := syscall.SyscallN(procVirtualLock.Addr(), uintptr(addr), uintptr(length)) if r1 == 0 { err = errnoErr(e1) } @@ -3470,7 +3525,7 @@ func VirtualLock(addr uintptr, length uintptr) (err error) { } func VirtualProtect(address uintptr, size uintptr, newprotect uint32, oldprotect *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procVirtualProtect.Addr(), 4, uintptr(address), uintptr(size), uintptr(newprotect), uintptr(unsafe.Pointer(oldprotect)), 0, 0) + r1, _, e1 := syscall.SyscallN(procVirtualProtect.Addr(), uintptr(address), uintptr(size), uintptr(newprotect), uintptr(unsafe.Pointer(oldprotect))) if r1 == 0 { err = errnoErr(e1) } @@ -3478,7 +3533,7 @@ func VirtualProtect(address uintptr, size uintptr, newprotect uint32, oldprotect } func VirtualProtectEx(process Handle, address uintptr, size uintptr, newProtect uint32, oldProtect *uint32) (err 
error) { - r1, _, e1 := syscall.Syscall6(procVirtualProtectEx.Addr(), 5, uintptr(process), uintptr(address), uintptr(size), uintptr(newProtect), uintptr(unsafe.Pointer(oldProtect)), 0) + r1, _, e1 := syscall.SyscallN(procVirtualProtectEx.Addr(), uintptr(process), uintptr(address), uintptr(size), uintptr(newProtect), uintptr(unsafe.Pointer(oldProtect))) if r1 == 0 { err = errnoErr(e1) } @@ -3486,7 +3541,7 @@ func VirtualProtectEx(process Handle, address uintptr, size uintptr, newProtect } func VirtualQuery(address uintptr, buffer *MemoryBasicInformation, length uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procVirtualQuery.Addr(), 3, uintptr(address), uintptr(unsafe.Pointer(buffer)), uintptr(length)) + r1, _, e1 := syscall.SyscallN(procVirtualQuery.Addr(), uintptr(address), uintptr(unsafe.Pointer(buffer)), uintptr(length)) if r1 == 0 { err = errnoErr(e1) } @@ -3494,7 +3549,7 @@ func VirtualQuery(address uintptr, buffer *MemoryBasicInformation, length uintpt } func VirtualQueryEx(process Handle, address uintptr, buffer *MemoryBasicInformation, length uintptr) (err error) { - r1, _, e1 := syscall.Syscall6(procVirtualQueryEx.Addr(), 4, uintptr(process), uintptr(address), uintptr(unsafe.Pointer(buffer)), uintptr(length), 0, 0) + r1, _, e1 := syscall.SyscallN(procVirtualQueryEx.Addr(), uintptr(process), uintptr(address), uintptr(unsafe.Pointer(buffer)), uintptr(length)) if r1 == 0 { err = errnoErr(e1) } @@ -3502,7 +3557,7 @@ func VirtualQueryEx(process Handle, address uintptr, buffer *MemoryBasicInformat } func VirtualUnlock(addr uintptr, length uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procVirtualUnlock.Addr(), 2, uintptr(addr), uintptr(length), 0) + r1, _, e1 := syscall.SyscallN(procVirtualUnlock.Addr(), uintptr(addr), uintptr(length)) if r1 == 0 { err = errnoErr(e1) } @@ -3510,13 +3565,13 @@ func VirtualUnlock(addr uintptr, length uintptr) (err error) { } func WTSGetActiveConsoleSessionId() (sessionID uint32) { - r0, _, _ := 
syscall.Syscall(procWTSGetActiveConsoleSessionId.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procWTSGetActiveConsoleSessionId.Addr()) sessionID = uint32(r0) return } func WaitCommEvent(handle Handle, lpEvtMask *uint32, lpOverlapped *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall(procWaitCommEvent.Addr(), 3, uintptr(handle), uintptr(unsafe.Pointer(lpEvtMask)), uintptr(unsafe.Pointer(lpOverlapped))) + r1, _, e1 := syscall.SyscallN(procWaitCommEvent.Addr(), uintptr(handle), uintptr(unsafe.Pointer(lpEvtMask)), uintptr(unsafe.Pointer(lpOverlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -3528,7 +3583,7 @@ func waitForMultipleObjects(count uint32, handles uintptr, waitAll bool, waitMil if waitAll { _p0 = 1 } - r0, _, e1 := syscall.Syscall6(procWaitForMultipleObjects.Addr(), 4, uintptr(count), uintptr(handles), uintptr(_p0), uintptr(waitMilliseconds), 0, 0) + r0, _, e1 := syscall.SyscallN(procWaitForMultipleObjects.Addr(), uintptr(count), uintptr(handles), uintptr(_p0), uintptr(waitMilliseconds)) event = uint32(r0) if event == 0xffffffff { err = errnoErr(e1) @@ -3537,7 +3592,7 @@ func waitForMultipleObjects(count uint32, handles uintptr, waitAll bool, waitMil } func WaitForSingleObject(handle Handle, waitMilliseconds uint32) (event uint32, err error) { - r0, _, e1 := syscall.Syscall(procWaitForSingleObject.Addr(), 2, uintptr(handle), uintptr(waitMilliseconds), 0) + r0, _, e1 := syscall.SyscallN(procWaitForSingleObject.Addr(), uintptr(handle), uintptr(waitMilliseconds)) event = uint32(r0) if event == 0xffffffff { err = errnoErr(e1) @@ -3546,7 +3601,7 @@ func WaitForSingleObject(handle Handle, waitMilliseconds uint32) (event uint32, } func WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) { - r1, _, e1 := syscall.Syscall6(procWriteConsoleW.Addr(), 5, uintptr(console), uintptr(unsafe.Pointer(buf)), uintptr(towrite), uintptr(unsafe.Pointer(written)), uintptr(unsafe.Pointer(reserved)), 0) + r1, _, e1 := 
syscall.SyscallN(procWriteConsoleW.Addr(), uintptr(console), uintptr(unsafe.Pointer(buf)), uintptr(towrite), uintptr(unsafe.Pointer(written)), uintptr(unsafe.Pointer(reserved))) if r1 == 0 { err = errnoErr(e1) } @@ -3558,7 +3613,7 @@ func writeFile(handle Handle, buf []byte, done *uint32, overlapped *Overlapped) if len(buf) > 0 { _p0 = &buf[0] } - r1, _, e1 := syscall.Syscall6(procWriteFile.Addr(), 5, uintptr(handle), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(unsafe.Pointer(done)), uintptr(unsafe.Pointer(overlapped)), 0) + r1, _, e1 := syscall.SyscallN(procWriteFile.Addr(), uintptr(handle), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(unsafe.Pointer(done)), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -3566,7 +3621,7 @@ func writeFile(handle Handle, buf []byte, done *uint32, overlapped *Overlapped) } func WriteProcessMemory(process Handle, baseAddress uintptr, buffer *byte, size uintptr, numberOfBytesWritten *uintptr) (err error) { - r1, _, e1 := syscall.Syscall6(procWriteProcessMemory.Addr(), 5, uintptr(process), uintptr(baseAddress), uintptr(unsafe.Pointer(buffer)), uintptr(size), uintptr(unsafe.Pointer(numberOfBytesWritten)), 0) + r1, _, e1 := syscall.SyscallN(procWriteProcessMemory.Addr(), uintptr(process), uintptr(baseAddress), uintptr(unsafe.Pointer(buffer)), uintptr(size), uintptr(unsafe.Pointer(numberOfBytesWritten))) if r1 == 0 { err = errnoErr(e1) } @@ -3574,7 +3629,7 @@ func WriteProcessMemory(process Handle, baseAddress uintptr, buffer *byte, size } func AcceptEx(ls Handle, as Handle, buf *byte, rxdatalen uint32, laddrlen uint32, raddrlen uint32, recvd *uint32, overlapped *Overlapped) (err error) { - r1, _, e1 := syscall.Syscall9(procAcceptEx.Addr(), 8, uintptr(ls), uintptr(as), uintptr(unsafe.Pointer(buf)), uintptr(rxdatalen), uintptr(laddrlen), uintptr(raddrlen), uintptr(unsafe.Pointer(recvd)), uintptr(unsafe.Pointer(overlapped)), 0) + r1, _, e1 := syscall.SyscallN(procAcceptEx.Addr(), 
uintptr(ls), uintptr(as), uintptr(unsafe.Pointer(buf)), uintptr(rxdatalen), uintptr(laddrlen), uintptr(raddrlen), uintptr(unsafe.Pointer(recvd)), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { err = errnoErr(e1) } @@ -3582,12 +3637,12 @@ func AcceptEx(ls Handle, as Handle, buf *byte, rxdatalen uint32, laddrlen uint32 } func GetAcceptExSockaddrs(buf *byte, rxdatalen uint32, laddrlen uint32, raddrlen uint32, lrsa **RawSockaddrAny, lrsalen *int32, rrsa **RawSockaddrAny, rrsalen *int32) { - syscall.Syscall9(procGetAcceptExSockaddrs.Addr(), 8, uintptr(unsafe.Pointer(buf)), uintptr(rxdatalen), uintptr(laddrlen), uintptr(raddrlen), uintptr(unsafe.Pointer(lrsa)), uintptr(unsafe.Pointer(lrsalen)), uintptr(unsafe.Pointer(rrsa)), uintptr(unsafe.Pointer(rrsalen)), 0) + syscall.SyscallN(procGetAcceptExSockaddrs.Addr(), uintptr(unsafe.Pointer(buf)), uintptr(rxdatalen), uintptr(laddrlen), uintptr(raddrlen), uintptr(unsafe.Pointer(lrsa)), uintptr(unsafe.Pointer(lrsalen)), uintptr(unsafe.Pointer(rrsa)), uintptr(unsafe.Pointer(rrsalen))) return } func TransmitFile(s Handle, handle Handle, bytesToWrite uint32, bytsPerSend uint32, overlapped *Overlapped, transmitFileBuf *TransmitFileBuffers, flags uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procTransmitFile.Addr(), 7, uintptr(s), uintptr(handle), uintptr(bytesToWrite), uintptr(bytsPerSend), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(transmitFileBuf)), uintptr(flags), 0, 0) + r1, _, e1 := syscall.SyscallN(procTransmitFile.Addr(), uintptr(s), uintptr(handle), uintptr(bytesToWrite), uintptr(bytsPerSend), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(transmitFileBuf)), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -3595,7 +3650,7 @@ func TransmitFile(s Handle, handle Handle, bytesToWrite uint32, bytsPerSend uint } func NetApiBufferFree(buf *byte) (neterr error) { - r0, _, _ := syscall.Syscall(procNetApiBufferFree.Addr(), 1, uintptr(unsafe.Pointer(buf)), 0, 0) + r0, _, _ := 
syscall.SyscallN(procNetApiBufferFree.Addr(), uintptr(unsafe.Pointer(buf))) if r0 != 0 { neterr = syscall.Errno(r0) } @@ -3603,7 +3658,7 @@ func NetApiBufferFree(buf *byte) (neterr error) { } func NetGetJoinInformation(server *uint16, name **uint16, bufType *uint32) (neterr error) { - r0, _, _ := syscall.Syscall(procNetGetJoinInformation.Addr(), 3, uintptr(unsafe.Pointer(server)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(bufType))) + r0, _, _ := syscall.SyscallN(procNetGetJoinInformation.Addr(), uintptr(unsafe.Pointer(server)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(bufType))) if r0 != 0 { neterr = syscall.Errno(r0) } @@ -3611,7 +3666,7 @@ func NetGetJoinInformation(server *uint16, name **uint16, bufType *uint32) (nete } func NetUserEnum(serverName *uint16, level uint32, filter uint32, buf **byte, prefMaxLen uint32, entriesRead *uint32, totalEntries *uint32, resumeHandle *uint32) (neterr error) { - r0, _, _ := syscall.Syscall9(procNetUserEnum.Addr(), 8, uintptr(unsafe.Pointer(serverName)), uintptr(level), uintptr(filter), uintptr(unsafe.Pointer(buf)), uintptr(prefMaxLen), uintptr(unsafe.Pointer(entriesRead)), uintptr(unsafe.Pointer(totalEntries)), uintptr(unsafe.Pointer(resumeHandle)), 0) + r0, _, _ := syscall.SyscallN(procNetUserEnum.Addr(), uintptr(unsafe.Pointer(serverName)), uintptr(level), uintptr(filter), uintptr(unsafe.Pointer(buf)), uintptr(prefMaxLen), uintptr(unsafe.Pointer(entriesRead)), uintptr(unsafe.Pointer(totalEntries)), uintptr(unsafe.Pointer(resumeHandle))) if r0 != 0 { neterr = syscall.Errno(r0) } @@ -3619,7 +3674,7 @@ func NetUserEnum(serverName *uint16, level uint32, filter uint32, buf **byte, pr } func NetUserGetInfo(serverName *uint16, userName *uint16, level uint32, buf **byte) (neterr error) { - r0, _, _ := syscall.Syscall6(procNetUserGetInfo.Addr(), 4, uintptr(unsafe.Pointer(serverName)), uintptr(unsafe.Pointer(userName)), uintptr(level), uintptr(unsafe.Pointer(buf)), 0, 0) + r0, _, _ := 
syscall.SyscallN(procNetUserGetInfo.Addr(), uintptr(unsafe.Pointer(serverName)), uintptr(unsafe.Pointer(userName)), uintptr(level), uintptr(unsafe.Pointer(buf))) if r0 != 0 { neterr = syscall.Errno(r0) } @@ -3627,7 +3682,7 @@ func NetUserGetInfo(serverName *uint16, userName *uint16, level uint32, buf **by } func NtCreateFile(handle *Handle, access uint32, oa *OBJECT_ATTRIBUTES, iosb *IO_STATUS_BLOCK, allocationSize *int64, attributes uint32, share uint32, disposition uint32, options uint32, eabuffer uintptr, ealength uint32) (ntstatus error) { - r0, _, _ := syscall.Syscall12(procNtCreateFile.Addr(), 11, uintptr(unsafe.Pointer(handle)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(unsafe.Pointer(allocationSize)), uintptr(attributes), uintptr(share), uintptr(disposition), uintptr(options), uintptr(eabuffer), uintptr(ealength), 0) + r0, _, _ := syscall.SyscallN(procNtCreateFile.Addr(), uintptr(unsafe.Pointer(handle)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(unsafe.Pointer(allocationSize)), uintptr(attributes), uintptr(share), uintptr(disposition), uintptr(options), uintptr(eabuffer), uintptr(ealength)) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3635,7 +3690,7 @@ func NtCreateFile(handle *Handle, access uint32, oa *OBJECT_ATTRIBUTES, iosb *IO } func NtCreateNamedPipeFile(pipe *Handle, access uint32, oa *OBJECT_ATTRIBUTES, iosb *IO_STATUS_BLOCK, share uint32, disposition uint32, options uint32, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (ntstatus error) { - r0, _, _ := syscall.Syscall15(procNtCreateNamedPipeFile.Addr(), 14, uintptr(unsafe.Pointer(pipe)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(share), uintptr(disposition), uintptr(options), uintptr(typ), uintptr(readMode), uintptr(completionMode), uintptr(maxInstances), uintptr(inboundQuota), 
uintptr(outputQuota), uintptr(unsafe.Pointer(timeout)), 0) + r0, _, _ := syscall.SyscallN(procNtCreateNamedPipeFile.Addr(), uintptr(unsafe.Pointer(pipe)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(share), uintptr(disposition), uintptr(options), uintptr(typ), uintptr(readMode), uintptr(completionMode), uintptr(maxInstances), uintptr(inboundQuota), uintptr(outputQuota), uintptr(unsafe.Pointer(timeout))) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3643,7 +3698,7 @@ func NtCreateNamedPipeFile(pipe *Handle, access uint32, oa *OBJECT_ATTRIBUTES, i } func NtQueryInformationProcess(proc Handle, procInfoClass int32, procInfo unsafe.Pointer, procInfoLen uint32, retLen *uint32) (ntstatus error) { - r0, _, _ := syscall.Syscall6(procNtQueryInformationProcess.Addr(), 5, uintptr(proc), uintptr(procInfoClass), uintptr(procInfo), uintptr(procInfoLen), uintptr(unsafe.Pointer(retLen)), 0) + r0, _, _ := syscall.SyscallN(procNtQueryInformationProcess.Addr(), uintptr(proc), uintptr(procInfoClass), uintptr(procInfo), uintptr(procInfoLen), uintptr(unsafe.Pointer(retLen))) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3651,7 +3706,7 @@ func NtQueryInformationProcess(proc Handle, procInfoClass int32, procInfo unsafe } func NtQuerySystemInformation(sysInfoClass int32, sysInfo unsafe.Pointer, sysInfoLen uint32, retLen *uint32) (ntstatus error) { - r0, _, _ := syscall.Syscall6(procNtQuerySystemInformation.Addr(), 4, uintptr(sysInfoClass), uintptr(sysInfo), uintptr(sysInfoLen), uintptr(unsafe.Pointer(retLen)), 0, 0) + r0, _, _ := syscall.SyscallN(procNtQuerySystemInformation.Addr(), uintptr(sysInfoClass), uintptr(sysInfo), uintptr(sysInfoLen), uintptr(unsafe.Pointer(retLen))) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3659,7 +3714,7 @@ func NtQuerySystemInformation(sysInfoClass int32, sysInfo unsafe.Pointer, sysInf } func NtSetInformationFile(handle Handle, iosb *IO_STATUS_BLOCK, inBuffer *byte, inBufferLen uint32, class uint32) (ntstatus error) { - r0, _, 
_ := syscall.Syscall6(procNtSetInformationFile.Addr(), 5, uintptr(handle), uintptr(unsafe.Pointer(iosb)), uintptr(unsafe.Pointer(inBuffer)), uintptr(inBufferLen), uintptr(class), 0) + r0, _, _ := syscall.SyscallN(procNtSetInformationFile.Addr(), uintptr(handle), uintptr(unsafe.Pointer(iosb)), uintptr(unsafe.Pointer(inBuffer)), uintptr(inBufferLen), uintptr(class)) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3667,7 +3722,7 @@ func NtSetInformationFile(handle Handle, iosb *IO_STATUS_BLOCK, inBuffer *byte, } func NtSetInformationProcess(proc Handle, procInfoClass int32, procInfo unsafe.Pointer, procInfoLen uint32) (ntstatus error) { - r0, _, _ := syscall.Syscall6(procNtSetInformationProcess.Addr(), 4, uintptr(proc), uintptr(procInfoClass), uintptr(procInfo), uintptr(procInfoLen), 0, 0) + r0, _, _ := syscall.SyscallN(procNtSetInformationProcess.Addr(), uintptr(proc), uintptr(procInfoClass), uintptr(procInfo), uintptr(procInfoLen)) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3675,7 +3730,7 @@ func NtSetInformationProcess(proc Handle, procInfoClass int32, procInfo unsafe.P } func NtSetSystemInformation(sysInfoClass int32, sysInfo unsafe.Pointer, sysInfoLen uint32) (ntstatus error) { - r0, _, _ := syscall.Syscall(procNtSetSystemInformation.Addr(), 3, uintptr(sysInfoClass), uintptr(sysInfo), uintptr(sysInfoLen)) + r0, _, _ := syscall.SyscallN(procNtSetSystemInformation.Addr(), uintptr(sysInfoClass), uintptr(sysInfo), uintptr(sysInfoLen)) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3683,13 +3738,13 @@ func NtSetSystemInformation(sysInfoClass int32, sysInfo unsafe.Pointer, sysInfoL } func RtlAddFunctionTable(functionTable *RUNTIME_FUNCTION, entryCount uint32, baseAddress uintptr) (ret bool) { - r0, _, _ := syscall.Syscall(procRtlAddFunctionTable.Addr(), 3, uintptr(unsafe.Pointer(functionTable)), uintptr(entryCount), uintptr(baseAddress)) + r0, _, _ := syscall.SyscallN(procRtlAddFunctionTable.Addr(), uintptr(unsafe.Pointer(functionTable)), uintptr(entryCount), 
uintptr(baseAddress)) ret = r0 != 0 return } func RtlDefaultNpAcl(acl **ACL) (ntstatus error) { - r0, _, _ := syscall.Syscall(procRtlDefaultNpAcl.Addr(), 1, uintptr(unsafe.Pointer(acl)), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlDefaultNpAcl.Addr(), uintptr(unsafe.Pointer(acl))) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3697,13 +3752,13 @@ func RtlDefaultNpAcl(acl **ACL) (ntstatus error) { } func RtlDeleteFunctionTable(functionTable *RUNTIME_FUNCTION) (ret bool) { - r0, _, _ := syscall.Syscall(procRtlDeleteFunctionTable.Addr(), 1, uintptr(unsafe.Pointer(functionTable)), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlDeleteFunctionTable.Addr(), uintptr(unsafe.Pointer(functionTable))) ret = r0 != 0 return } func RtlDosPathNameToNtPathName(dosName *uint16, ntName *NTUnicodeString, ntFileNamePart *uint16, relativeName *RTL_RELATIVE_NAME) (ntstatus error) { - r0, _, _ := syscall.Syscall6(procRtlDosPathNameToNtPathName_U_WithStatus.Addr(), 4, uintptr(unsafe.Pointer(dosName)), uintptr(unsafe.Pointer(ntName)), uintptr(unsafe.Pointer(ntFileNamePart)), uintptr(unsafe.Pointer(relativeName)), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlDosPathNameToNtPathName_U_WithStatus.Addr(), uintptr(unsafe.Pointer(dosName)), uintptr(unsafe.Pointer(ntName)), uintptr(unsafe.Pointer(ntFileNamePart)), uintptr(unsafe.Pointer(relativeName))) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3711,7 +3766,7 @@ func RtlDosPathNameToNtPathName(dosName *uint16, ntName *NTUnicodeString, ntFile } func RtlDosPathNameToRelativeNtPathName(dosName *uint16, ntName *NTUnicodeString, ntFileNamePart *uint16, relativeName *RTL_RELATIVE_NAME) (ntstatus error) { - r0, _, _ := syscall.Syscall6(procRtlDosPathNameToRelativeNtPathName_U_WithStatus.Addr(), 4, uintptr(unsafe.Pointer(dosName)), uintptr(unsafe.Pointer(ntName)), uintptr(unsafe.Pointer(ntFileNamePart)), uintptr(unsafe.Pointer(relativeName)), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlDosPathNameToRelativeNtPathName_U_WithStatus.Addr(), 
uintptr(unsafe.Pointer(dosName)), uintptr(unsafe.Pointer(ntName)), uintptr(unsafe.Pointer(ntFileNamePart)), uintptr(unsafe.Pointer(relativeName))) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3719,18 +3774,18 @@ func RtlDosPathNameToRelativeNtPathName(dosName *uint16, ntName *NTUnicodeString } func RtlGetCurrentPeb() (peb *PEB) { - r0, _, _ := syscall.Syscall(procRtlGetCurrentPeb.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procRtlGetCurrentPeb.Addr()) peb = (*PEB)(unsafe.Pointer(r0)) return } func rtlGetNtVersionNumbers(majorVersion *uint32, minorVersion *uint32, buildNumber *uint32) { - syscall.Syscall(procRtlGetNtVersionNumbers.Addr(), 3, uintptr(unsafe.Pointer(majorVersion)), uintptr(unsafe.Pointer(minorVersion)), uintptr(unsafe.Pointer(buildNumber))) + syscall.SyscallN(procRtlGetNtVersionNumbers.Addr(), uintptr(unsafe.Pointer(majorVersion)), uintptr(unsafe.Pointer(minorVersion)), uintptr(unsafe.Pointer(buildNumber))) return } func rtlGetVersion(info *OsVersionInfoEx) (ntstatus error) { - r0, _, _ := syscall.Syscall(procRtlGetVersion.Addr(), 1, uintptr(unsafe.Pointer(info)), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlGetVersion.Addr(), uintptr(unsafe.Pointer(info))) if r0 != 0 { ntstatus = NTStatus(r0) } @@ -3738,23 +3793,23 @@ func rtlGetVersion(info *OsVersionInfoEx) (ntstatus error) { } func RtlInitString(destinationString *NTString, sourceString *byte) { - syscall.Syscall(procRtlInitString.Addr(), 2, uintptr(unsafe.Pointer(destinationString)), uintptr(unsafe.Pointer(sourceString)), 0) + syscall.SyscallN(procRtlInitString.Addr(), uintptr(unsafe.Pointer(destinationString)), uintptr(unsafe.Pointer(sourceString))) return } func RtlInitUnicodeString(destinationString *NTUnicodeString, sourceString *uint16) { - syscall.Syscall(procRtlInitUnicodeString.Addr(), 2, uintptr(unsafe.Pointer(destinationString)), uintptr(unsafe.Pointer(sourceString)), 0) + syscall.SyscallN(procRtlInitUnicodeString.Addr(), uintptr(unsafe.Pointer(destinationString)), 
uintptr(unsafe.Pointer(sourceString))) return } func rtlNtStatusToDosErrorNoTeb(ntstatus NTStatus) (ret syscall.Errno) { - r0, _, _ := syscall.Syscall(procRtlNtStatusToDosErrorNoTeb.Addr(), 1, uintptr(ntstatus), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlNtStatusToDosErrorNoTeb.Addr(), uintptr(ntstatus)) ret = syscall.Errno(r0) return } func clsidFromString(lpsz *uint16, pclsid *GUID) (ret error) { - r0, _, _ := syscall.Syscall(procCLSIDFromString.Addr(), 2, uintptr(unsafe.Pointer(lpsz)), uintptr(unsafe.Pointer(pclsid)), 0) + r0, _, _ := syscall.SyscallN(procCLSIDFromString.Addr(), uintptr(unsafe.Pointer(lpsz)), uintptr(unsafe.Pointer(pclsid))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -3762,7 +3817,7 @@ func clsidFromString(lpsz *uint16, pclsid *GUID) (ret error) { } func coCreateGuid(pguid *GUID) (ret error) { - r0, _, _ := syscall.Syscall(procCoCreateGuid.Addr(), 1, uintptr(unsafe.Pointer(pguid)), 0, 0) + r0, _, _ := syscall.SyscallN(procCoCreateGuid.Addr(), uintptr(unsafe.Pointer(pguid))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -3770,7 +3825,7 @@ func coCreateGuid(pguid *GUID) (ret error) { } func CoGetObject(name *uint16, bindOpts *BIND_OPTS3, guid *GUID, functionTable **uintptr) (ret error) { - r0, _, _ := syscall.Syscall6(procCoGetObject.Addr(), 4, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(bindOpts)), uintptr(unsafe.Pointer(guid)), uintptr(unsafe.Pointer(functionTable)), 0, 0) + r0, _, _ := syscall.SyscallN(procCoGetObject.Addr(), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(bindOpts)), uintptr(unsafe.Pointer(guid)), uintptr(unsafe.Pointer(functionTable))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -3778,7 +3833,7 @@ func CoGetObject(name *uint16, bindOpts *BIND_OPTS3, guid *GUID, functionTable * } func CoInitializeEx(reserved uintptr, coInit uint32) (ret error) { - r0, _, _ := syscall.Syscall(procCoInitializeEx.Addr(), 2, uintptr(reserved), uintptr(coInit), 0) + r0, _, _ := syscall.SyscallN(procCoInitializeEx.Addr(), 
uintptr(reserved), uintptr(coInit)) if r0 != 0 { ret = syscall.Errno(r0) } @@ -3786,23 +3841,23 @@ func CoInitializeEx(reserved uintptr, coInit uint32) (ret error) { } func CoTaskMemFree(address unsafe.Pointer) { - syscall.Syscall(procCoTaskMemFree.Addr(), 1, uintptr(address), 0, 0) + syscall.SyscallN(procCoTaskMemFree.Addr(), uintptr(address)) return } func CoUninitialize() { - syscall.Syscall(procCoUninitialize.Addr(), 0, 0, 0, 0) + syscall.SyscallN(procCoUninitialize.Addr()) return } func stringFromGUID2(rguid *GUID, lpsz *uint16, cchMax int32) (chars int32) { - r0, _, _ := syscall.Syscall(procStringFromGUID2.Addr(), 3, uintptr(unsafe.Pointer(rguid)), uintptr(unsafe.Pointer(lpsz)), uintptr(cchMax)) + r0, _, _ := syscall.SyscallN(procStringFromGUID2.Addr(), uintptr(unsafe.Pointer(rguid)), uintptr(unsafe.Pointer(lpsz)), uintptr(cchMax)) chars = int32(r0) return } func EnumProcessModules(process Handle, module *Handle, cb uint32, cbNeeded *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procEnumProcessModules.Addr(), 4, uintptr(process), uintptr(unsafe.Pointer(module)), uintptr(cb), uintptr(unsafe.Pointer(cbNeeded)), 0, 0) + r1, _, e1 := syscall.SyscallN(procEnumProcessModules.Addr(), uintptr(process), uintptr(unsafe.Pointer(module)), uintptr(cb), uintptr(unsafe.Pointer(cbNeeded))) if r1 == 0 { err = errnoErr(e1) } @@ -3810,7 +3865,7 @@ func EnumProcessModules(process Handle, module *Handle, cb uint32, cbNeeded *uin } func EnumProcessModulesEx(process Handle, module *Handle, cb uint32, cbNeeded *uint32, filterFlag uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procEnumProcessModulesEx.Addr(), 5, uintptr(process), uintptr(unsafe.Pointer(module)), uintptr(cb), uintptr(unsafe.Pointer(cbNeeded)), uintptr(filterFlag), 0) + r1, _, e1 := syscall.SyscallN(procEnumProcessModulesEx.Addr(), uintptr(process), uintptr(unsafe.Pointer(module)), uintptr(cb), uintptr(unsafe.Pointer(cbNeeded)), uintptr(filterFlag)) if r1 == 0 { err = errnoErr(e1) } @@ -3818,7 +3873,7 @@ 
func EnumProcessModulesEx(process Handle, module *Handle, cb uint32, cbNeeded *u } func enumProcesses(processIds *uint32, nSize uint32, bytesReturned *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procEnumProcesses.Addr(), 3, uintptr(unsafe.Pointer(processIds)), uintptr(nSize), uintptr(unsafe.Pointer(bytesReturned))) + r1, _, e1 := syscall.SyscallN(procEnumProcesses.Addr(), uintptr(unsafe.Pointer(processIds)), uintptr(nSize), uintptr(unsafe.Pointer(bytesReturned))) if r1 == 0 { err = errnoErr(e1) } @@ -3826,7 +3881,7 @@ func enumProcesses(processIds *uint32, nSize uint32, bytesReturned *uint32) (err } func GetModuleBaseName(process Handle, module Handle, baseName *uint16, size uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetModuleBaseNameW.Addr(), 4, uintptr(process), uintptr(module), uintptr(unsafe.Pointer(baseName)), uintptr(size), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetModuleBaseNameW.Addr(), uintptr(process), uintptr(module), uintptr(unsafe.Pointer(baseName)), uintptr(size)) if r1 == 0 { err = errnoErr(e1) } @@ -3834,7 +3889,7 @@ func GetModuleBaseName(process Handle, module Handle, baseName *uint16, size uin } func GetModuleFileNameEx(process Handle, module Handle, filename *uint16, size uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetModuleFileNameExW.Addr(), 4, uintptr(process), uintptr(module), uintptr(unsafe.Pointer(filename)), uintptr(size), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetModuleFileNameExW.Addr(), uintptr(process), uintptr(module), uintptr(unsafe.Pointer(filename)), uintptr(size)) if r1 == 0 { err = errnoErr(e1) } @@ -3842,7 +3897,7 @@ func GetModuleFileNameEx(process Handle, module Handle, filename *uint16, size u } func GetModuleInformation(process Handle, module Handle, modinfo *ModuleInfo, cb uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetModuleInformation.Addr(), 4, uintptr(process), uintptr(module), uintptr(unsafe.Pointer(modinfo)), uintptr(cb), 0, 0) + r1, _, e1 := 
syscall.SyscallN(procGetModuleInformation.Addr(), uintptr(process), uintptr(module), uintptr(unsafe.Pointer(modinfo)), uintptr(cb)) if r1 == 0 { err = errnoErr(e1) } @@ -3850,7 +3905,7 @@ func GetModuleInformation(process Handle, module Handle, modinfo *ModuleInfo, cb } func QueryWorkingSetEx(process Handle, pv uintptr, cb uint32) (err error) { - r1, _, e1 := syscall.Syscall(procQueryWorkingSetEx.Addr(), 3, uintptr(process), uintptr(pv), uintptr(cb)) + r1, _, e1 := syscall.SyscallN(procQueryWorkingSetEx.Addr(), uintptr(process), uintptr(pv), uintptr(cb)) if r1 == 0 { err = errnoErr(e1) } @@ -3862,7 +3917,7 @@ func SubscribeServiceChangeNotifications(service Handle, eventType uint32, callb if ret != nil { return } - r0, _, _ := syscall.Syscall6(procSubscribeServiceChangeNotifications.Addr(), 5, uintptr(service), uintptr(eventType), uintptr(callback), uintptr(callbackCtx), uintptr(unsafe.Pointer(subscription)), 0) + r0, _, _ := syscall.SyscallN(procSubscribeServiceChangeNotifications.Addr(), uintptr(service), uintptr(eventType), uintptr(callback), uintptr(callbackCtx), uintptr(unsafe.Pointer(subscription))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -3874,12 +3929,12 @@ func UnsubscribeServiceChangeNotifications(subscription uintptr) (err error) { if err != nil { return } - syscall.Syscall(procUnsubscribeServiceChangeNotifications.Addr(), 1, uintptr(subscription), 0, 0) + syscall.SyscallN(procUnsubscribeServiceChangeNotifications.Addr(), uintptr(subscription)) return } func GetUserNameEx(nameFormat uint32, nameBuffre *uint16, nSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetUserNameExW.Addr(), 3, uintptr(nameFormat), uintptr(unsafe.Pointer(nameBuffre)), uintptr(unsafe.Pointer(nSize))) + r1, _, e1 := syscall.SyscallN(procGetUserNameExW.Addr(), uintptr(nameFormat), uintptr(unsafe.Pointer(nameBuffre)), uintptr(unsafe.Pointer(nSize))) if r1&0xff == 0 { err = errnoErr(e1) } @@ -3887,7 +3942,7 @@ func GetUserNameEx(nameFormat uint32, nameBuffre *uint16, 
nSize *uint32) (err er } func TranslateName(accName *uint16, accNameFormat uint32, desiredNameFormat uint32, translatedName *uint16, nSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procTranslateNameW.Addr(), 5, uintptr(unsafe.Pointer(accName)), uintptr(accNameFormat), uintptr(desiredNameFormat), uintptr(unsafe.Pointer(translatedName)), uintptr(unsafe.Pointer(nSize)), 0) + r1, _, e1 := syscall.SyscallN(procTranslateNameW.Addr(), uintptr(unsafe.Pointer(accName)), uintptr(accNameFormat), uintptr(desiredNameFormat), uintptr(unsafe.Pointer(translatedName)), uintptr(unsafe.Pointer(nSize))) if r1&0xff == 0 { err = errnoErr(e1) } @@ -3895,7 +3950,7 @@ func TranslateName(accName *uint16, accNameFormat uint32, desiredNameFormat uint } func SetupDiBuildDriverInfoList(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, driverType SPDIT) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiBuildDriverInfoList.Addr(), 3, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(driverType)) + r1, _, e1 := syscall.SyscallN(procSetupDiBuildDriverInfoList.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(driverType)) if r1 == 0 { err = errnoErr(e1) } @@ -3903,7 +3958,7 @@ func SetupDiBuildDriverInfoList(deviceInfoSet DevInfo, deviceInfoData *DevInfoDa } func SetupDiCallClassInstaller(installFunction DI_FUNCTION, deviceInfoSet DevInfo, deviceInfoData *DevInfoData) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiCallClassInstaller.Addr(), 3, uintptr(installFunction), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData))) + r1, _, e1 := syscall.SyscallN(procSetupDiCallClassInstaller.Addr(), uintptr(installFunction), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -3911,7 +3966,7 @@ func SetupDiCallClassInstaller(installFunction DI_FUNCTION, deviceInfoSet DevInf } func SetupDiCancelDriverInfoSearch(deviceInfoSet DevInfo) (err error) { - r1, _, e1 := 
syscall.Syscall(procSetupDiCancelDriverInfoSearch.Addr(), 1, uintptr(deviceInfoSet), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetupDiCancelDriverInfoSearch.Addr(), uintptr(deviceInfoSet)) if r1 == 0 { err = errnoErr(e1) } @@ -3919,7 +3974,7 @@ func SetupDiCancelDriverInfoSearch(deviceInfoSet DevInfo) (err error) { } func setupDiClassGuidsFromNameEx(className *uint16, classGuidList *GUID, classGuidListSize uint32, requiredSize *uint32, machineName *uint16, reserved uintptr) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiClassGuidsFromNameExW.Addr(), 6, uintptr(unsafe.Pointer(className)), uintptr(unsafe.Pointer(classGuidList)), uintptr(classGuidListSize), uintptr(unsafe.Pointer(requiredSize)), uintptr(unsafe.Pointer(machineName)), uintptr(reserved)) + r1, _, e1 := syscall.SyscallN(procSetupDiClassGuidsFromNameExW.Addr(), uintptr(unsafe.Pointer(className)), uintptr(unsafe.Pointer(classGuidList)), uintptr(classGuidListSize), uintptr(unsafe.Pointer(requiredSize)), uintptr(unsafe.Pointer(machineName)), uintptr(reserved)) if r1 == 0 { err = errnoErr(e1) } @@ -3927,7 +3982,7 @@ func setupDiClassGuidsFromNameEx(className *uint16, classGuidList *GUID, classGu } func setupDiClassNameFromGuidEx(classGUID *GUID, className *uint16, classNameSize uint32, requiredSize *uint32, machineName *uint16, reserved uintptr) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiClassNameFromGuidExW.Addr(), 6, uintptr(unsafe.Pointer(classGUID)), uintptr(unsafe.Pointer(className)), uintptr(classNameSize), uintptr(unsafe.Pointer(requiredSize)), uintptr(unsafe.Pointer(machineName)), uintptr(reserved)) + r1, _, e1 := syscall.SyscallN(procSetupDiClassNameFromGuidExW.Addr(), uintptr(unsafe.Pointer(classGUID)), uintptr(unsafe.Pointer(className)), uintptr(classNameSize), uintptr(unsafe.Pointer(requiredSize)), uintptr(unsafe.Pointer(machineName)), uintptr(reserved)) if r1 == 0 { err = errnoErr(e1) } @@ -3935,7 +3990,7 @@ func setupDiClassNameFromGuidEx(classGUID *GUID, className 
*uint16, classNameSiz } func setupDiCreateDeviceInfoListEx(classGUID *GUID, hwndParent uintptr, machineName *uint16, reserved uintptr) (handle DevInfo, err error) { - r0, _, e1 := syscall.Syscall6(procSetupDiCreateDeviceInfoListExW.Addr(), 4, uintptr(unsafe.Pointer(classGUID)), uintptr(hwndParent), uintptr(unsafe.Pointer(machineName)), uintptr(reserved), 0, 0) + r0, _, e1 := syscall.SyscallN(procSetupDiCreateDeviceInfoListExW.Addr(), uintptr(unsafe.Pointer(classGUID)), uintptr(hwndParent), uintptr(unsafe.Pointer(machineName)), uintptr(reserved)) handle = DevInfo(r0) if handle == DevInfo(InvalidHandle) { err = errnoErr(e1) @@ -3944,7 +3999,7 @@ func setupDiCreateDeviceInfoListEx(classGUID *GUID, hwndParent uintptr, machineN } func setupDiCreateDeviceInfo(deviceInfoSet DevInfo, DeviceName *uint16, classGUID *GUID, DeviceDescription *uint16, hwndParent uintptr, CreationFlags DICD, deviceInfoData *DevInfoData) (err error) { - r1, _, e1 := syscall.Syscall9(procSetupDiCreateDeviceInfoW.Addr(), 7, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(DeviceName)), uintptr(unsafe.Pointer(classGUID)), uintptr(unsafe.Pointer(DeviceDescription)), uintptr(hwndParent), uintptr(CreationFlags), uintptr(unsafe.Pointer(deviceInfoData)), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetupDiCreateDeviceInfoW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(DeviceName)), uintptr(unsafe.Pointer(classGUID)), uintptr(unsafe.Pointer(DeviceDescription)), uintptr(hwndParent), uintptr(CreationFlags), uintptr(unsafe.Pointer(deviceInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -3952,7 +4007,7 @@ func setupDiCreateDeviceInfo(deviceInfoSet DevInfo, DeviceName *uint16, classGUI } func SetupDiDestroyDeviceInfoList(deviceInfoSet DevInfo) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiDestroyDeviceInfoList.Addr(), 1, uintptr(deviceInfoSet), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetupDiDestroyDeviceInfoList.Addr(), uintptr(deviceInfoSet)) if r1 == 0 { err = errnoErr(e1) } @@ -3960,7 +4015,7 
@@ func SetupDiDestroyDeviceInfoList(deviceInfoSet DevInfo) (err error) { } func SetupDiDestroyDriverInfoList(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, driverType SPDIT) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiDestroyDriverInfoList.Addr(), 3, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(driverType)) + r1, _, e1 := syscall.SyscallN(procSetupDiDestroyDriverInfoList.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(driverType)) if r1 == 0 { err = errnoErr(e1) } @@ -3968,7 +4023,7 @@ func SetupDiDestroyDriverInfoList(deviceInfoSet DevInfo, deviceInfoData *DevInfo } func setupDiEnumDeviceInfo(deviceInfoSet DevInfo, memberIndex uint32, deviceInfoData *DevInfoData) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiEnumDeviceInfo.Addr(), 3, uintptr(deviceInfoSet), uintptr(memberIndex), uintptr(unsafe.Pointer(deviceInfoData))) + r1, _, e1 := syscall.SyscallN(procSetupDiEnumDeviceInfo.Addr(), uintptr(deviceInfoSet), uintptr(memberIndex), uintptr(unsafe.Pointer(deviceInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -3976,7 +4031,7 @@ func setupDiEnumDeviceInfo(deviceInfoSet DevInfo, memberIndex uint32, deviceInfo } func setupDiEnumDriverInfo(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, driverType SPDIT, memberIndex uint32, driverInfoData *DrvInfoData) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiEnumDriverInfoW.Addr(), 5, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(driverType), uintptr(memberIndex), uintptr(unsafe.Pointer(driverInfoData)), 0) + r1, _, e1 := syscall.SyscallN(procSetupDiEnumDriverInfoW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(driverType), uintptr(memberIndex), uintptr(unsafe.Pointer(driverInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -3984,7 +4039,7 @@ func setupDiEnumDriverInfo(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, d } func setupDiGetClassDevsEx(classGUID *GUID, 
Enumerator *uint16, hwndParent uintptr, Flags DIGCF, deviceInfoSet DevInfo, machineName *uint16, reserved uintptr) (handle DevInfo, err error) { - r0, _, e1 := syscall.Syscall9(procSetupDiGetClassDevsExW.Addr(), 7, uintptr(unsafe.Pointer(classGUID)), uintptr(unsafe.Pointer(Enumerator)), uintptr(hwndParent), uintptr(Flags), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(machineName)), uintptr(reserved), 0, 0) + r0, _, e1 := syscall.SyscallN(procSetupDiGetClassDevsExW.Addr(), uintptr(unsafe.Pointer(classGUID)), uintptr(unsafe.Pointer(Enumerator)), uintptr(hwndParent), uintptr(Flags), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(machineName)), uintptr(reserved)) handle = DevInfo(r0) if handle == DevInfo(InvalidHandle) { err = errnoErr(e1) @@ -3993,7 +4048,7 @@ func setupDiGetClassDevsEx(classGUID *GUID, Enumerator *uint16, hwndParent uintp } func SetupDiGetClassInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, classInstallParams *ClassInstallHeader, classInstallParamsSize uint32, requiredSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiGetClassInstallParamsW.Addr(), 5, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(classInstallParams)), uintptr(classInstallParamsSize), uintptr(unsafe.Pointer(requiredSize)), 0) + r1, _, e1 := syscall.SyscallN(procSetupDiGetClassInstallParamsW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(classInstallParams)), uintptr(classInstallParamsSize), uintptr(unsafe.Pointer(requiredSize))) if r1 == 0 { err = errnoErr(e1) } @@ -4001,7 +4056,7 @@ func SetupDiGetClassInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInfo } func setupDiGetDeviceInfoListDetail(deviceInfoSet DevInfo, deviceInfoSetDetailData *DevInfoListDetailData) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiGetDeviceInfoListDetailW.Addr(), 2, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoSetDetailData)), 0) + r1, _, e1 := 
syscall.SyscallN(procSetupDiGetDeviceInfoListDetailW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoSetDetailData))) if r1 == 0 { err = errnoErr(e1) } @@ -4009,7 +4064,7 @@ func setupDiGetDeviceInfoListDetail(deviceInfoSet DevInfo, deviceInfoSetDetailDa } func setupDiGetDeviceInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, deviceInstallParams *DevInstallParams) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiGetDeviceInstallParamsW.Addr(), 3, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(deviceInstallParams))) + r1, _, e1 := syscall.SyscallN(procSetupDiGetDeviceInstallParamsW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(deviceInstallParams))) if r1 == 0 { err = errnoErr(e1) } @@ -4017,7 +4072,7 @@ func setupDiGetDeviceInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInf } func setupDiGetDeviceInstanceId(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, instanceId *uint16, instanceIdSize uint32, instanceIdRequiredSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiGetDeviceInstanceIdW.Addr(), 5, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(instanceId)), uintptr(instanceIdSize), uintptr(unsafe.Pointer(instanceIdRequiredSize)), 0) + r1, _, e1 := syscall.SyscallN(procSetupDiGetDeviceInstanceIdW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(instanceId)), uintptr(instanceIdSize), uintptr(unsafe.Pointer(instanceIdRequiredSize))) if r1 == 0 { err = errnoErr(e1) } @@ -4025,7 +4080,7 @@ func setupDiGetDeviceInstanceId(deviceInfoSet DevInfo, deviceInfoData *DevInfoDa } func setupDiGetDeviceProperty(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, propertyKey *DEVPROPKEY, propertyType *DEVPROPTYPE, propertyBuffer *byte, propertyBufferSize uint32, requiredSize *uint32, flags uint32) (err error) { - r1, _, e1 := 
syscall.Syscall9(procSetupDiGetDevicePropertyW.Addr(), 8, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(propertyKey)), uintptr(unsafe.Pointer(propertyType)), uintptr(unsafe.Pointer(propertyBuffer)), uintptr(propertyBufferSize), uintptr(unsafe.Pointer(requiredSize)), uintptr(flags), 0) + r1, _, e1 := syscall.SyscallN(procSetupDiGetDevicePropertyW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(propertyKey)), uintptr(unsafe.Pointer(propertyType)), uintptr(unsafe.Pointer(propertyBuffer)), uintptr(propertyBufferSize), uintptr(unsafe.Pointer(requiredSize)), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } @@ -4033,7 +4088,7 @@ func setupDiGetDeviceProperty(deviceInfoSet DevInfo, deviceInfoData *DevInfoData } func setupDiGetDeviceRegistryProperty(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, property SPDRP, propertyRegDataType *uint32, propertyBuffer *byte, propertyBufferSize uint32, requiredSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procSetupDiGetDeviceRegistryPropertyW.Addr(), 7, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(property), uintptr(unsafe.Pointer(propertyRegDataType)), uintptr(unsafe.Pointer(propertyBuffer)), uintptr(propertyBufferSize), uintptr(unsafe.Pointer(requiredSize)), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetupDiGetDeviceRegistryPropertyW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(property), uintptr(unsafe.Pointer(propertyRegDataType)), uintptr(unsafe.Pointer(propertyBuffer)), uintptr(propertyBufferSize), uintptr(unsafe.Pointer(requiredSize))) if r1 == 0 { err = errnoErr(e1) } @@ -4041,7 +4096,7 @@ func setupDiGetDeviceRegistryProperty(deviceInfoSet DevInfo, deviceInfoData *Dev } func setupDiGetDriverInfoDetail(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, driverInfoData *DrvInfoData, driverInfoDetailData *DrvInfoDetailData, driverInfoDetailDataSize uint32, 
requiredSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiGetDriverInfoDetailW.Addr(), 6, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(driverInfoData)), uintptr(unsafe.Pointer(driverInfoDetailData)), uintptr(driverInfoDetailDataSize), uintptr(unsafe.Pointer(requiredSize))) + r1, _, e1 := syscall.SyscallN(procSetupDiGetDriverInfoDetailW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(driverInfoData)), uintptr(unsafe.Pointer(driverInfoDetailData)), uintptr(driverInfoDetailDataSize), uintptr(unsafe.Pointer(requiredSize))) if r1 == 0 { err = errnoErr(e1) } @@ -4049,7 +4104,7 @@ func setupDiGetDriverInfoDetail(deviceInfoSet DevInfo, deviceInfoData *DevInfoDa } func setupDiGetSelectedDevice(deviceInfoSet DevInfo, deviceInfoData *DevInfoData) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiGetSelectedDevice.Addr(), 2, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), 0) + r1, _, e1 := syscall.SyscallN(procSetupDiGetSelectedDevice.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -4057,7 +4112,7 @@ func setupDiGetSelectedDevice(deviceInfoSet DevInfo, deviceInfoData *DevInfoData } func setupDiGetSelectedDriver(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, driverInfoData *DrvInfoData) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiGetSelectedDriverW.Addr(), 3, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(driverInfoData))) + r1, _, e1 := syscall.SyscallN(procSetupDiGetSelectedDriverW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(driverInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -4065,7 +4120,7 @@ func setupDiGetSelectedDriver(deviceInfoSet DevInfo, deviceInfoData *DevInfoData } func SetupDiOpenDevRegKey(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, Scope DICS_FLAG, 
HwProfile uint32, KeyType DIREG, samDesired uint32) (key Handle, err error) { - r0, _, e1 := syscall.Syscall6(procSetupDiOpenDevRegKey.Addr(), 6, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(Scope), uintptr(HwProfile), uintptr(KeyType), uintptr(samDesired)) + r0, _, e1 := syscall.SyscallN(procSetupDiOpenDevRegKey.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(Scope), uintptr(HwProfile), uintptr(KeyType), uintptr(samDesired)) key = Handle(r0) if key == InvalidHandle { err = errnoErr(e1) @@ -4074,7 +4129,7 @@ func SetupDiOpenDevRegKey(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, Sc } func SetupDiSetClassInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, classInstallParams *ClassInstallHeader, classInstallParamsSize uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiSetClassInstallParamsW.Addr(), 4, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(classInstallParams)), uintptr(classInstallParamsSize), 0, 0) + r1, _, e1 := syscall.SyscallN(procSetupDiSetClassInstallParamsW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(classInstallParams)), uintptr(classInstallParamsSize)) if r1 == 0 { err = errnoErr(e1) } @@ -4082,7 +4137,7 @@ func SetupDiSetClassInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInfo } func SetupDiSetDeviceInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, deviceInstallParams *DevInstallParams) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiSetDeviceInstallParamsW.Addr(), 3, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(deviceInstallParams))) + r1, _, e1 := syscall.SyscallN(procSetupDiSetDeviceInstallParamsW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(deviceInstallParams))) if r1 == 0 { err = errnoErr(e1) } @@ -4090,7 +4145,7 @@ func 
SetupDiSetDeviceInstallParams(deviceInfoSet DevInfo, deviceInfoData *DevInf } func setupDiSetDeviceRegistryProperty(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, property SPDRP, propertyBuffer *byte, propertyBufferSize uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procSetupDiSetDeviceRegistryPropertyW.Addr(), 5, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(property), uintptr(unsafe.Pointer(propertyBuffer)), uintptr(propertyBufferSize), 0) + r1, _, e1 := syscall.SyscallN(procSetupDiSetDeviceRegistryPropertyW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(property), uintptr(unsafe.Pointer(propertyBuffer)), uintptr(propertyBufferSize)) if r1 == 0 { err = errnoErr(e1) } @@ -4098,7 +4153,7 @@ func setupDiSetDeviceRegistryProperty(deviceInfoSet DevInfo, deviceInfoData *Dev } func SetupDiSetSelectedDevice(deviceInfoSet DevInfo, deviceInfoData *DevInfoData) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiSetSelectedDevice.Addr(), 2, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), 0) + r1, _, e1 := syscall.SyscallN(procSetupDiSetSelectedDevice.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -4106,7 +4161,7 @@ func SetupDiSetSelectedDevice(deviceInfoSet DevInfo, deviceInfoData *DevInfoData } func SetupDiSetSelectedDriver(deviceInfoSet DevInfo, deviceInfoData *DevInfoData, driverInfoData *DrvInfoData) (err error) { - r1, _, e1 := syscall.Syscall(procSetupDiSetSelectedDriverW.Addr(), 3, uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(driverInfoData))) + r1, _, e1 := syscall.SyscallN(procSetupDiSetSelectedDriverW.Addr(), uintptr(deviceInfoSet), uintptr(unsafe.Pointer(deviceInfoData)), uintptr(unsafe.Pointer(driverInfoData))) if r1 == 0 { err = errnoErr(e1) } @@ -4114,7 +4169,7 @@ func SetupDiSetSelectedDriver(deviceInfoSet DevInfo, deviceInfoData *DevInfoData } func 
setupUninstallOEMInf(infFileName *uint16, flags SUOI, reserved uintptr) (err error) { - r1, _, e1 := syscall.Syscall(procSetupUninstallOEMInfW.Addr(), 3, uintptr(unsafe.Pointer(infFileName)), uintptr(flags), uintptr(reserved)) + r1, _, e1 := syscall.SyscallN(procSetupUninstallOEMInfW.Addr(), uintptr(unsafe.Pointer(infFileName)), uintptr(flags), uintptr(reserved)) if r1 == 0 { err = errnoErr(e1) } @@ -4122,7 +4177,7 @@ func setupUninstallOEMInf(infFileName *uint16, flags SUOI, reserved uintptr) (er } func commandLineToArgv(cmd *uint16, argc *int32) (argv **uint16, err error) { - r0, _, e1 := syscall.Syscall(procCommandLineToArgvW.Addr(), 2, uintptr(unsafe.Pointer(cmd)), uintptr(unsafe.Pointer(argc)), 0) + r0, _, e1 := syscall.SyscallN(procCommandLineToArgvW.Addr(), uintptr(unsafe.Pointer(cmd)), uintptr(unsafe.Pointer(argc))) argv = (**uint16)(unsafe.Pointer(r0)) if argv == nil { err = errnoErr(e1) @@ -4131,7 +4186,7 @@ func commandLineToArgv(cmd *uint16, argc *int32) (argv **uint16, err error) { } func shGetKnownFolderPath(id *KNOWNFOLDERID, flags uint32, token Token, path **uint16) (ret error) { - r0, _, _ := syscall.Syscall6(procSHGetKnownFolderPath.Addr(), 4, uintptr(unsafe.Pointer(id)), uintptr(flags), uintptr(token), uintptr(unsafe.Pointer(path)), 0, 0) + r0, _, _ := syscall.SyscallN(procSHGetKnownFolderPath.Addr(), uintptr(unsafe.Pointer(id)), uintptr(flags), uintptr(token), uintptr(unsafe.Pointer(path))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -4139,7 +4194,7 @@ func shGetKnownFolderPath(id *KNOWNFOLDERID, flags uint32, token Token, path **u } func ShellExecute(hwnd Handle, verb *uint16, file *uint16, args *uint16, cwd *uint16, showCmd int32) (err error) { - r1, _, e1 := syscall.Syscall6(procShellExecuteW.Addr(), 6, uintptr(hwnd), uintptr(unsafe.Pointer(verb)), uintptr(unsafe.Pointer(file)), uintptr(unsafe.Pointer(args)), uintptr(unsafe.Pointer(cwd)), uintptr(showCmd)) + r1, _, e1 := syscall.SyscallN(procShellExecuteW.Addr(), uintptr(hwnd), 
uintptr(unsafe.Pointer(verb)), uintptr(unsafe.Pointer(file)), uintptr(unsafe.Pointer(args)), uintptr(unsafe.Pointer(cwd)), uintptr(showCmd)) if r1 <= 32 { err = errnoErr(e1) } @@ -4147,12 +4202,12 @@ func ShellExecute(hwnd Handle, verb *uint16, file *uint16, args *uint16, cwd *ui } func EnumChildWindows(hwnd HWND, enumFunc uintptr, param unsafe.Pointer) { - syscall.Syscall(procEnumChildWindows.Addr(), 3, uintptr(hwnd), uintptr(enumFunc), uintptr(param)) + syscall.SyscallN(procEnumChildWindows.Addr(), uintptr(hwnd), uintptr(enumFunc), uintptr(param)) return } func EnumWindows(enumFunc uintptr, param unsafe.Pointer) (err error) { - r1, _, e1 := syscall.Syscall(procEnumWindows.Addr(), 2, uintptr(enumFunc), uintptr(param), 0) + r1, _, e1 := syscall.SyscallN(procEnumWindows.Addr(), uintptr(enumFunc), uintptr(param)) if r1 == 0 { err = errnoErr(e1) } @@ -4160,7 +4215,7 @@ func EnumWindows(enumFunc uintptr, param unsafe.Pointer) (err error) { } func ExitWindowsEx(flags uint32, reason uint32) (err error) { - r1, _, e1 := syscall.Syscall(procExitWindowsEx.Addr(), 2, uintptr(flags), uintptr(reason), 0) + r1, _, e1 := syscall.SyscallN(procExitWindowsEx.Addr(), uintptr(flags), uintptr(reason)) if r1 == 0 { err = errnoErr(e1) } @@ -4168,7 +4223,7 @@ func ExitWindowsEx(flags uint32, reason uint32) (err error) { } func GetClassName(hwnd HWND, className *uint16, maxCount int32) (copied int32, err error) { - r0, _, e1 := syscall.Syscall(procGetClassNameW.Addr(), 3, uintptr(hwnd), uintptr(unsafe.Pointer(className)), uintptr(maxCount)) + r0, _, e1 := syscall.SyscallN(procGetClassNameW.Addr(), uintptr(hwnd), uintptr(unsafe.Pointer(className)), uintptr(maxCount)) copied = int32(r0) if copied == 0 { err = errnoErr(e1) @@ -4177,19 +4232,19 @@ func GetClassName(hwnd HWND, className *uint16, maxCount int32) (copied int32, e } func GetDesktopWindow() (hwnd HWND) { - r0, _, _ := syscall.Syscall(procGetDesktopWindow.Addr(), 0, 0, 0, 0) + r0, _, _ := 
syscall.SyscallN(procGetDesktopWindow.Addr()) hwnd = HWND(r0) return } func GetForegroundWindow() (hwnd HWND) { - r0, _, _ := syscall.Syscall(procGetForegroundWindow.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetForegroundWindow.Addr()) hwnd = HWND(r0) return } func GetGUIThreadInfo(thread uint32, info *GUIThreadInfo) (err error) { - r1, _, e1 := syscall.Syscall(procGetGUIThreadInfo.Addr(), 2, uintptr(thread), uintptr(unsafe.Pointer(info)), 0) + r1, _, e1 := syscall.SyscallN(procGetGUIThreadInfo.Addr(), uintptr(thread), uintptr(unsafe.Pointer(info))) if r1 == 0 { err = errnoErr(e1) } @@ -4197,19 +4252,19 @@ func GetGUIThreadInfo(thread uint32, info *GUIThreadInfo) (err error) { } func GetKeyboardLayout(tid uint32) (hkl Handle) { - r0, _, _ := syscall.Syscall(procGetKeyboardLayout.Addr(), 1, uintptr(tid), 0, 0) + r0, _, _ := syscall.SyscallN(procGetKeyboardLayout.Addr(), uintptr(tid)) hkl = Handle(r0) return } func GetShellWindow() (shellWindow HWND) { - r0, _, _ := syscall.Syscall(procGetShellWindow.Addr(), 0, 0, 0, 0) + r0, _, _ := syscall.SyscallN(procGetShellWindow.Addr()) shellWindow = HWND(r0) return } func GetWindowThreadProcessId(hwnd HWND, pid *uint32) (tid uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetWindowThreadProcessId.Addr(), 2, uintptr(hwnd), uintptr(unsafe.Pointer(pid)), 0) + r0, _, e1 := syscall.SyscallN(procGetWindowThreadProcessId.Addr(), uintptr(hwnd), uintptr(unsafe.Pointer(pid))) tid = uint32(r0) if tid == 0 { err = errnoErr(e1) @@ -4218,25 +4273,25 @@ func GetWindowThreadProcessId(hwnd HWND, pid *uint32) (tid uint32, err error) { } func IsWindow(hwnd HWND) (isWindow bool) { - r0, _, _ := syscall.Syscall(procIsWindow.Addr(), 1, uintptr(hwnd), 0, 0) + r0, _, _ := syscall.SyscallN(procIsWindow.Addr(), uintptr(hwnd)) isWindow = r0 != 0 return } func IsWindowUnicode(hwnd HWND) (isUnicode bool) { - r0, _, _ := syscall.Syscall(procIsWindowUnicode.Addr(), 1, uintptr(hwnd), 0, 0) + r0, _, _ := 
syscall.SyscallN(procIsWindowUnicode.Addr(), uintptr(hwnd)) isUnicode = r0 != 0 return } func IsWindowVisible(hwnd HWND) (isVisible bool) { - r0, _, _ := syscall.Syscall(procIsWindowVisible.Addr(), 1, uintptr(hwnd), 0, 0) + r0, _, _ := syscall.SyscallN(procIsWindowVisible.Addr(), uintptr(hwnd)) isVisible = r0 != 0 return } func LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) { - r0, _, e1 := syscall.Syscall(procLoadKeyboardLayoutW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(flags), 0) + r0, _, e1 := syscall.SyscallN(procLoadKeyboardLayoutW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(flags)) hkl = Handle(r0) if hkl == 0 { err = errnoErr(e1) @@ -4245,7 +4300,7 @@ func LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) { } func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) { - r0, _, e1 := syscall.Syscall6(procMessageBoxW.Addr(), 4, uintptr(hwnd), uintptr(unsafe.Pointer(text)), uintptr(unsafe.Pointer(caption)), uintptr(boxtype), 0, 0) + r0, _, e1 := syscall.SyscallN(procMessageBoxW.Addr(), uintptr(hwnd), uintptr(unsafe.Pointer(text)), uintptr(unsafe.Pointer(caption)), uintptr(boxtype)) ret = int32(r0) if ret == 0 { err = errnoErr(e1) @@ -4254,13 +4309,13 @@ func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret i } func ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) { - r0, _, _ := syscall.Syscall9(procToUnicodeEx.Addr(), 7, uintptr(vkey), uintptr(scancode), uintptr(unsafe.Pointer(keystate)), uintptr(unsafe.Pointer(pwszBuff)), uintptr(cchBuff), uintptr(flags), uintptr(hkl), 0, 0) + r0, _, _ := syscall.SyscallN(procToUnicodeEx.Addr(), uintptr(vkey), uintptr(scancode), uintptr(unsafe.Pointer(keystate)), uintptr(unsafe.Pointer(pwszBuff)), uintptr(cchBuff), uintptr(flags), uintptr(hkl)) ret = int32(r0) return } func UnloadKeyboardLayout(hkl Handle) (err error) { - r1, _, 
e1 := syscall.Syscall(procUnloadKeyboardLayout.Addr(), 1, uintptr(hkl), 0, 0) + r1, _, e1 := syscall.SyscallN(procUnloadKeyboardLayout.Addr(), uintptr(hkl)) if r1 == 0 { err = errnoErr(e1) } @@ -4272,7 +4327,7 @@ func CreateEnvironmentBlock(block **uint16, token Token, inheritExisting bool) ( if inheritExisting { _p0 = 1 } - r1, _, e1 := syscall.Syscall(procCreateEnvironmentBlock.Addr(), 3, uintptr(unsafe.Pointer(block)), uintptr(token), uintptr(_p0)) + r1, _, e1 := syscall.SyscallN(procCreateEnvironmentBlock.Addr(), uintptr(unsafe.Pointer(block)), uintptr(token), uintptr(_p0)) if r1 == 0 { err = errnoErr(e1) } @@ -4280,7 +4335,7 @@ func CreateEnvironmentBlock(block **uint16, token Token, inheritExisting bool) ( } func DestroyEnvironmentBlock(block *uint16) (err error) { - r1, _, e1 := syscall.Syscall(procDestroyEnvironmentBlock.Addr(), 1, uintptr(unsafe.Pointer(block)), 0, 0) + r1, _, e1 := syscall.SyscallN(procDestroyEnvironmentBlock.Addr(), uintptr(unsafe.Pointer(block))) if r1 == 0 { err = errnoErr(e1) } @@ -4288,7 +4343,7 @@ func DestroyEnvironmentBlock(block *uint16) (err error) { } func GetUserProfileDirectory(t Token, dir *uint16, dirLen *uint32) (err error) { - r1, _, e1 := syscall.Syscall(procGetUserProfileDirectoryW.Addr(), 3, uintptr(t), uintptr(unsafe.Pointer(dir)), uintptr(unsafe.Pointer(dirLen))) + r1, _, e1 := syscall.SyscallN(procGetUserProfileDirectoryW.Addr(), uintptr(t), uintptr(unsafe.Pointer(dir)), uintptr(unsafe.Pointer(dirLen))) if r1 == 0 { err = errnoErr(e1) } @@ -4305,7 +4360,7 @@ func GetFileVersionInfoSize(filename string, zeroHandle *Handle) (bufSize uint32 } func _GetFileVersionInfoSize(filename *uint16, zeroHandle *Handle) (bufSize uint32, err error) { - r0, _, e1 := syscall.Syscall(procGetFileVersionInfoSizeW.Addr(), 2, uintptr(unsafe.Pointer(filename)), uintptr(unsafe.Pointer(zeroHandle)), 0) + r0, _, e1 := syscall.SyscallN(procGetFileVersionInfoSizeW.Addr(), uintptr(unsafe.Pointer(filename)), uintptr(unsafe.Pointer(zeroHandle))) 
bufSize = uint32(r0) if bufSize == 0 { err = errnoErr(e1) @@ -4323,7 +4378,7 @@ func GetFileVersionInfo(filename string, handle uint32, bufSize uint32, buffer u } func _GetFileVersionInfo(filename *uint16, handle uint32, bufSize uint32, buffer unsafe.Pointer) (err error) { - r1, _, e1 := syscall.Syscall6(procGetFileVersionInfoW.Addr(), 4, uintptr(unsafe.Pointer(filename)), uintptr(handle), uintptr(bufSize), uintptr(buffer), 0, 0) + r1, _, e1 := syscall.SyscallN(procGetFileVersionInfoW.Addr(), uintptr(unsafe.Pointer(filename)), uintptr(handle), uintptr(bufSize), uintptr(buffer)) if r1 == 0 { err = errnoErr(e1) } @@ -4340,7 +4395,7 @@ func VerQueryValue(block unsafe.Pointer, subBlock string, pointerToBufferPointer } func _VerQueryValue(block unsafe.Pointer, subBlock *uint16, pointerToBufferPointer unsafe.Pointer, bufSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procVerQueryValueW.Addr(), 4, uintptr(block), uintptr(unsafe.Pointer(subBlock)), uintptr(pointerToBufferPointer), uintptr(unsafe.Pointer(bufSize)), 0, 0) + r1, _, e1 := syscall.SyscallN(procVerQueryValueW.Addr(), uintptr(block), uintptr(unsafe.Pointer(subBlock)), uintptr(pointerToBufferPointer), uintptr(unsafe.Pointer(bufSize))) if r1 == 0 { err = errnoErr(e1) } @@ -4348,7 +4403,7 @@ func _VerQueryValue(block unsafe.Pointer, subBlock *uint16, pointerToBufferPoint } func TimeBeginPeriod(period uint32) (err error) { - r1, _, e1 := syscall.Syscall(proctimeBeginPeriod.Addr(), 1, uintptr(period), 0, 0) + r1, _, e1 := syscall.SyscallN(proctimeBeginPeriod.Addr(), uintptr(period)) if r1 != 0 { err = errnoErr(e1) } @@ -4356,7 +4411,7 @@ func TimeBeginPeriod(period uint32) (err error) { } func TimeEndPeriod(period uint32) (err error) { - r1, _, e1 := syscall.Syscall(proctimeEndPeriod.Addr(), 1, uintptr(period), 0, 0) + r1, _, e1 := syscall.SyscallN(proctimeEndPeriod.Addr(), uintptr(period)) if r1 != 0 { err = errnoErr(e1) } @@ -4364,7 +4419,7 @@ func TimeEndPeriod(period uint32) (err error) { } func 
WinVerifyTrustEx(hwnd HWND, actionId *GUID, data *WinTrustData) (ret error) { - r0, _, _ := syscall.Syscall(procWinVerifyTrustEx.Addr(), 3, uintptr(hwnd), uintptr(unsafe.Pointer(actionId)), uintptr(unsafe.Pointer(data))) + r0, _, _ := syscall.SyscallN(procWinVerifyTrustEx.Addr(), uintptr(hwnd), uintptr(unsafe.Pointer(actionId)), uintptr(unsafe.Pointer(data))) if r0 != 0 { ret = syscall.Errno(r0) } @@ -4372,12 +4427,12 @@ func WinVerifyTrustEx(hwnd HWND, actionId *GUID, data *WinTrustData) (ret error) } func FreeAddrInfoW(addrinfo *AddrinfoW) { - syscall.Syscall(procFreeAddrInfoW.Addr(), 1, uintptr(unsafe.Pointer(addrinfo)), 0, 0) + syscall.SyscallN(procFreeAddrInfoW.Addr(), uintptr(unsafe.Pointer(addrinfo))) return } func GetAddrInfoW(nodename *uint16, servicename *uint16, hints *AddrinfoW, result **AddrinfoW) (sockerr error) { - r0, _, _ := syscall.Syscall6(procGetAddrInfoW.Addr(), 4, uintptr(unsafe.Pointer(nodename)), uintptr(unsafe.Pointer(servicename)), uintptr(unsafe.Pointer(hints)), uintptr(unsafe.Pointer(result)), 0, 0) + r0, _, _ := syscall.SyscallN(procGetAddrInfoW.Addr(), uintptr(unsafe.Pointer(nodename)), uintptr(unsafe.Pointer(servicename)), uintptr(unsafe.Pointer(hints)), uintptr(unsafe.Pointer(result))) if r0 != 0 { sockerr = syscall.Errno(r0) } @@ -4385,7 +4440,7 @@ func GetAddrInfoW(nodename *uint16, servicename *uint16, hints *AddrinfoW, resul } func WSACleanup() (err error) { - r1, _, e1 := syscall.Syscall(procWSACleanup.Addr(), 0, 0, 0, 0) + r1, _, e1 := syscall.SyscallN(procWSACleanup.Addr()) if r1 == socket_error { err = errnoErr(e1) } @@ -4393,7 +4448,7 @@ func WSACleanup() (err error) { } func WSADuplicateSocket(s Handle, processID uint32, info *WSAProtocolInfo) (err error) { - r1, _, e1 := syscall.Syscall(procWSADuplicateSocketW.Addr(), 3, uintptr(s), uintptr(processID), uintptr(unsafe.Pointer(info))) + r1, _, e1 := syscall.SyscallN(procWSADuplicateSocketW.Addr(), uintptr(s), uintptr(processID), uintptr(unsafe.Pointer(info))) if r1 != 0 { 
err = errnoErr(e1) } @@ -4401,7 +4456,7 @@ func WSADuplicateSocket(s Handle, processID uint32, info *WSAProtocolInfo) (err } func WSAEnumProtocols(protocols *int32, protocolBuffer *WSAProtocolInfo, bufferLength *uint32) (n int32, err error) { - r0, _, e1 := syscall.Syscall(procWSAEnumProtocolsW.Addr(), 3, uintptr(unsafe.Pointer(protocols)), uintptr(unsafe.Pointer(protocolBuffer)), uintptr(unsafe.Pointer(bufferLength))) + r0, _, e1 := syscall.SyscallN(procWSAEnumProtocolsW.Addr(), uintptr(unsafe.Pointer(protocols)), uintptr(unsafe.Pointer(protocolBuffer)), uintptr(unsafe.Pointer(bufferLength))) n = int32(r0) if n == -1 { err = errnoErr(e1) @@ -4414,7 +4469,7 @@ func WSAGetOverlappedResult(h Handle, o *Overlapped, bytes *uint32, wait bool, f if wait { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procWSAGetOverlappedResult.Addr(), 5, uintptr(h), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(bytes)), uintptr(_p0), uintptr(unsafe.Pointer(flags)), 0) + r1, _, e1 := syscall.SyscallN(procWSAGetOverlappedResult.Addr(), uintptr(h), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(bytes)), uintptr(_p0), uintptr(unsafe.Pointer(flags))) if r1 == 0 { err = errnoErr(e1) } @@ -4422,7 +4477,7 @@ func WSAGetOverlappedResult(h Handle, o *Overlapped, bytes *uint32, wait bool, f } func WSAIoctl(s Handle, iocc uint32, inbuf *byte, cbif uint32, outbuf *byte, cbob uint32, cbbr *uint32, overlapped *Overlapped, completionRoutine uintptr) (err error) { - r1, _, e1 := syscall.Syscall9(procWSAIoctl.Addr(), 9, uintptr(s), uintptr(iocc), uintptr(unsafe.Pointer(inbuf)), uintptr(cbif), uintptr(unsafe.Pointer(outbuf)), uintptr(cbob), uintptr(unsafe.Pointer(cbbr)), uintptr(unsafe.Pointer(overlapped)), uintptr(completionRoutine)) + r1, _, e1 := syscall.SyscallN(procWSAIoctl.Addr(), uintptr(s), uintptr(iocc), uintptr(unsafe.Pointer(inbuf)), uintptr(cbif), uintptr(unsafe.Pointer(outbuf)), uintptr(cbob), uintptr(unsafe.Pointer(cbbr)), uintptr(unsafe.Pointer(overlapped)), uintptr(completionRoutine)) 
if r1 == socket_error { err = errnoErr(e1) } @@ -4430,7 +4485,7 @@ func WSAIoctl(s Handle, iocc uint32, inbuf *byte, cbif uint32, outbuf *byte, cbo } func WSALookupServiceBegin(querySet *WSAQUERYSET, flags uint32, handle *Handle) (err error) { - r1, _, e1 := syscall.Syscall(procWSALookupServiceBeginW.Addr(), 3, uintptr(unsafe.Pointer(querySet)), uintptr(flags), uintptr(unsafe.Pointer(handle))) + r1, _, e1 := syscall.SyscallN(procWSALookupServiceBeginW.Addr(), uintptr(unsafe.Pointer(querySet)), uintptr(flags), uintptr(unsafe.Pointer(handle))) if r1 == socket_error { err = errnoErr(e1) } @@ -4438,7 +4493,7 @@ func WSALookupServiceBegin(querySet *WSAQUERYSET, flags uint32, handle *Handle) } func WSALookupServiceEnd(handle Handle) (err error) { - r1, _, e1 := syscall.Syscall(procWSALookupServiceEnd.Addr(), 1, uintptr(handle), 0, 0) + r1, _, e1 := syscall.SyscallN(procWSALookupServiceEnd.Addr(), uintptr(handle)) if r1 == socket_error { err = errnoErr(e1) } @@ -4446,7 +4501,7 @@ func WSALookupServiceEnd(handle Handle) (err error) { } func WSALookupServiceNext(handle Handle, flags uint32, size *int32, querySet *WSAQUERYSET) (err error) { - r1, _, e1 := syscall.Syscall6(procWSALookupServiceNextW.Addr(), 4, uintptr(handle), uintptr(flags), uintptr(unsafe.Pointer(size)), uintptr(unsafe.Pointer(querySet)), 0, 0) + r1, _, e1 := syscall.SyscallN(procWSALookupServiceNextW.Addr(), uintptr(handle), uintptr(flags), uintptr(unsafe.Pointer(size)), uintptr(unsafe.Pointer(querySet))) if r1 == socket_error { err = errnoErr(e1) } @@ -4454,7 +4509,7 @@ func WSALookupServiceNext(handle Handle, flags uint32, size *int32, querySet *WS } func WSARecv(s Handle, bufs *WSABuf, bufcnt uint32, recvd *uint32, flags *uint32, overlapped *Overlapped, croutine *byte) (err error) { - r1, _, e1 := syscall.Syscall9(procWSARecv.Addr(), 7, uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(recvd)), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(overlapped)), 
uintptr(unsafe.Pointer(croutine)), 0, 0) + r1, _, e1 := syscall.SyscallN(procWSARecv.Addr(), uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(recvd)), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine))) if r1 == socket_error { err = errnoErr(e1) } @@ -4462,7 +4517,7 @@ func WSARecv(s Handle, bufs *WSABuf, bufcnt uint32, recvd *uint32, flags *uint32 } func WSARecvFrom(s Handle, bufs *WSABuf, bufcnt uint32, recvd *uint32, flags *uint32, from *RawSockaddrAny, fromlen *int32, overlapped *Overlapped, croutine *byte) (err error) { - r1, _, e1 := syscall.Syscall9(procWSARecvFrom.Addr(), 9, uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(recvd)), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(fromlen)), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine))) + r1, _, e1 := syscall.SyscallN(procWSARecvFrom.Addr(), uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(recvd)), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(fromlen)), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine))) if r1 == socket_error { err = errnoErr(e1) } @@ -4470,7 +4525,7 @@ func WSARecvFrom(s Handle, bufs *WSABuf, bufcnt uint32, recvd *uint32, flags *ui } func WSASend(s Handle, bufs *WSABuf, bufcnt uint32, sent *uint32, flags uint32, overlapped *Overlapped, croutine *byte) (err error) { - r1, _, e1 := syscall.Syscall9(procWSASend.Addr(), 7, uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(sent)), uintptr(flags), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine)), 0, 0) + r1, _, e1 := syscall.SyscallN(procWSASend.Addr(), uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(sent)), uintptr(flags), uintptr(unsafe.Pointer(overlapped)), 
uintptr(unsafe.Pointer(croutine))) if r1 == socket_error { err = errnoErr(e1) } @@ -4478,7 +4533,7 @@ func WSASend(s Handle, bufs *WSABuf, bufcnt uint32, sent *uint32, flags uint32, } func WSASendTo(s Handle, bufs *WSABuf, bufcnt uint32, sent *uint32, flags uint32, to *RawSockaddrAny, tolen int32, overlapped *Overlapped, croutine *byte) (err error) { - r1, _, e1 := syscall.Syscall9(procWSASendTo.Addr(), 9, uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(sent)), uintptr(flags), uintptr(unsafe.Pointer(to)), uintptr(tolen), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine))) + r1, _, e1 := syscall.SyscallN(procWSASendTo.Addr(), uintptr(s), uintptr(unsafe.Pointer(bufs)), uintptr(bufcnt), uintptr(unsafe.Pointer(sent)), uintptr(flags), uintptr(unsafe.Pointer(to)), uintptr(tolen), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine))) if r1 == socket_error { err = errnoErr(e1) } @@ -4486,7 +4541,7 @@ func WSASendTo(s Handle, bufs *WSABuf, bufcnt uint32, sent *uint32, flags uint32 } func WSASocket(af int32, typ int32, protocol int32, protoInfo *WSAProtocolInfo, group uint32, flags uint32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall6(procWSASocketW.Addr(), 6, uintptr(af), uintptr(typ), uintptr(protocol), uintptr(unsafe.Pointer(protoInfo)), uintptr(group), uintptr(flags)) + r0, _, e1 := syscall.SyscallN(procWSASocketW.Addr(), uintptr(af), uintptr(typ), uintptr(protocol), uintptr(unsafe.Pointer(protoInfo)), uintptr(group), uintptr(flags)) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -4495,7 +4550,7 @@ func WSASocket(af int32, typ int32, protocol int32, protoInfo *WSAProtocolInfo, } func WSAStartup(verreq uint32, data *WSAData) (sockerr error) { - r0, _, _ := syscall.Syscall(procWSAStartup.Addr(), 2, uintptr(verreq), uintptr(unsafe.Pointer(data)), 0) + r0, _, _ := syscall.SyscallN(procWSAStartup.Addr(), uintptr(verreq), uintptr(unsafe.Pointer(data))) if r0 != 0 { 
sockerr = syscall.Errno(r0) } @@ -4503,7 +4558,7 @@ func WSAStartup(verreq uint32, data *WSAData) (sockerr error) { } func bind(s Handle, name unsafe.Pointer, namelen int32) (err error) { - r1, _, e1 := syscall.Syscall(procbind.Addr(), 3, uintptr(s), uintptr(name), uintptr(namelen)) + r1, _, e1 := syscall.SyscallN(procbind.Addr(), uintptr(s), uintptr(name), uintptr(namelen)) if r1 == socket_error { err = errnoErr(e1) } @@ -4511,7 +4566,7 @@ func bind(s Handle, name unsafe.Pointer, namelen int32) (err error) { } func Closesocket(s Handle) (err error) { - r1, _, e1 := syscall.Syscall(procclosesocket.Addr(), 1, uintptr(s), 0, 0) + r1, _, e1 := syscall.SyscallN(procclosesocket.Addr(), uintptr(s)) if r1 == socket_error { err = errnoErr(e1) } @@ -4519,7 +4574,7 @@ func Closesocket(s Handle) (err error) { } func connect(s Handle, name unsafe.Pointer, namelen int32) (err error) { - r1, _, e1 := syscall.Syscall(procconnect.Addr(), 3, uintptr(s), uintptr(name), uintptr(namelen)) + r1, _, e1 := syscall.SyscallN(procconnect.Addr(), uintptr(s), uintptr(name), uintptr(namelen)) if r1 == socket_error { err = errnoErr(e1) } @@ -4536,7 +4591,7 @@ func GetHostByName(name string) (h *Hostent, err error) { } func _GetHostByName(name *byte) (h *Hostent, err error) { - r0, _, e1 := syscall.Syscall(procgethostbyname.Addr(), 1, uintptr(unsafe.Pointer(name)), 0, 0) + r0, _, e1 := syscall.SyscallN(procgethostbyname.Addr(), uintptr(unsafe.Pointer(name))) h = (*Hostent)(unsafe.Pointer(r0)) if h == nil { err = errnoErr(e1) @@ -4545,7 +4600,7 @@ func _GetHostByName(name *byte) (h *Hostent, err error) { } func getpeername(s Handle, rsa *RawSockaddrAny, addrlen *int32) (err error) { - r1, _, e1 := syscall.Syscall(procgetpeername.Addr(), 3, uintptr(s), uintptr(unsafe.Pointer(rsa)), uintptr(unsafe.Pointer(addrlen))) + r1, _, e1 := syscall.SyscallN(procgetpeername.Addr(), uintptr(s), uintptr(unsafe.Pointer(rsa)), uintptr(unsafe.Pointer(addrlen))) if r1 == socket_error { err = errnoErr(e1) } @@ 
-4562,7 +4617,7 @@ func GetProtoByName(name string) (p *Protoent, err error) { } func _GetProtoByName(name *byte) (p *Protoent, err error) { - r0, _, e1 := syscall.Syscall(procgetprotobyname.Addr(), 1, uintptr(unsafe.Pointer(name)), 0, 0) + r0, _, e1 := syscall.SyscallN(procgetprotobyname.Addr(), uintptr(unsafe.Pointer(name))) p = (*Protoent)(unsafe.Pointer(r0)) if p == nil { err = errnoErr(e1) @@ -4585,7 +4640,7 @@ func GetServByName(name string, proto string) (s *Servent, err error) { } func _GetServByName(name *byte, proto *byte) (s *Servent, err error) { - r0, _, e1 := syscall.Syscall(procgetservbyname.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(proto)), 0) + r0, _, e1 := syscall.SyscallN(procgetservbyname.Addr(), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(proto))) s = (*Servent)(unsafe.Pointer(r0)) if s == nil { err = errnoErr(e1) @@ -4594,7 +4649,7 @@ func _GetServByName(name *byte, proto *byte) (s *Servent, err error) { } func getsockname(s Handle, rsa *RawSockaddrAny, addrlen *int32) (err error) { - r1, _, e1 := syscall.Syscall(procgetsockname.Addr(), 3, uintptr(s), uintptr(unsafe.Pointer(rsa)), uintptr(unsafe.Pointer(addrlen))) + r1, _, e1 := syscall.SyscallN(procgetsockname.Addr(), uintptr(s), uintptr(unsafe.Pointer(rsa)), uintptr(unsafe.Pointer(addrlen))) if r1 == socket_error { err = errnoErr(e1) } @@ -4602,7 +4657,7 @@ func getsockname(s Handle, rsa *RawSockaddrAny, addrlen *int32) (err error) { } func Getsockopt(s Handle, level int32, optname int32, optval *byte, optlen *int32) (err error) { - r1, _, e1 := syscall.Syscall6(procgetsockopt.Addr(), 5, uintptr(s), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(optval)), uintptr(unsafe.Pointer(optlen)), 0) + r1, _, e1 := syscall.SyscallN(procgetsockopt.Addr(), uintptr(s), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(optval)), uintptr(unsafe.Pointer(optlen))) if r1 == socket_error { err = errnoErr(e1) } @@ -4610,7 +4665,7 @@ func Getsockopt(s Handle, 
level int32, optname int32, optval *byte, optlen *int3 } func listen(s Handle, backlog int32) (err error) { - r1, _, e1 := syscall.Syscall(proclisten.Addr(), 2, uintptr(s), uintptr(backlog), 0) + r1, _, e1 := syscall.SyscallN(proclisten.Addr(), uintptr(s), uintptr(backlog)) if r1 == socket_error { err = errnoErr(e1) } @@ -4618,7 +4673,7 @@ func listen(s Handle, backlog int32) (err error) { } func Ntohs(netshort uint16) (u uint16) { - r0, _, _ := syscall.Syscall(procntohs.Addr(), 1, uintptr(netshort), 0, 0) + r0, _, _ := syscall.SyscallN(procntohs.Addr(), uintptr(netshort)) u = uint16(r0) return } @@ -4628,7 +4683,7 @@ func recvfrom(s Handle, buf []byte, flags int32, from *RawSockaddrAny, fromlen * if len(buf) > 0 { _p0 = &buf[0] } - r0, _, e1 := syscall.Syscall6(procrecvfrom.Addr(), 6, uintptr(s), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(flags), uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(fromlen))) + r0, _, e1 := syscall.SyscallN(procrecvfrom.Addr(), uintptr(s), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(flags), uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(fromlen))) n = int32(r0) if n == -1 { err = errnoErr(e1) @@ -4641,7 +4696,7 @@ func sendto(s Handle, buf []byte, flags int32, to unsafe.Pointer, tolen int32) ( if len(buf) > 0 { _p0 = &buf[0] } - r1, _, e1 := syscall.Syscall6(procsendto.Addr(), 6, uintptr(s), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(flags), uintptr(to), uintptr(tolen)) + r1, _, e1 := syscall.SyscallN(procsendto.Addr(), uintptr(s), uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), uintptr(flags), uintptr(to), uintptr(tolen)) if r1 == socket_error { err = errnoErr(e1) } @@ -4649,7 +4704,7 @@ func sendto(s Handle, buf []byte, flags int32, to unsafe.Pointer, tolen int32) ( } func Setsockopt(s Handle, level int32, optname int32, optval *byte, optlen int32) (err error) { - r1, _, e1 := syscall.Syscall6(procsetsockopt.Addr(), 5, uintptr(s), uintptr(level), uintptr(optname), 
uintptr(unsafe.Pointer(optval)), uintptr(optlen), 0) + r1, _, e1 := syscall.SyscallN(procsetsockopt.Addr(), uintptr(s), uintptr(level), uintptr(optname), uintptr(unsafe.Pointer(optval)), uintptr(optlen)) if r1 == socket_error { err = errnoErr(e1) } @@ -4657,7 +4712,7 @@ func Setsockopt(s Handle, level int32, optname int32, optval *byte, optlen int32 } func shutdown(s Handle, how int32) (err error) { - r1, _, e1 := syscall.Syscall(procshutdown.Addr(), 2, uintptr(s), uintptr(how), 0) + r1, _, e1 := syscall.SyscallN(procshutdown.Addr(), uintptr(s), uintptr(how)) if r1 == socket_error { err = errnoErr(e1) } @@ -4665,7 +4720,7 @@ func shutdown(s Handle, how int32) (err error) { } func socket(af int32, typ int32, protocol int32) (handle Handle, err error) { - r0, _, e1 := syscall.Syscall(procsocket.Addr(), 3, uintptr(af), uintptr(typ), uintptr(protocol)) + r0, _, e1 := syscall.SyscallN(procsocket.Addr(), uintptr(af), uintptr(typ), uintptr(protocol)) handle = Handle(r0) if handle == InvalidHandle { err = errnoErr(e1) @@ -4674,7 +4729,7 @@ func socket(af int32, typ int32, protocol int32) (handle Handle, err error) { } func WTSEnumerateSessions(handle Handle, reserved uint32, version uint32, sessions **WTS_SESSION_INFO, count *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procWTSEnumerateSessionsW.Addr(), 5, uintptr(handle), uintptr(reserved), uintptr(version), uintptr(unsafe.Pointer(sessions)), uintptr(unsafe.Pointer(count)), 0) + r1, _, e1 := syscall.SyscallN(procWTSEnumerateSessionsW.Addr(), uintptr(handle), uintptr(reserved), uintptr(version), uintptr(unsafe.Pointer(sessions)), uintptr(unsafe.Pointer(count))) if r1 == 0 { err = errnoErr(e1) } @@ -4682,12 +4737,12 @@ func WTSEnumerateSessions(handle Handle, reserved uint32, version uint32, sessio } func WTSFreeMemory(ptr uintptr) { - syscall.Syscall(procWTSFreeMemory.Addr(), 1, uintptr(ptr), 0, 0) + syscall.SyscallN(procWTSFreeMemory.Addr(), uintptr(ptr)) return } func WTSQueryUserToken(session uint32, token 
*Token) (err error) { - r1, _, e1 := syscall.Syscall(procWTSQueryUserToken.Addr(), 2, uintptr(session), uintptr(unsafe.Pointer(token)), 0) + r1, _, e1 := syscall.SyscallN(procWTSQueryUserToken.Addr(), uintptr(session), uintptr(unsafe.Pointer(token))) if r1 == 0 { err = errnoErr(e1) } diff --git a/vendor/modules.txt b/vendor/modules.txt index 11fe32ebc4..5adf43b5f2 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,3 +1,9 @@ +# cyphar.com/go-pathrs v0.2.1 +## explicit; go 1.18 +cyphar.com/go-pathrs +cyphar.com/go-pathrs/internal/fdutils +cyphar.com/go-pathrs/internal/libpathrs +cyphar.com/go-pathrs/procfs # github.com/Microsoft/cosesign1go v1.4.0 ## explicit; go 1.20 github.com/Microsoft/cosesign1go/pkg/cosesign1 @@ -42,12 +48,16 @@ github.com/cenkalti/backoff/v4 # github.com/cespare/xxhash/v2 v2.3.0 ## explicit; go 1.11 github.com/cespare/xxhash/v2 +# github.com/checkpoint-restore/go-criu/v6 v6.3.0 +## explicit; go 1.16 +github.com/checkpoint-restore/go-criu/v6 +github.com/checkpoint-restore/go-criu/v6/rpc # github.com/containerd/cgroups/v3 v3.0.5 ## explicit; go 1.22.0 github.com/containerd/cgroups/v3 github.com/containerd/cgroups/v3/cgroup1 github.com/containerd/cgroups/v3/cgroup1/stats -# github.com/containerd/console v1.0.4 +# github.com/containerd/console v1.0.5 ## explicit; go 1.13 github.com/containerd/console # github.com/containerd/containerd/api v1.9.0 @@ -131,6 +141,20 @@ github.com/coreos/go-systemd/v22/dbus # github.com/cpuguy83/go-md2man/v2 v2.0.5 ## explicit; go 1.11 github.com/cpuguy83/go-md2man/v2/md2man +# github.com/cyphar/filepath-securejoin v0.6.0 +## explicit; go 1.18 +github.com/cyphar/filepath-securejoin +github.com/cyphar/filepath-securejoin/internal/consts +github.com/cyphar/filepath-securejoin/pathrs-lite +github.com/cyphar/filepath-securejoin/pathrs-lite/internal +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd 
+github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gopathrs +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux +github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs +github.com/cyphar/filepath-securejoin/pathrs-lite/procfs # github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 ## explicit; go 1.17 github.com/decred/dcrd/dcrec/secp256k1/v4 @@ -293,6 +317,9 @@ github.com/mdlayher/vsock # github.com/mitchellh/go-homedir v1.1.0 ## explicit github.com/mitchellh/go-homedir +# github.com/moby/sys/capability v0.4.0 +## explicit; go 1.21 +github.com/moby/sys/capability # github.com/moby/sys/mountinfo v0.7.2 ## explicit; go 1.17 github.com/moby/sys/mountinfo @@ -302,6 +329,9 @@ github.com/moby/sys/user # github.com/moby/sys/userns v0.1.0 ## explicit; go 1.21 github.com/moby/sys/userns +# github.com/mrunalp/fileutils v0.5.1 +## explicit; go 1.13 +github.com/mrunalp/fileutils # github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 ## explicit github.com/munnerz/goautoneg @@ -397,7 +427,14 @@ github.com/open-policy-agent/opa/util/decoding github.com/open-policy-agent/opa/version # github.com/opencontainers/cgroups v0.0.4 ## explicit; go 1.23.0 +github.com/opencontainers/cgroups github.com/opencontainers/cgroups/devices/config +github.com/opencontainers/cgroups/fs +github.com/opencontainers/cgroups/fs2 +github.com/opencontainers/cgroups/fscommon +github.com/opencontainers/cgroups/internal/path +github.com/opencontainers/cgroups/manager +github.com/opencontainers/cgroups/systemd # github.com/opencontainers/go-digest v1.0.0 ## explicit; go 1.13 github.com/opencontainers/go-digest @@ -405,13 +442,36 @@ github.com/opencontainers/go-digest ## explicit; go 1.18 github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.3.0 +# 
github.com/opencontainers/runc v1.3.3 ## explicit; go 1.23.0 +github.com/opencontainers/runc/internal/linux +github.com/opencontainers/runc/internal/pathrs +github.com/opencontainers/runc/internal/sys +github.com/opencontainers/runc/libcontainer +github.com/opencontainers/runc/libcontainer/apparmor +github.com/opencontainers/runc/libcontainer/capabilities +github.com/opencontainers/runc/libcontainer/configs +github.com/opencontainers/runc/libcontainer/configs/validate github.com/opencontainers/runc/libcontainer/devices +github.com/opencontainers/runc/libcontainer/exeseal +github.com/opencontainers/runc/libcontainer/intelrdt +github.com/opencontainers/runc/libcontainer/internal/userns +github.com/opencontainers/runc/libcontainer/keys +github.com/opencontainers/runc/libcontainer/logs +github.com/opencontainers/runc/libcontainer/seccomp +github.com/opencontainers/runc/libcontainer/seccomp/patchbpf +github.com/opencontainers/runc/libcontainer/system +github.com/opencontainers/runc/libcontainer/utils +github.com/opencontainers/runc/types # github.com/opencontainers/runtime-spec v1.2.1 ## explicit github.com/opencontainers/runtime-spec/specs-go github.com/opencontainers/runtime-spec/specs-go/features +# github.com/opencontainers/selinux v1.13.0 +## explicit; go 1.19 +github.com/opencontainers/selinux/go-selinux +github.com/opencontainers/selinux/go-selinux/label +github.com/opencontainers/selinux/pkg/pwalkdir # github.com/pelletier/go-toml v1.9.5 ## explicit; go 1.12 github.com/pelletier/go-toml @@ -440,6 +500,9 @@ github.com/rcrowley/go-metrics # github.com/russross/blackfriday/v2 v2.1.0 ## explicit github.com/russross/blackfriday/v2 +# github.com/seccomp/libseccomp-golang v0.10.0 +## explicit; go 1.14 +github.com/seccomp/libseccomp-golang # github.com/sirupsen/logrus v1.9.3 ## explicit; go 1.13 github.com/sirupsen/logrus @@ -565,8 +628,8 @@ golang.org/x/net/trace # golang.org/x/sync v0.16.0 ## explicit; go 1.23.0 golang.org/x/sync/errgroup -# golang.org/x/sys v0.35.0 
-## explicit; go 1.23.0 +# golang.org/x/sys v0.39.0 +## explicit; go 1.24.0 golang.org/x/sys/execabs golang.org/x/sys/unix golang.org/x/sys/windows