From f3f1d3684d5b72adc6f9715bf1c1d49cc8a4f096 Mon Sep 17 00:00:00 2001 From: David Finkel Date: Fri, 6 Dec 2024 09:39:26 -0500 Subject: [PATCH 1/5] Make pparser.LineKVFileParser generic and typesafe Improve type-safety by pulling in the recent changes making LineKVFileParser generic. This guarantees that the index that has been generated for one type cannot be used with another. This also lets us eliminate a runtime check that was verifying that the correct type was passed. (since that's now enforced at compile-time) --- cgrouplimits/cgroup_linux.go | 7 ++----- cgrouplimits/host_linux.go | 9 ++------- pparser/proc_human_parser.go | 34 +++++++++++++++------------------- 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/cgrouplimits/cgroup_linux.go b/cgrouplimits/cgroup_linux.go index 8c678c2..da2a92d 100644 --- a/cgrouplimits/cgroup_linux.go +++ b/cgrouplimits/cgroup_linux.go @@ -13,6 +13,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/vimeo/procstats" "github.com/vimeo/procstats/pparser" ) @@ -145,11 +146,7 @@ func ReadCGroupOOMControl(memCgroupPath string) (MemCgroupOOMControl, error) { return oomc, nil } -var memCgroupOOMControlFieldIdx *pparser.LineKVFileParser - -func init() { - memCgroupOOMControlFieldIdx = pparser.NewLineKVFileParser(MemCgroupOOMControl{}, " ") -} +var memCgroupOOMControlFieldIdx = pparser.NewLineKVFileParser(MemCgroupOOMControl{}, " ") // getCgroupOOMs looks up the current number of oom kills for the cgroup // specified by the path in its argument. diff --git a/cgrouplimits/host_linux.go b/cgrouplimits/host_linux.go index f52e042..ea92aad 100644 --- a/cgrouplimits/host_linux.go +++ b/cgrouplimits/host_linux.go @@ -135,13 +135,8 @@ type hostMemInfo struct { // hostMemInfoFieldIdx is an index of the name in /proc/meminfo to the field // index in the hostMemInfo struct. 
-var hostMemInfoFieldIdx *pparser.LineKVFileParser -var hostVMStatFieldIdx *pparser.LineKVFileParser - -func init() { - hostMemInfoFieldIdx = pparser.NewLineKVFileParser(hostMemInfo{}, ":") - hostVMStatFieldIdx = pparser.NewLineKVFileParser(hostVMStat{}, " ") -} +var hostMemInfoFieldIdx = pparser.NewLineKVFileParser(hostMemInfo{}, ":") +var hostVMStatFieldIdx = pparser.NewLineKVFileParser(hostVMStat{}, " ") // fields from /proc/vmstat pulled from "mm/vmstat.c" // generated with c&p of vmstat_text[] followed by some regexp mangling diff --git a/pparser/proc_human_parser.go b/pparser/proc_human_parser.go index dab20bd..daa0dde 100644 --- a/pparser/proc_human_parser.go +++ b/pparser/proc_human_parser.go @@ -79,10 +79,10 @@ func fieldIndex(t interface{}) (map[string]int, int, reflect.Kind) { // be of the concrete struct-type, not a pointer to that type. // Note: this is intended to be called once at startup for a type (usually // within an `init()` func or as a package-level variable declaration). -func NewLineKVFileParser(t interface{}, splitKey string) *LineKVFileParser { +func NewLineKVFileParser[T any](t T, splitKey string) *LineKVFileParser[T] { idx, unknownIdx, unknownKind := fieldIndex(t) - return &LineKVFileParser{ + return &LineKVFileParser[T]{ idx: idx, splitKey: splitKey, unknownFieldsIdx: unknownIdx, @@ -94,7 +94,7 @@ func NewLineKVFileParser(t interface{}, splitKey string) *LineKVFileParser { // LineKVFileParser provides a Parse(), it is not mutated by Parse(), and as // such is thread-agnostic. 
-type LineKVFileParser struct { +type LineKVFileParser[T any] struct { idx map[string]int splitKey string unknownFieldsIdx int @@ -109,7 +109,7 @@ func trimStringWithMultiplier(s string) (string, int64) { return s, 1 } -func (p *LineKVFileParser) fieldKind(fieldName string) reflect.Kind { +func (p *LineKVFileParser[T]) fieldKind(fieldName string) reflect.Kind { fieldIndex, knownField := p.idx[fieldName] if !knownField { return p.unknownKind @@ -117,7 +117,7 @@ func (p *LineKVFileParser) fieldKind(fieldName string) reflect.Kind { return p.structType.Field(fieldIndex).Type.Kind() } -func (p *LineKVFileParser) setIntField( +func (p *LineKVFileParser[T]) setIntField( outVal *reflect.Value, fieldName string, fieldValue int64) error { fieldIndex, knownField := p.idx[fieldName] var f reflect.Value @@ -154,7 +154,7 @@ func (p *LineKVFileParser) setIntField( return nil } -func (p *LineKVFileParser) setUintField( +func (p *LineKVFileParser[T]) setUintField( outVal *reflect.Value, fieldName string, fieldValue uint64) error { fieldIndex, knownField := p.idx[fieldName] var f reflect.Value @@ -191,7 +191,7 @@ func (p *LineKVFileParser) setUintField( return nil } -func (p *LineKVFileParser) setFloatField( +func (p *LineKVFileParser[T]) setFloatField( outVal *reflect.Value, fieldName string, fieldValue float64) error { fieldIndex, knownField := p.idx[fieldName] var f reflect.Value @@ -227,7 +227,7 @@ func (p *LineKVFileParser) setFloatField( return nil } -func (p *LineKVFileParser) setStringField( +func (p *LineKVFileParser[T]) setStringField( outVal *reflect.Value, fieldName, fieldValue string) error { fieldIndex, knownField := p.idx[fieldName] var f reflect.Value @@ -254,12 +254,8 @@ func (p *LineKVFileParser) setStringField( // Parse takes file-contents and an out-variable to populate. The out argument // must be a pointer to the same type as passed to NewLineKVFileParser. 
-func (p *LineKVFileParser) Parse(contentBytes []byte, out interface{}) error { +func (p *LineKVFileParser[T]) Parse(contentBytes []byte, out *T) error { outVal := reflect.ValueOf(out).Elem() - if outVal.Type() != p.structType { - return fmt.Errorf("mismatched types: indexed %s, but passed %s", - p.structType, outVal.Type()) - } b := bytes.NewBuffer(contentBytes) line, err := b.ReadString('\n') @@ -277,8 +273,8 @@ func (p *LineKVFileParser) Parse(contentBytes []byte, out interface{}) error { switch k { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: { - trimmedVal, mul := trimStringWithMultiplier(trimmedVal) - val, intParseErr := strconv.ParseInt(trimmedVal, 10, 64) + trimmedIntVal, mul := trimStringWithMultiplier(trimmedVal) + val, intParseErr := strconv.ParseInt(trimmedIntVal, 10, 64) if intParseErr != nil { return fmt.Errorf("failed to parse line %q: %s", line, intParseErr) @@ -291,8 +287,8 @@ func (p *LineKVFileParser) Parse(contentBytes []byte, out interface{}) error { } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: { - trimmedVal, mul := trimStringWithMultiplier(trimmedVal) - val, intParseErr := strconv.ParseUint(trimmedVal, 10, 64) + trimmedUintVal, mul := trimStringWithMultiplier(trimmedVal) + val, intParseErr := strconv.ParseUint(trimmedUintVal, 10, 64) if intParseErr != nil { return fmt.Errorf("failed to parse line %q: %s", line, intParseErr) @@ -305,8 +301,8 @@ func (p *LineKVFileParser) Parse(contentBytes []byte, out interface{}) error { } case reflect.Float32, reflect.Float64: { - trimmedVal, mul := trimStringWithMultiplier(trimmedVal) - val, floatParseErr := strconv.ParseFloat(trimmedVal, 64) + trimmedFloatVal, mul := trimStringWithMultiplier(trimmedVal) + val, floatParseErr := strconv.ParseFloat(trimmedFloatVal, 64) if floatParseErr != nil { return fmt.Errorf("failed to parse line %q: %s", line, floatParseErr) From 061cdaee0cf32b0df2449c5d4f7ff4ffd0426741 Mon Sep 17 00:00:00 2001 From: 
David Finkel Date: Fri, 6 Dec 2024 09:57:13 -0500 Subject: [PATCH 2/5] cgresolver: cgroup2 & cgroup1 resolution support Add a subpackage for resolving the relevant cgroup for a process, and finding a usable cgroup mount for that subsystem. In particular, this provides the ability to remove the rather problematic docker libcgroup implementation from our dependency-set. --- cgresolver/cg_path.go | 106 +++ cgresolver/cg_path_test.go | 93 +++ cgresolver/mountinfo_parse.go | 137 ++++ cgresolver/mountinfo_parse_test.go | 315 +++++++++ cgresolver/proc_cgroup.go | 284 ++++++++ cgresolver/proc_cgroup_test.go | 1045 ++++++++++++++++++++++++++++ cgrouplimits/host_linux.go | 6 +- go.mod | 9 +- go.sum | 1 + 9 files changed, 1992 insertions(+), 4 deletions(-) create mode 100644 cgresolver/cg_path.go create mode 100644 cgresolver/cg_path_test.go create mode 100644 cgresolver/mountinfo_parse.go create mode 100644 cgresolver/mountinfo_parse_test.go create mode 100644 cgresolver/proc_cgroup.go create mode 100644 cgresolver/proc_cgroup_test.go diff --git a/cgresolver/cg_path.go b/cgresolver/cg_path.go new file mode 100644 index 0000000..771cf02 --- /dev/null +++ b/cgresolver/cg_path.go @@ -0,0 +1,106 @@ +// package cgresolver contains helpers and types for resolving the CGroup associated with specific subsystems +// If you don't know what cgroup subsystems are, you probably want one of the higher-level interfaces in the parent package. +package cgresolver + +import ( + "fmt" + "os" + "slices" + "strconv" + "strings" +) + +// CGMode is an enum indicating which cgroup type is active for the returned controller +type CGMode uint8 + +const ( + CGModeUnknown CGMode = iota + // CGroup V1 + CGModeV1 + // CGroup V2 + CGModeV2 +) + +func cgroup2Mode(iscg2 bool) CGMode { + if iscg2 { + return CGModeV2 + } + return CGModeV1 +} + +// CGroupPath includes information about a cgroup. 
+type CGroupPath struct { + AbsPath string + MountPath string + Mode CGMode +} + +// Parent returns a CGroupPath for the parent directory as long as it wouldn't pass the root of the mountpoint. +// second return indicates whether a new path was returned. +func (c *CGroupPath) Parent() (CGroupPath, bool) { + // Remove any trailing slash + path := strings.TrimSuffix(c.AbsPath, string(os.PathSeparator)) + mnt := strings.TrimSuffix(c.MountPath, string(os.PathSeparator)) + if mnt == path { + return CGroupPath{ + AbsPath: path, + MountPath: mnt, + Mode: c.Mode, + }, false + } + lastSlashIdx := strings.LastIndexByte(path, byte(os.PathSeparator)) + if lastSlashIdx == -1 { + // This shouldn't happen + panic("invalid state: path \"" + path + "\" has no slashes and doesn't match the mountpoint") + } + return CGroupPath{ + AbsPath: path[:lastSlashIdx], + MountPath: mnt, // Strip any trailing slash in case one snuck in + Mode: c.Mode, + }, true +} + +// SelfSubsystemPath returns a CGroupPath for the cgroup associated with a specific subsystem for the current process. 
+func SelfSubsystemPath(subsystem string) (CGroupPath, error) { + return subsystemPath("self", subsystem) +} + +// PIDSubsystemPath returns a CGroupPath for the cgroup associated with a specific subsystem for the specified PID +func PIDSubsystemPath(pid int, subsystem string) (CGroupPath, error) { + return subsystemPath(strconv.Itoa(pid), subsystem) +} + +func subsystemPath(procSubDir string, subsystem string) (CGroupPath, error) { + cgSubSyses, cgSubSysReadErr := ParseReadCGSubsystems() + if cgSubSysReadErr != nil { + return CGroupPath{}, fmt.Errorf("failed to resolve subsystems to hierarchies: %w", cgSubSysReadErr) + } + cgIdx := slices.IndexFunc(cgSubSyses, func(c CGroupSubsystem) bool { + return c.Subsys == subsystem + }) + if cgIdx == -1 { + return CGroupPath{}, fmt.Errorf("no cgroup hierarchy associated with subsystem %q", subsystem) + } + cgHierID := cgSubSyses[cgIdx].Hierarchy + + procCGs, procCGsErr := resolveProcCGControllers(procSubDir) + if procCGsErr != nil { + return CGroupPath{}, fmt.Errorf("failed to resolve cgroup controllers: %w", procCGsErr) + } + + procCGIdx := slices.IndexFunc(procCGs, func(cg CGProcHierarchy) bool { return cg.HierarchyID == cgHierID }) + if procCGIdx == -1 { + return CGroupPath{}, fmt.Errorf("failed to resolve process cgroup controllers: %w", procCGsErr) + } + + cgMountInfo, mountInfoParseErr := CGroupMountInfo() + if mountInfoParseErr != nil { + return CGroupPath{}, fmt.Errorf("failed to parse mountinfo: %w", mountInfoParseErr) + } + + cgPath, cgPathErr := procCGs[procCGIdx].cgPath(cgMountInfo) + if cgPathErr != nil { + return CGroupPath{}, fmt.Errorf("failed to resolve filesystem path for cgroup %+v: %w", procCGs[procCGIdx], cgPathErr) + } + return cgPath, nil +} diff --git a/cgresolver/cg_path_test.go b/cgresolver/cg_path_test.go new file mode 100644 index 0000000..97fced4 --- /dev/null +++ b/cgresolver/cg_path_test.go @@ -0,0 +1,93 @@ +package cgresolver + +import "testing" + +func TestCGroupPathParent(t *testing.T) { + 
for _, tbl := range []struct { + name string + in CGroupPath + expParent CGroupPath + expNewParent bool + }{ + { + name: "cgroup_mount_root", + in: CGroupPath{ + AbsPath: "/sys/fs/cgroup", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV2, + }, + expParent: CGroupPath{ + AbsPath: "/sys/fs/cgroup", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV2, + }, + expNewParent: false, + }, + { + name: "cgroup_mount_root_strip_trailing_slashes", + in: CGroupPath{ + AbsPath: "/sys/fs/cgroup/", + MountPath: "/sys/fs/cgroup/", + Mode: CGModeV2, + }, + expParent: CGroupPath{ + AbsPath: "/sys/fs/cgroup", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV2, + }, + expNewParent: false, + }, + { + name: "cgroup_mount_sub_cgroup_cgv1", + in: CGroupPath{ + AbsPath: "/sys/fs/cgroup/a/b/c", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV1, + }, + expParent: CGroupPath{ + AbsPath: "/sys/fs/cgroup/a/b", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV1, + }, + expNewParent: true, + }, + { + name: "cgroup_mount_sub_cgroup_cgv2", + in: CGroupPath{ + AbsPath: "/sys/fs/cgroup/a/b/c", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV2, + }, + expParent: CGroupPath{ + AbsPath: "/sys/fs/cgroup/a/b", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV2, + }, + expNewParent: true, + }, + { + name: "cgroup_mount_sub_cgroup_strip_trailing_slash", + in: CGroupPath{ + AbsPath: "/sys/fs/cgroup/a/b/c/", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV2, + }, + expParent: CGroupPath{ + AbsPath: "/sys/fs/cgroup/a/b", + MountPath: "/sys/fs/cgroup", + Mode: CGModeV2, + }, + expNewParent: true, + }, + } { + t.Run(tbl.name, func(t *testing.T) { + par, np := tbl.in.Parent() + if np != tbl.expNewParent { + t.Errorf("unexpected OK value: %t; expected %t", np, tbl.expNewParent) + } + if par != tbl.expParent { + t.Errorf("unexpected parent CGroupPath:\n got %+v\n want %+v", par, tbl.expParent) + } + }) + } +} diff --git a/cgresolver/mountinfo_parse.go b/cgresolver/mountinfo_parse.go new file mode 100644 index 0000000..46512b5 --- 
/dev/null +++ b/cgresolver/mountinfo_parse.go @@ -0,0 +1,137 @@ +package cgresolver + +import ( + "fmt" + "os" + "strconv" + "strings" +) + +// Mount represents a cgroup or cgroup2 mount. +// Subsystems will be nil if the mount is for a unified hierarchy/cgroup v2 +// in that case, CGroupV2 will be true. +type Mount struct { + Mountpoint string + Root string + Subsystems []string + CGroupV2 bool // true if this is a cgroup2 mount +} + +const ( + mountinfoPath = "/proc/self/mountinfo" +) + +// CGroupMountInfo parses /proc/self/mountinfo and returns info about all cgroup and cgroup2 mounts +func CGroupMountInfo() ([]Mount, error) { + mountinfoContents, mntInfoReadErr := os.ReadFile(mountinfoPath) + if mntInfoReadErr != nil { + return nil, fmt.Errorf("failed to read contents of %s: %w", + mountinfoPath, mntInfoReadErr) + } + + mounts, mntsErr := getCGroupMountsFromMountinfo(string(mountinfoContents)) + if mntsErr != nil { + return nil, fmt.Errorf("failed to list cgroupfs mounts: %w", mntsErr) + } + + return mounts, nil +} + +func getCGroupMountsFromMountinfo(mountinfo string) ([]Mount, error) { + // mountinfo is line-delimited, then space-delimited + mountinfoLines := strings.Split(mountinfo, "\n") + if len(mountinfoLines) == 0 { + return nil, fmt.Errorf("unexpectedly empty mountinfo (one line): %q", mountinfo) + } + out := make([]Mount, 0, len(mountinfoLines)) + for _, line := range mountinfoLines { + if len(line) == 0 { + continue + } + sections := strings.SplitN(line, " - ", 2) + if len(sections) < 2 { + return nil, fmt.Errorf("missing section separator in line %q", line) + } + s2Fields := strings.SplitN(sections[1], " ", 3) + if len(s2Fields) < 3 { + return nil, fmt.Errorf("line %q contains %d fields in second section, expected 3", + line, len(s2Fields)) + + } + isCG2 := false + switch s2Fields[0] { + case "cgroup": + isCG2 = false + case "cgroup2": + isCG2 = true + default: + // skip anything that's not a cgroup + continue + } + s1Fields := 
strings.Split(sections[0], " ") + if len(s1Fields) < 5 { + return nil, fmt.Errorf("too few fields in line %q before optional separator: %d; expected 5", + line, len(s1Fields)) + } + mntpnt, mntPntUnescapeErr := unOctalEscape(s1Fields[4]) + if mntPntUnescapeErr != nil { + return nil, fmt.Errorf("failed to unescape mountpoint %q: %w", s1Fields[4], mntPntUnescapeErr) + } + rootPath, rootUnescErr := unOctalEscape(s1Fields[3]) + if rootUnescErr != nil { + return nil, fmt.Errorf("failed to unescape mount root %q: %w", s1Fields[3], rootUnescErr) + } + mnt := Mount{ + CGroupV2: isCG2, + Mountpoint: mntpnt, + Root: rootPath, + Subsystems: nil, + } + // only bother with the mount options to find subsystems if cgroup v1 + if !isCG2 { + for _, mntOpt := range strings.Split(s2Fields[2], ",") { + switch mntOpt { + case "ro", "rw": + // These mount options are lies, (or at least + // only reflect the original mount, without + // considering the layering of later bind-mounts) + continue + case "": + continue + default: + mnt.Subsystems = append(mnt.Subsystems, mntOpt) + } + } + } + + out = append(out, mnt) + + } + return out, nil +} + +func unOctalEscape(str string) (string, error) { + b := strings.Builder{} + b.Grow(len(str)) + for { + backslashIdx := strings.IndexByte(str, byte('\\')) + if backslashIdx == -1 { + b.WriteString(str) + return b.String(), nil + } + b.WriteString(str[:backslashIdx]) + // if the end of the escape is beyond the end of the string, abort! 
+ if backslashIdx+3 >= len(str) { + return "", fmt.Errorf("invalid offset: %d+3 >= len %d", backslashIdx, len(str)) + } + // slice out the octal 3-digit component + esc := str[backslashIdx+1 : backslashIdx+4] + asciiVal, parseUintErr := strconv.ParseUint(esc, 8, 8) + if parseUintErr != nil { + return "", fmt.Errorf("failed to parse escape value %q: %w", esc, parseUintErr) + } + b.WriteByte(byte(asciiVal)) + str = str[backslashIdx+4:] + } + +} diff --git a/cgresolver/mountinfo_parse_test.go b/cgresolver/mountinfo_parse_test.go new file mode 100644 index 0000000..ff5f2e5 --- /dev/null +++ b/cgresolver/mountinfo_parse_test.go @@ -0,0 +1,315 @@ +package cgresolver + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestUnOctalEscapeNoError(t *testing.T) { + for _, itbl := range []struct { + name string + in string + expected string + }{ + { + name: "noescape", + in: "abcd/def", + expected: "abcd/def", + }, { + name: "empty", + in: "", + expected: "", + }, { + name: "onechar", + in: "1", + expected: "1", + }, { + name: "octalnum", + in: "111", + expected: "111", + }, { + name: "escaped_slash", + in: "111\\134", + expected: "111\\", + }, { + name: "escaped_space", + in: "111\\040", + expected: "111 ", + }, + } { + tbl := itbl + t.Run(tbl.name, func(t *testing.T) { + t.Parallel() + + out, err := unOctalEscape(tbl.in) + require.NoError(t, err) + assert.Equal(t, tbl.expected, out) + }) + } + +} +func TestUnOctalEscapeWithError(t *testing.T) { + for _, itbl := range []struct { + name string + in string + expectedError string + }{ + { + name: "short_escape", + in: "111\\13", + expectedError: "invalid offset: 3+3 >= len 6", + }, { + name: "non-octal_digit", + in: "111\\049", + expectedError: "failed to parse escape value \"049\": strconv.ParseUint: parsing \"049\": invalid syntax", + }, + } { + tbl := itbl + t.Run(tbl.name, func(t *testing.T) { + t.Parallel() + + out, err := unOctalEscape(tbl.in) + assert.Empty(t, 
out) + assert.EqualError(t, err, tbl.expectedError) + }) + } + +} + +func TestParseMountInfoGentoo(t *testing.T) { + t.Parallel() + gentooMI := ` +26 34 0:5 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw +27 34 0:25 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw +28 34 0:6 / /dev rw,nosuid - devtmpfs devtmpfs rw,size=10240k,nr_inodes=2526523,mode=755 +29 28 0:26 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000 +30 28 0:27 / /dev/shm rw,nosuid,nodev,noexec - tmpfs tmpfs rw +31 34 0:28 / /run rw,nosuid,nodev,noexec - tmpfs tmpfs rw,mode=755 +35 34 252:1 /home /home rw,relatime - ext4 /dev/mapper/ubuntu--vg-root rw,errors=remount-ro,data=ordered +36 27 0:8 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime - securityfs securityfs rw +37 27 0:7 / /sys/kernel/debug rw,nosuid,nodev,noexec,relatime - debugfs debugfs rw +38 28 0:20 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw +39 27 0:22 / /sys/kernel/config rw,nosuid,nodev,noexec,relatime - configfs configfs rw +40 27 0:29 / /sys/fs/fuse/connections rw,nosuid,nodev,noexec,relatime - fusectl fusectl rw +41 27 0:21 / /sys/fs/selinux rw,relatime - selinuxfs selinuxfs rw +42 27 0:30 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime - pstore pstore rw +43 27 0:31 / /sys/firmware/efi/efivars rw,nosuid,nodev,noexec,relatime - efivarfs efivarfs rw +44 27 0:32 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs cgroup_root rw,size=10240k,mode=755 +45 44 0:33 / /sys/fs/cgroup/openrc rw,nosuid,nodev,noexec,relatime - cgroup openrc rw,release_agent=/lib/rc/sh/cgroup-release-agent.sh,name=openrc +46 44 0:34 / /sys/fs/cgroup/unified rw,nosuid,nodev,noexec,relatime - cgroup2 none rw,nsdelegate +47 44 0:35 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cpuset rw,cpuset +48 44 0:36 / /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cpu rw,cpu +49 44 0:37 / /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cpuacct 
rw,cpuacct +50 44 0:38 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup blkio rw,blkio +51 44 0:39 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup memory rw,memory +52 44 0:40 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup devices rw,devices +53 44 0:41 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup freezer rw,freezer +54 44 0:42 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime - cgroup net_cls rw,net_cls +55 44 0:43 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup perf_event rw,perf_event +60 26 0:48 / /proc/sys/fs/binfmt_misc rw,nosuid,nodev,noexec,relatime - binfmt_misc binfmt_misc rw +61 34 259:2 / /boot rw,relatime - ext2 /dev/nvme0n1p2 rw,errors=continue,user_xattr,acl +62 61 259:1 / /boot/efi rw,relatime - vfat /dev/nvme0n1p1 rw,fmask=0077,dmask=0077,codepage=437,iocharset=iso8859-1,shortname=mixed,errors=remount-ro +63 34 0:49 / /tmp rw,nodev,relatime - tmpfs tmpfs rw,size=4194304k +` + + mi, miErr := getCGroupMountsFromMountinfo(gentooMI) + require.NoError(t, miErr) + assert.Equal(t, []Mount{{ + Mountpoint: "/sys/fs/cgroup/openrc", + Root: "/", + Subsystems: []string{"release_agent=/lib/rc/sh/cgroup-release-agent.sh", "name=openrc"}, + }, { + Mountpoint: "/sys/fs/cgroup/unified", + Root: "/", + Subsystems: nil, + CGroupV2: true, + }, { + Mountpoint: "/sys/fs/cgroup/cpuset", + Root: "/", + Subsystems: []string{"cpuset"}, + }, { + Mountpoint: "/sys/fs/cgroup/cpu", + Root: "/", + Subsystems: []string{"cpu"}, + }, { + Mountpoint: "/sys/fs/cgroup/cpuacct", + Root: "/", + Subsystems: []string{"cpuacct"}, + }, { + Mountpoint: "/sys/fs/cgroup/blkio", + Root: "/", + Subsystems: []string{"blkio"}, + }, { + Mountpoint: "/sys/fs/cgroup/memory", + Root: "/", + Subsystems: []string{"memory"}, + }, { + Mountpoint: "/sys/fs/cgroup/devices", + Root: "/", + Subsystems: []string{"devices"}, + }, { + Mountpoint: "/sys/fs/cgroup/freezer", + Root: "/", + Subsystems: 
[]string{"freezer"}, + }, { + Mountpoint: "/sys/fs/cgroup/net_cls", + Root: "/", + Subsystems: []string{"net_cls"}, + }, { + Mountpoint: "/sys/fs/cgroup/perf_event", + Root: "/", + Subsystems: []string{"perf_event"}, + }, + }, mi) +} +func TestParseMountInfoQuicksetMinikube(t *testing.T) { + t.Parallel() + minikubeMI := ` +2819 2058 0:275 / / ro,relatime master:668 - overlay overlay rw,lowerdir=/var/lib/docker/overlay2/l/RIUYXOSUIR7KO32JEVCVXUS6JD:/var/lib/docker/overlay2/l/RT3HYWIQ42FP2FYIMLIF4KABW7:/var/lib/docker/overlay2/l/H2KG4S7FFKIOF7IRWK7XWSOTHI:/var/lib/docker/overlay2/l/HQYACZQ7MV6KBWBVFGBW3BJO7G:/var/lib/docker/overlay2/l/D6NLXBDJO4H2VLF6DXXGRXRHOF:/var/lib/docker/overlay2/l/7F5WLDFAF67AH3XWJM2BQ3Q4XD:/var/lib/docker/overlay2/l/CD442IXNIXYBYPPXTSJZAROGI7:/var/lib/docker/overlay2/l/NRMJROJOAW2RRCCKQYP3NGAFK3,upperdir=/var/lib/docker/overlay2/91695926b2d7a38a1029279c8a1608613758e3274ea9e7261865785940b3e131/diff,workdir=/var/lib/docker/overlay2/91695926b2d7a38a1029279c8a1608613758e3274ea9e7261865785940b3e131/work +2820 2819 0:279 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw +2821 2819 0:280 / /dev rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +2822 2821 0:281 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666 +2823 2819 0:269 / /sys ro,nosuid,nodev,noexec,relatime - sysfs sysfs ro +2824 2823 0:282 / /sys/fs/cgroup ro,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755 +2825 2824 0:22 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/systemd ro,nosuid,nodev,noexec,relatime master:7 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd +2826 2824 0:24 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/blkio ro,nosuid,nodev,noexec,relatime master:11 - cgroup cgroup rw,blkio +2827 2824 0:25 
/kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/hugetlb ro,nosuid,nodev,noexec,relatime master:12 - cgroup cgroup rw,hugetlb +2828 2824 0:26 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/perf_event ro,nosuid,nodev,noexec,relatime master:13 - cgroup cgroup rw,perf_event +2829 2824 0:27 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/freezer ro,nosuid,nodev,noexec,relatime master:14 - cgroup cgroup rw,freezer +2830 2824 0:28 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/pids ro,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,pids +2831 2824 0:29 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/net_cls,net_prio ro,nosuid,nodev,noexec,relatime master:16 - cgroup cgroup rw,net_cls,net_prio +2832 2824 0:30 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime master:17 - cgroup cgroup rw,memory +2833 2824 0:31 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/cpu,cpuacct ro,nosuid,nodev,noexec,relatime master:18 - cgroup cgroup rw,cpu,cpuacct +2834 2824 0:32 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/devices ro,nosuid,nodev,noexec,relatime master:19 - cgroup cgroup rw,devices +2835 2824 0:33 /kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448 /sys/fs/cgroup/cpuset ro,nosuid,nodev,noexec,relatime master:20 - cgroup 
cgroup rw,cpuset +2836 2821 0:265 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw +2837 2819 0:261 / /tmp rw,relatime - tmpfs tmpfs rw +2838 2819 0:21 / /mnt/cgroups ro,nosuid,nodev,noexec master:6 - tmpfs tmpfs ro,mode=755 +2839 2838 0:22 / /mnt/cgroups/systemd rw,nosuid,nodev,noexec,relatime master:7 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd +2840 2838 0:24 / /mnt/cgroups/blkio rw,nosuid,nodev,noexec,relatime master:11 - cgroup cgroup rw,blkio +2841 2838 0:25 / /mnt/cgroups/hugetlb rw,nosuid,nodev,noexec,relatime master:12 - cgroup cgroup rw,hugetlb +2842 2838 0:26 / /mnt/cgroups/perf_event rw,nosuid,nodev,noexec,relatime master:13 - cgroup cgroup rw,perf_event +2843 2838 0:27 / /mnt/cgroups/freezer rw,nosuid,nodev,noexec,relatime master:14 - cgroup cgroup rw,freezer +2844 2838 0:28 / /mnt/cgroups/pids rw,nosuid,nodev,noexec,relatime master:15 - cgroup cgroup rw,pids +2845 2838 0:29 / /mnt/cgroups/net_cls,net_prio rw,nosuid,nodev,noexec,relatime master:16 - cgroup cgroup rw,net_cls,net_prio +2846 2838 0:30 / /mnt/cgroups/memory rw,nosuid,nodev,noexec,relatime master:17 - cgroup cgroup rw,memory +2847 2838 0:31 / /mnt/cgroups/cpu,cpuacct rw,nosuid,nodev,noexec,relatime master:18 - cgroup cgroup rw,cpu,cpuacct +2848 2838 0:32 / /mnt/cgroups/devices rw,nosuid,nodev,noexec,relatime master:19 - cgroup cgroup rw,devices +2849 2838 0:33 / /mnt/cgroups/cpuset rw,nosuid,nodev,noexec,relatime master:20 - cgroup cgroup rw,cpuset +2850 2821 253:1 /var/lib/kubelet/pods/d05ceb29-4d8b-4c43-9eaa-d7acddc25247/containers/quickset/a053e3fe /dev/termination-log rw,relatime - ext4 /dev/vda1 rw +2851 2819 253:1 /var/lib/kubelet/pods/d05ceb29-4d8b-4c43-9eaa-d7acddc25247/volumes/kubernetes.io~configmap/node-cfg /etc/configs ro,relatime - ext4 /dev/vda1 rw +2852 2819 253:1 /var/lib/docker/containers/cb46c9f8f0ea80a1eb613fdd3e90b523939114575b6ee3e7e8bc1f4f0c8d0254/resolv.conf /etc/resolv.conf ro,relatime - ext4 /dev/vda1 rw 
+2853 2819 253:1 /var/lib/docker/containers/cb46c9f8f0ea80a1eb613fdd3e90b523939114575b6ee3e7e8bc1f4f0c8d0254/hostname /etc/hostname ro,relatime - ext4 /dev/vda1 rw +2854 2819 253:1 /var/lib/kubelet/pods/d05ceb29-4d8b-4c43-9eaa-d7acddc25247/etc-hosts /etc/hosts rw,relatime - ext4 /dev/vda1 rw +2855 2821 0:264 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k +2856 2819 0:262 / /run/secrets/kubernetes.io/serviceaccount ro,relatime - tmpfs tmpfs rw +2059 2820 0:279 /asound /proc/asound ro,relatime - proc proc rw +2060 2820 0:279 /bus /proc/bus ro,relatime - proc proc rw +2061 2820 0:279 /fs /proc/fs ro,relatime - proc proc rw +2062 2820 0:279 /irq /proc/irq ro,relatime - proc proc rw +2063 2820 0:279 /sys /proc/sys ro,relatime - proc proc rw +2066 2820 0:279 /sysrq-trigger /proc/sysrq-trigger ro,relatime - proc proc rw +2067 2820 0:338 / /proc/acpi ro,relatime - tmpfs tmpfs ro +2068 2820 0:280 /null /proc/kcore rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +2069 2820 0:280 /null /proc/keys rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +2074 2820 0:280 /null /proc/timer_list rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755 +2075 2820 0:339 / /proc/scsi ro,relatime - tmpfs tmpfs ro +2076 2823 0:340 / /sys/firmware ro,relatime - tmpfs tmpfs ro +` + + mi, miErr := getCGroupMountsFromMountinfo(minikubeMI) + require.NoError(t, miErr) + const podSubGrp = "/kubepods/podd05ceb29-4d8b-4c43-9eaa-d7acddc25247/db332e7610fcb7c5a4d9eaa782285e61e49fa5c8403d756ea8ae2cffc99dc448" + assert.Equal(t, []Mount{{ + Mountpoint: "/sys/fs/cgroup/systemd", + Root: podSubGrp, + Subsystems: []string{"xattr", "release_agent=/usr/lib/systemd/systemd-cgroups-agent", "name=systemd"}, + }, { + Mountpoint: "/sys/fs/cgroup/blkio", + Root: podSubGrp, + Subsystems: []string{"blkio"}, + }, { + Mountpoint: "/sys/fs/cgroup/hugetlb", + Root: podSubGrp, + Subsystems: []string{"hugetlb"}, + }, { + Mountpoint: "/sys/fs/cgroup/perf_event", + Root: podSubGrp, + Subsystems: 
[]string{"perf_event"}, + }, { + Mountpoint: "/sys/fs/cgroup/freezer", + Root: podSubGrp, + Subsystems: []string{"freezer"}, + }, { + Mountpoint: "/sys/fs/cgroup/pids", + Root: podSubGrp, + Subsystems: []string{"pids"}, + }, { + Mountpoint: "/sys/fs/cgroup/net_cls,net_prio", + Root: podSubGrp, + Subsystems: []string{"net_cls", "net_prio"}, + }, { + Mountpoint: "/sys/fs/cgroup/memory", + Root: podSubGrp, + Subsystems: []string{"memory"}, + }, { + Mountpoint: "/sys/fs/cgroup/cpu,cpuacct", + Root: podSubGrp, + Subsystems: []string{"cpu", "cpuacct"}, + }, { + Mountpoint: "/sys/fs/cgroup/devices", + Root: podSubGrp, + Subsystems: []string{"devices"}, + }, { + Mountpoint: "/sys/fs/cgroup/cpuset", + Root: podSubGrp, + Subsystems: []string{"cpuset"}, + }, { + Mountpoint: "/mnt/cgroups/systemd", + Root: "/", + Subsystems: []string{"xattr", "release_agent=/usr/lib/systemd/systemd-cgroups-agent", "name=systemd"}, + }, { + Mountpoint: "/mnt/cgroups/blkio", + Root: "/", + Subsystems: []string{"blkio"}, + }, { + Mountpoint: "/mnt/cgroups/hugetlb", + Root: "/", + Subsystems: []string{"hugetlb"}, + }, { + Mountpoint: "/mnt/cgroups/perf_event", + Root: "/", + Subsystems: []string{"perf_event"}, + }, { + Mountpoint: "/mnt/cgroups/freezer", + Root: "/", + Subsystems: []string{"freezer"}, + }, { + Mountpoint: "/mnt/cgroups/pids", + Root: "/", + Subsystems: []string{"pids"}, + }, { + Mountpoint: "/mnt/cgroups/net_cls,net_prio", + Root: "/", + Subsystems: []string{"net_cls", "net_prio"}, + }, { + Mountpoint: "/mnt/cgroups/memory", + Root: "/", + Subsystems: []string{"memory"}, + }, { + Mountpoint: "/mnt/cgroups/cpu,cpuacct", + Root: "/", + Subsystems: []string{"cpu", "cpuacct"}, + }, { + Mountpoint: "/mnt/cgroups/devices", + Root: "/", + Subsystems: []string{"devices"}, + }, { + Mountpoint: "/mnt/cgroups/cpuset", + Root: "/", + Subsystems: []string{"cpuset"}, + }, + }, mi) +} diff --git a/cgresolver/proc_cgroup.go b/cgresolver/proc_cgroup.go new file mode 100644 index 0000000..93a9f9b 
--- /dev/null +++ b/cgresolver/proc_cgroup.go @@ -0,0 +1,284 @@ +package cgresolver + +import ( + "bytes" + "errors" + "fmt" + "os" + "path/filepath" + "slices" + "strconv" + "strings" +) + +// CGroupV2HierarchyID is a convenience constant indicating the hierarchy ID for the V2 cgroup hierarchy +const CGroupV2HierarchyID = 0 + +// CGProcHierarchy describes a specific CGroup subsystem/controller/hierarchy and path for a parsed /proc//cgroup +type CGProcHierarchy struct { + HierarchyID int // 0 for v2; refs /proc/cgroups for v1 + SubsystemsCSV string // empty for v2; set of controllers/subsystem names for this hierarchy (CSV) + Subsystems []string // set of v1 subsystems/controllers (HierarchiesCSV split) + Path string // path relative to mountpoint +} + +func (c *CGProcHierarchy) cgPath(mountpoints []Mount) (CGroupPath, error) { + for _, mp := range mountpoints { + // Skip any mountpoints originating outside our cgroup namespace + // From cgroup_namespaces(7): + // When reading the cgroup memberships of a "target" process from /proc/pid/cgroup, + // the pathname shown in the third field of each record will be relative to the + // reading process's root directory for the corresponding cgroup hierarchy. If the + // cgroup directory of the target process lies outside the root directory of the + // reading process's cgroup namespace, then the pathname will show ../ entries for + // each ancestor level in the cgroup hierarchy. 
+ if strings.HasPrefix(mp.Root, "/..") { + continue + } + if (mp.CGroupV2 && c.HierarchyID == CGroupV2HierarchyID) || slices.Equal(mp.Subsystems, c.Subsystems) { + relCGPath, relErr := filepath.Rel(mp.Root, c.Path) + if relErr != nil || strings.HasPrefix(relCGPath, "../") { + // bind-mount for a different sub-tree of the cgroups v2 hierarchy + continue + } + return CGroupPath{AbsPath: filepath.Join(mp.Mountpoint, relCGPath), MountPath: mp.Mountpoint, Mode: cgroup2Mode(mp.CGroupV2)}, nil + } + } + return CGroupPath{}, fmt.Errorf("no usable mountpoints found for hierarchy %d and path %q (found %d cgroup/cgroup2 mounts)", + c.HierarchyID, c.Path, len(mountpoints)) +} + +func parseProcPidCgroup(content []byte) ([]CGProcHierarchy, error) { + lines := bytes.Split(bytes.TrimSpace(content), []byte("\n")) + + out := make([]CGProcHierarchy, 0, len(lines)) + + // from cgroups(7): + // /proc/[pid]/cgroup (since Linux 2.6.24) + // This file describes control groups to which the process with the corresponding PID be‐ + // longs. The displayed information differs for cgroups version 1 and version 2 hierarchies. + // + // For each cgroup hierarchy of which the process is a member, there is one entry containing + // three colon-separated fields: + // + // hierarchy-ID:controller-list:cgroup-path + // + // For example: + // + // 5:cpuacct,cpu,cpuset:/daemons + // + // The colon-separated fields are, from left to right: + // + // [1] For cgroups version 1 hierarchies, this field contains a unique hierarchy ID number + // that can be matched to a hierarchy ID in /proc/cgroups. For the cgroups version 2 + // hierarchy, this field contains the value 0. + // + // [2] For cgroups version 1 hierarchies, this field contains a comma-separated list of the + // controllers bound to the hierarchy. For the cgroups version 2 hierarchy, this field + // is empty. + // + // [3] This field contains the pathname of the control group in the hierarchy to which the + // process belongs. 
This pathname is relative to the mount point of the hierarchy. + + for i, line := range lines { + if len(line) == 0 { + // skip empty lines + continue + } + parts := bytes.SplitN(line, []byte(":"), 3) + if len(parts) != 3 { + return nil, fmt.Errorf("line %d (%q) has incorrect number of parts: %d; expected %d", i, line, len(parts), 3) + } + hID, hIDErr := strconv.Atoi(string(parts[0])) + if hIDErr != nil { + return nil, fmt.Errorf("line %d has non-integer hierarchy ID (%q): %w", i, string(parts[0]), hIDErr) + } + ss := strings.Split(string(parts[1]), ",") + if len(ss) == 1 && ss[0] == "" { + ss = []string{} + } + out = append(out, CGProcHierarchy{ + HierarchyID: hID, + SubsystemsCSV: string(parts[1]), + Path: string(parts[2]), + Subsystems: ss, + }) + } + return out, nil +} + +func resolveProcCGControllers(pid string) ([]CGProcHierarchy, error) { + cgPath := filepath.Join("/proc", pid, "cgroup") + cgContents, readErr := os.ReadFile(cgPath) + if readErr != nil { + return nil, fmt.Errorf("failed to read %q: %w", cgPath, readErr) + } + + return parseProcPidCgroup(cgContents) +} + +// SelfCGSubsystems returns information about all the controllers associated with the current process +func SelfCGSubsystems() ([]CGProcHierarchy, error) { + return resolveProcCGControllers("self") +} + +// PidCGSubsystems returns information about all the CGroup controllers associated with the passed pid +func PidCGSubsystems(pid int) ([]CGProcHierarchy, error) { + return resolveProcCGControllers(strconv.Itoa(pid)) +} + +// ErrMissingCG2Mount indicates a missing cgroup v2 mount when resolving which controllers belong to which hierarchy +var ErrMissingCG2Mount = errors.New("cgroup2 mount covering relevant cgroup(s) not present in the current mount namespace, but cgroupv2 controller present in /proc//cgroup") + +// CGroupV2QuasiSubsystemName is a constant used by MapSubsystems to refer to +// the cgroup2 hierarchy (since the /proc//cgroup file lacks subsystems +// for cgroup2) +const 
CGroupV2QuasiSubsystemName = "cgroup2 unified hierarchy" + +// MapSubsystems creates a map from the controller-name to the entry in the passed slice +func MapSubsystems(controllers []CGProcHierarchy) map[string]*CGProcHierarchy { + out := make(map[string]*CGProcHierarchy, len(controllers)) + for i, hier := range controllers { + for _, ctrlr := range hier.Subsystems { + out[ctrlr] = &controllers[i] + } + if hier.HierarchyID == CGroupV2HierarchyID { + out[CGroupV2QuasiSubsystemName] = &controllers[i] + } + } + return out +} + +// CGroupSubsystem models a row in /proc/cgroups +type CGroupSubsystem struct { + Subsys string // name of the subsystem + Hierarchy int // hierarchy ID number (0 for cgroup2) + NumCGroups int // number of cgroups in that hierarchy using this controller + Enabled bool // controller enabled? +} + +// ParseReadCGSubsystems reads the /proc/cgroups pseudofile, and returns a slice of subsystem info, including which hierarchies each belongs to. +func ParseReadCGSubsystems() ([]CGroupSubsystem, error) { + procCG, procCGErr := os.ReadFile("/proc/cgroups") + if procCGErr != nil { + return nil, fmt.Errorf("failed to read /proc/cgroups: %w", procCGErr) + } + return parseCGSubsystems(string(procCG)) +} + +func parseCGSubsystems(procCgroups string) ([]CGroupSubsystem, error) { + lines := strings.Split(procCgroups, "\n") + headers := strings.Fields(strings.TrimLeft(lines[0], "#")) + if len(headers) < 2 { + return nil, fmt.Errorf("insufficient fields %d; need at least %d (expected 4)", len(headers), 2) + } + // Fast-common-path which should always hit if the number of columns doesn't change + extractRow := func(vals []string) (CGroupSubsystem, error) { + if len(vals) != 4 { + return CGroupSubsystem{}, fmt.Errorf("unexpected number of columns %d (doesn't match headers); expected %d", len(vals), 4) + } + hierNum, hierParseErr := strconv.Atoi(vals[1]) + if hierParseErr != nil { + return CGroupSubsystem{}, fmt.Errorf("unable to parse hierarchy number: %q: %w", 
vals[1], hierParseErr) + } + numCG, numCGParseErr := strconv.Atoi(vals[2]) + if numCGParseErr != nil { + return CGroupSubsystem{}, fmt.Errorf("unable to parse cgroup count: %q: %w", vals[2], numCGParseErr) + } + enabled, enabledParseErr := strconv.ParseBool(vals[3]) + if enabledParseErr != nil { + return CGroupSubsystem{}, fmt.Errorf("unable to parse cgroup enabled: %q: %w", vals[3], enabledParseErr) + } + return CGroupSubsystem{ + Subsys: vals[0], + Hierarchy: hierNum, + NumCGroups: numCG, + Enabled: enabled, + }, nil + } + const noCol = -1 // constant to designate missing columns + expCols := [...]string{"subsys_name", "hierarchy", "num_cgroups", "enabled"} + if !slices.Equal(expCols[:], headers) { + subsysCol := noCol + hierCol := noCol + nCGCol := noCol + enabledCol := noCol + + // The list and/or order of columns changed, so we need to remap them. + // we do, however have a minimum of just subsys_name and hierarchy columns + for i, colHead := range headers { + switch strings.ToLower(colHead) { + case "subsys_name": + if subsysCol != noCol { + return nil, fmt.Errorf("multiple subsys_name columns at index %d and %d", subsysCol, i) + } + subsysCol = i + case "hierarchy": + if hierCol != noCol { + return nil, fmt.Errorf("multiple hierarchy columns at index %d and %d", hierCol, i) + } + hierCol = i + case "num_cgroups": + if nCGCol != noCol { + return nil, fmt.Errorf("multiple num_cgroups columns at index %d and %d", nCGCol, i) + } + nCGCol = i + case "enabled": + if enabledCol != noCol { + return nil, fmt.Errorf("multiple enabled columns at index %d and %d", enabledCol, i) + } + enabledCol = i + } + // let unknown columns fall through + } + if subsysCol == noCol || hierCol == noCol { + return nil, fmt.Errorf("missing critical column subsystem_name %t or hierarchy %t; columns: %q", subsysCol == noCol, hierCol == noCol, headers) + } + extractRow = func(vals []string) (CGroupSubsystem, error) { + if len(vals) != len(headers) { + return CGroupSubsystem{}, 
fmt.Errorf("unexpected number of columns %d (doesn't match headers); expected %d", len(vals), len(headers)) + } + hierNum, hierParseErr := strconv.Atoi(vals[hierCol]) + if hierParseErr != nil { + return CGroupSubsystem{}, fmt.Errorf("unable to parse hierarchy number: %q: %w", vals[hierCol], hierParseErr) + } + rowOut := CGroupSubsystem{ + Subsys: vals[subsysCol], + Hierarchy: hierNum, + NumCGroups: 0, + Enabled: true, // default to true, so we consider anything that's listed as enabled if that column disappears + } + if nCGCol != noCol { + numCG, numCGParseErr := strconv.Atoi(vals[nCGCol]) + if numCGParseErr != nil { + return CGroupSubsystem{}, fmt.Errorf("unable to parse cgroup count: %q: %w", vals[nCGCol], numCGParseErr) + } + rowOut.NumCGroups = numCG + } + if enabledCol != noCol { + enabled, enabledParseErr := strconv.ParseBool(vals[enabledCol]) + if enabledParseErr != nil { + return CGroupSubsystem{}, fmt.Errorf("unable to parse cgroup enabled: %q: %w", vals[enabledCol], enabledParseErr) + } + rowOut.Enabled = enabled + } + return rowOut, nil + } + } + + out := make([]CGroupSubsystem, 0, len(lines)-1) + for i, line := range lines[1:] { + if len(line) == 0 { + // skip empty lines (probably trailing) + continue + } + lineVals := strings.Fields(line) + extractedLine, extLineErr := extractRow(lineVals) + if extLineErr != nil { + return nil, fmt.Errorf("failed to parse line %d: %w", i+1, extLineErr) + } + out = append(out, extractedLine) + } + + return out, nil +} diff --git a/cgresolver/proc_cgroup_test.go b/cgresolver/proc_cgroup_test.go new file mode 100644 index 0000000..34aee45 --- /dev/null +++ b/cgresolver/proc_cgroup_test.go @@ -0,0 +1,1045 @@ +package cgresolver + +import ( + "errors" + "slices" + "testing" +) + +func TestCGPath(t *testing.T) { + for _, itbl := range []struct { + name string + hier CGProcHierarchy + mounts []Mount + expPath CGroupPath + expErr error + }{ + { + name: "cg1_root_mount", + hier: CGProcHierarchy{ + HierarchyID: 10, + 
SubsystemsCSV: "memory", + Subsystems: []string{"memory"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, + mounts: []Mount{{ + Mountpoint: "/sys/fs/cgroup/memory", + Root: "/", + Subsystems: []string{"memory"}, + CGroupV2: false, + }, { + Mountpoint: "/sys/fs/cgroup/cpu", + Root: "/", + Subsystems: []string{"cpu"}, + CGroupV2: false, + }}, + expPath: CGroupPath{ + AbsPath: "/sys/fs/cgroup/memory/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + MountPath: "/sys/fs/cgroup/memory", + Mode: CGModeV1, + }, + expErr: nil, + }, + { + name: "cg1_nonroot_mount", + hier: CGProcHierarchy{ + HierarchyID: 10, + SubsystemsCSV: "memory", + Subsystems: []string{"memory"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, + mounts: []Mount{{ + Mountpoint: "/sys/fs/cgroup/memory", + Root: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d", + Subsystems: []string{"memory"}, + CGroupV2: false, + }, { + Mountpoint: "/sys/fs/cgroup/cpu", + Root: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d", + Subsystems: []string{"cpu"}, + CGroupV2: false, + }}, + expPath: CGroupPath{ + AbsPath: "/sys/fs/cgroup/memory/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + MountPath: "/sys/fs/cgroup/memory", + Mode: CGModeV1, + }, + expErr: nil, + }, + { + name: "cg1_root_mount_skip_nonmmatching_subtree_mount", + hier: CGProcHierarchy{ + HierarchyID: 10, + SubsystemsCSV: "memory", + Subsystems: []string{"memory"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, + mounts: []Mount{{ + Mountpoint: "/tmp/nero-fiddled-while-rome-burned/fowl", + Root: "/fizzlebit/foodle", + Subsystems: []string{"memory"}, + CGroupV2: false, + }, { + Mountpoint: "/sys/fs/cgroup/memory", + Root: 
"/", + Subsystems: []string{"memory"}, + CGroupV2: false, + }, { + Mountpoint: "/sys/fs/cgroup/cpu", + Root: "/", + Subsystems: []string{"cpu"}, + CGroupV2: false, + }}, + expPath: CGroupPath{ + AbsPath: "/sys/fs/cgroup/memory/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + MountPath: "/sys/fs/cgroup/memory", + Mode: CGModeV1, + }, + expErr: nil, + }, + { + name: "cg2_root_no_mount", + hier: CGProcHierarchy{ + HierarchyID: 0, + SubsystemsCSV: "", + Subsystems: []string{}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, + mounts: []Mount{{ + Mountpoint: "/sys/fs/cgroup/memory", + Root: "/", + Subsystems: []string{"memory"}, + CGroupV2: false, + }, { + Mountpoint: "/sys/fs/cgroup/cpu", + Root: "/", + Subsystems: []string{"cpu"}, + CGroupV2: false, + }}, + expPath: CGroupPath{}, + expErr: errors.New("no usable mountpoints found for hierarchy 0 and path \"/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd\" (found 2 cgroup/cgroup2 mounts)"), + }, + { + name: "cg2_root_mount", + hier: CGProcHierarchy{ + HierarchyID: 0, + SubsystemsCSV: "", + Subsystems: []string{""}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, + mounts: []Mount{{ + Mountpoint: "/sys/fs/cgroup/blkio", + Root: "/", + Subsystems: []string{"blkio"}, + }, { + Mountpoint: "/sys/fs/cgroup/memory", + Root: "/", + Subsystems: []string{"memory"}, + CGroupV2: false, + }, { + Mountpoint: "/sys/fs/cgroup/unified", + Root: "/", + Subsystems: []string{}, + CGroupV2: true, + }, { + Mountpoint: "/sys/fs/cgroup/cpu", + Root: "/", + Subsystems: []string{"cpu"}, + CGroupV2: false, + }}, + expPath: CGroupPath{ + AbsPath: 
"/sys/fs/cgroup/unified/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + MountPath: "/sys/fs/cgroup/unified", + Mode: CGModeV2, + }, + expErr: nil, + }, + { + name: "cg2_root_mount_cg_namespace_skip_root", + hier: CGProcHierarchy{ + HierarchyID: 0, + SubsystemsCSV: "", + Subsystems: []string{""}, + Path: "/foobar", + }, + mounts: []Mount{{ + Mountpoint: "/sys/fs/cgroup/blkio", + Root: "/", + Subsystems: []string{"blkio"}, + }, { + Mountpoint: "/sys/fs/cgroup/memory", + Root: "/", + Subsystems: []string{"memory"}, + CGroupV2: false, + }, { + Mountpoint: "/mnt/cgroups/unified", + Root: "/../../..", + Subsystems: []string{}, + CGroupV2: true, + }, { + Mountpoint: "/sys/fs/cgroup/unified", + Root: "/", + Subsystems: []string{}, + CGroupV2: true, + }, { + Mountpoint: "/sys/fs/cgroup/cpu", + Root: "/", + Subsystems: []string{"cpu"}, + CGroupV2: false, + }}, + expPath: CGroupPath{ + AbsPath: "/sys/fs/cgroup/unified/foobar", + MountPath: "/sys/fs/cgroup/unified", + Mode: CGModeV2, + }, + expErr: nil, + }, + } { + tbl := itbl + t.Run(tbl.name, func(t *testing.T) { + absPath, parseErr := tbl.hier.cgPath(tbl.mounts) + if parseErr != nil { + if tbl.expErr == nil { + t.Fatalf("unexpected error (expected nil): %s", parseErr) + } else if tbl.expErr.Error() != parseErr.Error() { + t.Fatalf("mismatched error:\n got %s\n want %s", parseErr, tbl.expErr) + } + return + } + if absPath != tbl.expPath { + t.Errorf("unexpected absolute path:\n got %q\n want %q", absPath, tbl.expPath) + } + }) + } +} + +func TestParseProcPidCgroup(t *testing.T) { + for _, itbl := range []struct { + name string + contents string + expOut []CGProcHierarchy + expErr error // matched as string if non-nil + }{ + { + name: "ubuntu_lunar_cgroup2", + contents: `0::/user.slice/user-1001.slice/session-2.scope +`, // include a trailing new line + expOut: []CGProcHierarchy{ + { + HierarchyID: 0, + SubsystemsCSV: "", + Subsystems: []string{}, + Path: 
"/user.slice/user-1001.slice/session-2.scope", + }, + }, + expErr: nil, // no error + }, + { + name: "ubuntu_lunar_cgroup2-bad-ID", + contents: `fizzlebit::/user.slice/user-1001.slice/session-2.scope +`, // include a trailing new line + expOut: nil, + expErr: errors.New("line 0 has non-integer hierarchy ID (\"fizzlebit\"): strconv.Atoi: parsing \"fizzlebit\": invalid syntax"), + }, + { + name: "ubuntu_lunar_cgroup2-missing-path-part", + contents: `0: +`, // include a trailing new line + expOut: nil, + expErr: errors.New("line 0 (\"0:\") has incorrect number of parts: 2; expected 3"), // no error + }, + { + name: "gke_cos_linux 5.10", + contents: `12:pids:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +11:blkio:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +10:memory:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +9:devices:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +8:cpu,cpuacct:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +7:hugetlb:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +6:net_cls,net_prio:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +5:cpuset:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +4:rdma:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +3:freezer:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd 
+2:perf_event:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +1:name=systemd:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +0::/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +`, // include a trailing new line + expOut: []CGProcHierarchy{ + { + HierarchyID: 12, + SubsystemsCSV: "pids", + Subsystems: []string{"pids"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 11, + SubsystemsCSV: "blkio", + Subsystems: []string{"blkio"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 10, + SubsystemsCSV: "memory", + Subsystems: []string{"memory"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 9, + SubsystemsCSV: "devices", + Subsystems: []string{"devices"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 8, + SubsystemsCSV: "cpu,cpuacct", + Subsystems: []string{"cpu", "cpuacct"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 7, + SubsystemsCSV: "hugetlb", + Subsystems: []string{"hugetlb"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 6, + SubsystemsCSV: "net_cls,net_prio", + Subsystems: []string{"net_cls", "net_prio"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 5, + 
SubsystemsCSV: "cpuset", + Subsystems: []string{"cpuset"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 4, + SubsystemsCSV: "rdma", + Subsystems: []string{"rdma"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 3, + SubsystemsCSV: "freezer", + Subsystems: []string{"freezer"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 2, + SubsystemsCSV: "perf_event", + Subsystems: []string{"perf_event"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 1, + SubsystemsCSV: "name=systemd", + Subsystems: []string{"name=systemd"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 0, + SubsystemsCSV: "", + Subsystems: []string{}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, + }, + expErr: nil, // no error + }, + { + name: "gke_cos_linux-5.10_truncated_interstitial_newline", + contents: `12:pids:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +11:blkio:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +10:memory:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd + +9:devices:/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd +`, // include a trailing new line + expOut: []CGProcHierarchy{ + { + HierarchyID: 12, + SubsystemsCSV: "pids", + Subsystems: 
[]string{"pids"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 11, + SubsystemsCSV: "blkio", + Subsystems: []string{"blkio"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 10, + SubsystemsCSV: "memory", + Subsystems: []string{"memory"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, { + HierarchyID: 9, + SubsystemsCSV: "devices", + Subsystems: []string{"devices"}, + Path: "/kubepods/pod87a5b680-98ab-4850-9f2b-df5062206b0d/4d1e4a9860ffb2ca715726deefa957557e7d269762fb1ec83954cd173220fbbd", + }, + }, + expErr: nil, // no error + }, + } { + tbl := itbl + t.Run(tbl.name, func(t *testing.T) { + cgph, parseErr := parseProcPidCgroup([]byte(tbl.contents)) + if parseErr != nil { + if tbl.expErr == nil { + t.Fatalf("unexpected error (expected nil): %s", parseErr) + } else if tbl.expErr.Error() != parseErr.Error() { + t.Fatalf("mismatched error:\n got %s\n want %s", parseErr, tbl.expErr) + } + return + } + if len(cgph) != len(tbl.expOut) { + t.Errorf("unexpected length %d; expected %d", len(cgph), len(tbl.expOut)) + } + for i, cg := range cgph { + if i >= len(tbl.expOut) { + t.Errorf("unexpected element %d at end of output: %+v", i, cg) + continue + } + expCG := tbl.expOut[i] + if cg.HierarchyID != expCG.HierarchyID { + t.Errorf("%d: mismatched hierarchy IDs: got %d; want %d", i, cg.HierarchyID, expCG.HierarchyID) + } + if cg.SubsystemsCSV != expCG.SubsystemsCSV { + t.Errorf("%d: mismatched subsystem csv: got %s; want %s", i, cg.SubsystemsCSV, expCG.SubsystemsCSV) + } + if cg.Path != expCG.Path { + t.Errorf("%d: mismatched hierarchy IDs: got %s; want %s", i, cg.Path, expCG.Path) + } + if !slices.Equal(cg.Subsystems, expCG.Subsystems) { + t.Errorf("%d: mismatched subsystems:\n got %q\n want 
%q", i, cg.Subsystems, expCG.Subsystems) + } + } + }) + } +} + +func TestParseCGSubsystems(t *testing.T) { + for _, itbl := range []struct { + name string + contents string + expOut []CGroupSubsystem + expErr error // matched as string if non-nil + }{ + { + name: "ubuntu_lunar_cgroup2_only", + contents: `#subsys_name hierarchy num_cgroups enabled +cpuset 0 179 1 +cpu 0 179 1 +cpuacct 0 179 1 +blkio 0 179 1 +memory 0 179 1 +devices 0 179 1 +freezer 0 179 1 +net_cls 0 179 1 +perf_event 0 179 1 +net_prio 0 179 1 +hugetlb 0 179 1 +pids 0 179 1 +rdma 0 179 1 +misc 0 179 1 +`, // include a trailing newline + expOut: []CGroupSubsystem{ + { + Subsys: "cpuset", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "cpu", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "cpuacct", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "blkio", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "memory", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "devices", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "freezer", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "net_cls", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "perf_event", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "net_prio", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "hugetlb", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "pids", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "rdma", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "misc", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, + }, + expErr: nil, + }, + { + name: "gke_cos_linux_5.10", + contents: `#subsys_name hierarchy num_cgroups enabled +cpuset 9 42 1 +cpu 2 328 1 +cpuacct 2 328 1 +blkio 4 87 1 +memory 8 361 1 +devices 6 82 1 +freezer 11 42 1 +net_cls 5 42 1 +perf_event 3 
42 1 +net_prio 5 42 1 +hugetlb 10 42 1 +pids 12 87 1 +rdma 7 42 1 +`, // include a trailing newline + expOut: []CGroupSubsystem{ + { + Subsys: "cpuset", + Hierarchy: 9, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "cpu", + Hierarchy: 2, + NumCGroups: 328, + Enabled: true, + }, { + Subsys: "cpuacct", + Hierarchy: 2, + NumCGroups: 328, + Enabled: true, + }, { + Subsys: "blkio", + Hierarchy: 4, + NumCGroups: 87, + Enabled: true, + }, { + Subsys: "memory", + Hierarchy: 8, + NumCGroups: 361, + Enabled: true, + }, { + Subsys: "devices", + Hierarchy: 6, + NumCGroups: 82, + Enabled: true, + }, { + Subsys: "freezer", + Hierarchy: 11, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "net_cls", + Hierarchy: 5, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "perf_event", + Hierarchy: 3, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "net_prio", + Hierarchy: 5, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "hugetlb", + Hierarchy: 10, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "pids", + Hierarchy: 12, + NumCGroups: 87, + Enabled: true, + }, { + Subsys: "rdma", + Hierarchy: 7, + NumCGroups: 42, + Enabled: true, + }, + }, + expErr: nil, + }, + { + name: "gke_cos_linux_5.10-reoordered_num_cgroups_enabled-and-trailing-whitespace", + contents: `#subsys_name hierarchy enabled num_cgroups +cpuset 9 1 42 +cpu 2 1 328 +cpuacct 2 1 328 +blkio 4 1 87 +memory 8 1 361 +devices 6 1 82 +freezer 11 1 42 +net_cls 5 1 42 +perf_event 3 1 42 +net_prio 5 1 42 +hugetlb 10 1 42 +pids 12 1 87 +rdma 7 1 42 +`, // include a trailing newline + expOut: []CGroupSubsystem{ + { + Subsys: "cpuset", + Hierarchy: 9, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "cpu", + Hierarchy: 2, + NumCGroups: 328, + Enabled: true, + }, { + Subsys: "cpuacct", + Hierarchy: 2, + NumCGroups: 328, + Enabled: true, + }, { + Subsys: "blkio", + Hierarchy: 4, + NumCGroups: 87, + Enabled: true, + }, { + Subsys: "memory", + Hierarchy: 8, + NumCGroups: 361, + Enabled: true, + }, { + Subsys: 
"devices", + Hierarchy: 6, + NumCGroups: 82, + Enabled: true, + }, { + Subsys: "freezer", + Hierarchy: 11, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "net_cls", + Hierarchy: 5, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "perf_event", + Hierarchy: 3, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "net_prio", + Hierarchy: 5, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "hugetlb", + Hierarchy: 10, + NumCGroups: 42, + Enabled: true, + }, { + Subsys: "pids", + Hierarchy: 12, + NumCGroups: 87, + Enabled: true, + }, { + Subsys: "rdma", + Hierarchy: 7, + NumCGroups: 42, + Enabled: true, + }, + }, + expErr: nil, + }, + { + name: "ubuntu_lunar_cgroup2_only_invalid_enabled", + contents: `#subsys_name hierarchy num_cgroups enabled +cpuset 0 179 1 +cpu 0 179 k +cpuacct 0 179 1 +blkio 0 179 1 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unable to parse cgroup enabled: \"k\": strconv.ParseBool: parsing \"k\": invalid syntax"), + }, + { + name: "ubuntu_lunar_cgroup2_only_invalid_num_cgroups", + contents: `#subsys_name hierarchy num_cgroups enabled +cpuset 0 179 1 +cpu 0 g 1 +cpuacct 0 179 1 +blkio 0 179 1 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unable to parse cgroup count: \"g\": strconv.Atoi: parsing \"g\": invalid syntax"), + }, + { + name: "ubuntu_lunar_cgroup2_only_missing_enabled_row", + contents: `#subsys_name hierarchy num_cgroups enabled +cpuset 0 179 1 +cpu 0 179 +cpuacct 0 179 1 +blkio 0 179 1 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unexpected number of columns 3 (doesn't match headers); expected 4"), + }, + { + name: "ubuntu_lunar_cgroup2_only_invalid_hierarchy", + contents: `#subsys_name hierarchy num_cgroups enabled +cpuset 0 179 1 +cpu z 179 1 +cpuacct 0 179 1 +blkio 0 179 1 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unable to 
parse hierarchy number: \"z\": strconv.Atoi: parsing \"z\": invalid syntax"), + }, + { + name: "ubuntu_lunar_cgroup2_only_invalid_hierarchy_sans_enabled", + contents: `#subsys_name hierarchy num_cgroups +cpuset 0 179 +cpu z 179 +cpuacct 0 179 +blkio 0 179 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unable to parse hierarchy number: \"z\": strconv.Atoi: parsing \"z\": invalid syntax"), + }, + { + name: "ubuntu_lunar_cgroup2_only_invalid_num_cgroups_sans_enabled", + contents: `#subsys_name hierarchy num_cgroups +cpuset 0 179 +cpu 0 g +cpuacct 0 179 +blkio 0 179 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unable to parse cgroup count: \"g\": strconv.Atoi: parsing \"g\": invalid syntax"), + }, + { + name: "ubuntu_lunar_cgroup2_only_invalid_enabled_sans_num_cgroups", + contents: `#subsys_name hierarchy enabled +cpuset 0 1 +cpu 0 k +cpuacct 0 1 +blkio 0 1 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unable to parse cgroup enabled: \"k\": strconv.ParseBool: parsing \"k\": invalid syntax"), + }, + { + name: "missing_subsys_column", + contents: `#hierarchy num_cgroups enabled +cpuset 0 179 1 +cpu 0 179 1 +cpuacct 0 179 1 +blkio 0 179 1 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("missing critical column subsystem_name true or hierarchy false; columns: [\"hierarchy\" \"num_cgroups\" \"enabled\"]"), + }, + { + name: "empty_string", + contents: ``, // empty + expOut: nil, + expErr: errors.New("insufficient fields 0; need at least 2 (expected 4)"), + }, + { + name: "missing_hierarchy_column", + contents: `#subsys_name num_cgroups enabled +`, // include a trailing newline + expOut: nil, + expErr: errors.New("missing critical column subsystem_name false or hierarchy true; columns: [\"subsys_name\" \"num_cgroups\" \"enabled\"]"), + }, + { + name: "duplicate_enabled_column", + contents: `#subsys_name 
num_cgroups enabled enabled +`, // include a trailing newline + expOut: nil, + expErr: errors.New("multiple enabled columns at index 2 and 3"), + }, + { + name: "duplicate_subsys_column", + contents: `#subsys_name num_cgroups enabled subsys_name +`, // include a trailing newline + expOut: nil, + expErr: errors.New("multiple subsys_name columns at index 0 and 3"), + }, + { + name: "duplicate_hierarchy_column", + contents: `#hierarchy num_cgroups enabled hierarchy +`, // include a trailing newline + expOut: nil, + expErr: errors.New("multiple hierarchy columns at index 0 and 3"), + }, + { + name: "duplicate_num_cgroups_column", + contents: `#hierarchy num_cgroups enabled num_cgroups +`, // include a trailing newline + expOut: nil, + expErr: errors.New("multiple num_cgroups columns at index 1 and 3"), + }, + { + name: "missing_subsys_and_hierarchy_column", + contents: `#num_cgroups enabled +cpuset 179 1 +cpu 179 1 +cpuacct 179 1 +blkio 179 1 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("missing critical column subsystem_name true or hierarchy true; columns: [\"num_cgroups\" \"enabled\"]"), + }, + { + name: "ubuntu_lunar_cgroup2_only_sans_enabled_missing_cgroup_count_col", + contents: `#subsys_name hierarchy num_cgroups +cpuset 0 179 +cpu 0 +cpuacct 0 179 +`, // include a trailing newline + expOut: nil, + expErr: errors.New("failed to parse line 2: unexpected number of columns 2 (doesn't match headers); expected 3"), + }, + { + name: "ubuntu_lunar_cgroup2_only_sans_enabled", + contents: `#subsys_name hierarchy num_cgroups +cpuset 0 179 +cpu 0 179 +cpuacct 0 179 +blkio 0 179 +memory 0 179 +devices 0 179 +freezer 0 179 +net_cls 0 179 +perf_event 0 179 +net_prio 0 179 +hugetlb 0 179 +pids 0 179 +rdma 0 179 +misc 0 179 +`, // include a trailing newline + expOut: []CGroupSubsystem{ + { + Subsys: "cpuset", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "cpu", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: 
"cpuacct", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "blkio", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "memory", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "devices", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "freezer", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "net_cls", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "perf_event", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "net_prio", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "hugetlb", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "pids", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "rdma", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "misc", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, + }, + expErr: nil, + }, + { + name: "ubuntu_lunar_cgroup2_only_sans_enabled", + contents: `#subsys_name hierarchy num_cgroups +cpuset 0 179 +cpu 0 179 +cpuacct 0 179 +blkio 0 179 +memory 0 179 +devices 0 179 +freezer 0 179 +net_cls 0 179 +perf_event 0 179 +net_prio 0 179 +hugetlb 0 179 +pids 0 179 +rdma 0 179 +misc 0 179 +`, // include a trailing newline + expOut: []CGroupSubsystem{ + { + Subsys: "cpuset", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "cpu", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "cpuacct", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "blkio", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "memory", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "devices", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "freezer", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "net_cls", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "perf_event", + Hierarchy: 
0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "net_prio", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "hugetlb", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "pids", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "rdma", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, { + Subsys: "misc", + Hierarchy: 0, + NumCGroups: 179, + Enabled: true, + }, + }, + expErr: nil, + }, + } { + tbl := itbl + t.Run(tbl.name, func(t *testing.T) { + cgph, parseErr := parseCGSubsystems(tbl.contents) + if parseErr != nil { + if tbl.expErr == nil { + t.Fatalf("unexpected error (expected nil): %s", parseErr) + } else if tbl.expErr.Error() != parseErr.Error() { + t.Fatalf("mismatched error:\n got %s\n want %s", parseErr, tbl.expErr) + } + return + } + if len(cgph) != len(tbl.expOut) { + t.Errorf("unexpected length %d; expected %d", len(cgph), len(tbl.expOut)) + } + for i, ss := range cgph { + if i >= len(tbl.expOut) { + t.Errorf("unexpected element %d at end of output: %+v", i, ss) + continue + } + exp := tbl.expOut[i] + if ss != exp { + t.Errorf("%d mismatched subsystem:\n got: %+v\n want: %+v", i, ss, exp) + } + } + }) + } +} diff --git a/cgrouplimits/host_linux.go b/cgrouplimits/host_linux.go index ea92aad..815036e 100644 --- a/cgrouplimits/host_linux.go +++ b/cgrouplimits/host_linux.go @@ -135,8 +135,10 @@ type hostMemInfo struct { // hostMemInfoFieldIdx is an index of the name in /proc/meminfo to the field // index in the hostMemInfo struct. 
-var hostMemInfoFieldIdx = pparser.NewLineKVFileParser(hostMemInfo{}, ":") -var hostVMStatFieldIdx = pparser.NewLineKVFileParser(hostVMStat{}, " ") +var ( + hostMemInfoFieldIdx = pparser.NewLineKVFileParser(hostMemInfo{}, ":") + hostVMStatFieldIdx = pparser.NewLineKVFileParser(hostVMStat{}, " ") +) // fields from /proc/vmstat pulled from "mm/vmstat.c" // generated with c&p of vmstat_text[] followed by some regexp mangling diff --git a/go.mod b/go.mod index 14209ca..17a6edb 100644 --- a/go.mod +++ b/go.mod @@ -1,17 +1,22 @@ module github.com/vimeo/procstats -go 1.18 +go 1.22.0 + +toolchain go1.22.10 require ( github.com/opencontainers/runc v1.0.0-rc9 + github.com/stretchr/testify v1.4.0 golang.org/x/sys v0.0.0-20220823224334-20c2bfdbfe24 ) require ( + github.com/davecgh/go-spew v1.1.1 // indirect github.com/docker/go-units v0.4.0 // indirect github.com/konsorten/go-windows-terminal-sequences v1.0.1 // indirect github.com/opencontainers/runtime-spec v1.0.1 // indirect github.com/pkg/errors v0.8.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/sirupsen/logrus v1.4.2 // indirect - github.com/stretchr/testify v1.4.0 // indirect + gopkg.in/yaml.v2 v2.2.2 // indirect ) diff --git a/go.sum b/go.sum index 7d2c399..73e2b7a 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,7 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220823224334-20c2bfdbfe24 h1:TyKJRhyo17yWxOMCTHKWrc5rddHORMlnZ/j57umaUd8= golang.org/x/sys v0.0.0-20220823224334-20c2bfdbfe24/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= From a4816c3e60d8379da27aeeea36661bdd916727fb Mon Sep 17 00:00:00 2001 From: David Finkel Date: Fri, 6 Dec 2024 10:37:25 -0500 Subject: [PATCH 3/5] cgrouplimits: add cgroup2 support Replace runc/libcontainer with the new cgresolver package, and implement support for parsing the cgroup2 quota/usage files. (to its credit, cgroup2 does have a standardized format) --- cgrouplimits/cgroup_linux.go | 676 +++++++++++++++++++++++++++++------ cgrouplimits/cgroup_test.go | 58 ++- cgrouplimits/cpu.go | 5 +- cgrouplimits/memory.go | 1 + go.mod | 6 - go.sum | 15 - 6 files changed, 621 insertions(+), 140 deletions(-) diff --git a/cgrouplimits/cgroup_linux.go b/cgrouplimits/cgroup_linux.go index da2a92d..74459a2 100644 --- a/cgrouplimits/cgroup_linux.go +++ b/cgrouplimits/cgroup_linux.go @@ -4,196 +4,644 @@ package cgrouplimits import ( + "bytes" + "errors" "fmt" + "io/fs" + "math" "os" "path/filepath" "strconv" "strings" "time" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/vimeo/procstats" + "github.com/vimeo/procstats/cgresolver" "github.com/vimeo/procstats/pparser" ) -const cgroupCFSQuotaFile = "cpu.cfs_quota_us" -const cgroupCFSPeriodFile = "cpu.cfs_period_us" +const ( + cgroupCpuStatFile = "cpu.stat" + cgroupMemStatFile = "memory.stat" + + // cgroups V1 files + cgroupV1CFSQuotaFile = "cpu.cfs_quota_us" + cgroupV1CFSPeriodFile = "cpu.cfs_period_us" + + cgroupV1CpuUserUsageFile = "cpuacct.usage_user" + cgroupV1CpuSysUsageFile = "cpuacct.usage_sys" + cgroupV1CpuAcctStatFile = "cpuacct.stat" + + cgroupV1MemLimitFile = "memory.limit_in_bytes" + cgroupV1MemUsageFile = "memory.usage_in_bytes" + + cgroupV1MemOOMControlFile = "memory.oom_control" + + // cgroups V2 files + cgroupV2CFSQuotaPeriodFile = "cpu.max" + cgroupV2MemLimitFile = "memory.max" + cgroupV2MemEventsFile = "memory.events" + cgroupV2MemCurrentFile = 
"memory.current" +) -const cgroupMemLimitFile = "memory.limit_in_bytes" +func getCGroupCPULimitSingle(cpuPath *cgresolver.CGroupPath) (float64, error) { + switch cpuPath.Mode { + case cgresolver.CGModeV1: + f := os.DirFS(cpuPath.AbsPath) -const cgroupMemOOMControlFile = "memory.oom_control" + quotaµs, quotaReadErr := readIntValFile(f, cgroupV1CFSQuotaFile) + if quotaReadErr != nil { + return -1.0, fmt.Errorf("failed to read quota file %s", quotaReadErr) + } + periodµs, periodReadErr := readIntValFile(f, cgroupV1CFSPeriodFile) + if periodReadErr != nil { + return -1.0, fmt.Errorf("failed to read cfs period file: %s", periodReadErr) + } + if periodµs <= 0 { + return 0.0, nil + } + if quotaµs <= 0 { + return 0.0, nil + } + return float64(quotaµs) / float64(periodµs), nil + case cgresolver.CGModeV2: + maxPath := filepath.Join(cpuPath.AbsPath, cgroupV2CFSQuotaPeriodFile) + quotaStr, quotaReadErr := os.ReadFile(maxPath) + if quotaReadErr != nil { + return -1.0, fmt.Errorf("failed to read max CPU file %q: %w", maxPath, quotaReadErr) + } + maxParts := strings.Fields(string(quotaStr)) + if len(maxParts) != 2 { + return -1.0, fmt.Errorf("unable to parse %q; unexpected number of components: %d", maxPath, len(maxParts)) + } + if maxParts[0] == "max" { + // max == no limit :) + return 0.0, nil + } + limitμs, parseLimitErr := strconv.Atoi(maxParts[0]) + if parseLimitErr != nil { + return -1.0, fmt.Errorf("failed to parse limit component of %q as integer: %w", + maxPath, parseLimitErr) + } + periodμs, parsePeriodErr := strconv.Atoi(maxParts[1]) + if parsePeriodErr != nil { + return -1.0, fmt.Errorf("failed to parse period component of %q as integer: %w", + maxPath, parsePeriodErr) + } + if limitμs <= 0 || periodμs <= 0 { + return 0.0, nil + } + + return float64(limitμs) / float64(periodμs), nil + default: + return -1.0, fmt.Errorf("unknown cgroup type: %d", cpuPath.Mode) + } +} // GetCgroupCPULimit fetches the Cgroup's CPU limit func GetCgroupCPULimit() (float64, error) { - 
cpuPath, cgroupFindErr := cgroups.GetOwnCgroupPath("cpu") + cpuPath, cgroupFindErr := cgresolver.SelfSubsystemPath("cpu") if cgroupFindErr != nil { return -1.0, fmt.Errorf("unable to find cgroup directory: %s", cgroupFindErr) } - quotaFilePath := filepath.Join(cpuPath, cgroupCFSQuotaFile) - quotaStr, quotaReadErr := os.ReadFile(quotaFilePath) - if quotaReadErr != nil { - return -1.0, fmt.Errorf("failed to read quota file %q: %s", quotaFilePath, quotaReadErr) - } - enforcePeriodFilePath := filepath.Join(cpuPath, cgroupCFSPeriodFile) - enforcePeriodStr, periodReadErr := os.ReadFile(enforcePeriodFilePath) - if periodReadErr != nil { - return -1.0, fmt.Errorf("failed to read cfs period file %q: %s", - enforcePeriodFilePath, periodReadErr) - } - quotaµs, parseQuotaErr := strconv.Atoi(strings.TrimSpace(string(quotaStr))) - if parseQuotaErr != nil { - return -1.0, fmt.Errorf("failed to parse contents of %q as integer: %s", - quotaFilePath, parseQuotaErr) - } - periodµs, parsePeriodErr := strconv.Atoi(strings.TrimSpace(string(enforcePeriodStr))) - if parsePeriodErr != nil { - return -1.0, fmt.Errorf("failed to parse contents of %q as integer: %s", - enforcePeriodFilePath, parsePeriodErr) - } + minLimit := math.Inf(+1) + allFailed := true + leafCGReadErr := error(nil) + + for newDir := true; newDir; cpuPath, newDir = cpuPath.Parent() { + cgLim, cgReadErr := getCGroupCPULimitSingle(&cpuPath) + if cgReadErr != nil { + if leafCGReadErr == nil && allFailed { + leafCGReadErr = cgReadErr + } + continue + } - if periodµs <= 0 { - return 0.0, nil + allFailed = false + if (cgLim != -1 && cgLim != 0.0) && cgLim < minLimit { + minLimit = cgLim + } } - if quotaµs <= 0 { - return 0.0, nil + if allFailed { + return -1, leafCGReadErr } - return float64(quotaµs) / float64(periodµs), nil + return minLimit, nil } // GetCgroupMemoryLimit looks up the current process's memory cgroup, and // returns the memory limit. 
func GetCgroupMemoryLimit() (int64, error) { - memPath, cgroupFindErr := cgroups.GetOwnCgroupPath("memory") + memPath, cgroupFindErr := cgresolver.SelfSubsystemPath("memory") if cgroupFindErr != nil { return -1, fmt.Errorf("unable to find cgroup directory: %s", cgroupFindErr) } - limitFilePath := filepath.Join(memPath, cgroupMemLimitFile) - limitFileContents, limitReadErr := os.ReadFile(limitFilePath) - if limitReadErr != nil { - return -1, fmt.Errorf("failed to read cgroup memory limit file %q: %s", - limitFilePath, limitReadErr) + memLimitFilename := "" + switch memPath.Mode { + case cgresolver.CGModeV1: + memLimitFilename = cgroupV1MemLimitFile + case cgresolver.CGModeV2: + memLimitFilename = cgroupV2MemLimitFile + default: + return -1, fmt.Errorf("unknown cgroup type: %d", memPath.Mode) + } + + minLimit := int64(math.MaxInt64) + + allFailed := true + leafCGReadErr := error(nil) + + for newDir := true; newDir; memPath, newDir = memPath.Parent() { + f := os.DirFS(memPath.AbsPath) + + limitBytes, limitReadErr := readIntValFile(f, memLimitFilename) + if limitReadErr != nil { + if leafCGReadErr == nil && allFailed { + leafCGReadErr = fmt.Errorf("failed to read cgroup memory limit file %s", limitReadErr) + } + continue + } + allFailed = false + if limitBytes > 0 && limitBytes < minLimit { + minLimit = limitBytes + } } - limitBytes, parseLimitErr := strconv.ParseInt(strings.TrimSpace(string(limitFileContents)), 10, 64) - if parseLimitErr != nil { - return -1, fmt.Errorf("failed to parse contents of %q as integer: %s", - limitFilePath, parseLimitErr) + if allFailed { + return -1, leafCGReadErr + } + return minLimit, nil +} + +type cg1MemoryStatContents struct { + Cache int64 `pparser:"cache"` + RSS int64 `pparser:"rss"` + RSSHuge int64 `pparser:"rss_huge"` + Shmem int64 `pparser:"shmem"` + MappedFile int64 `pparser:"mapped_file"` + Dirty int64 `pparser:"dirty"` + Writeback int64 `pparser:"writeback"` + WorkingsetRefaultAnon int64 `pparser:"workingset_refault_anon"` + 
WorkingsetRefaultFile int64 `pparser:"workingset_refault_file"` + Swap int64 `pparser:"swap"` + PgpgIn int64 `pparser:"pgpgin"` + PgpgOut int64 `pparser:"pgpgout"` + Pgfault int64 `pparser:"pgfault"` + Pgmajfault int64 `pparser:"pgmajfault"` + InactiveAnon int64 `pparser:"inactive_anon"` + ActiveAnon int64 `pparser:"active_anon"` + InactiveFile int64 `pparser:"inactive_file"` + ActiveFile int64 `pparser:"active_file"` + Unevictable int64 `pparser:"unevictable"` + HierarchicalMemoryLimit int64 `pparser:"hierarchical_memory_limit"` + HierarchicalMemswLimit int64 `pparser:"hierarchical_memsw_limit"` + TotalCache int64 `pparser:"total_cache"` + TotalRSS int64 `pparser:"total_rss"` + TotalRSSHuge int64 `pparser:"total_rss_huge"` + TotalShmem int64 `pparser:"total_shmem"` + TotalMappedFile int64 `pparser:"total_mapped_file"` + TotalDirty int64 `pparser:"total_dirty"` + TotalWriteback int64 `pparser:"total_writeback"` + TotalWorkingsetRefaultAnon int64 `pparser:"total_workingset_refault_anon"` + TotalWorkingsetRefaultFile int64 `pparser:"total_workingset_refault_file"` + TotalSwap int64 `pparser:"total_swap"` + TotalPgpgIn int64 `pparser:"total_pgpgin"` + TotalPgpgOut int64 `pparser:"total_pgpgout"` + TotalPgFault int64 `pparser:"total_pgfault"` + TotalPgMajFault int64 `pparser:"total_pgmajfault"` + TotalInactiveAnon int64 `pparser:"total_inactive_anon"` + TotalActiveAnon int64 `pparser:"total_active_anon"` + TotalInactiveFile int64 `pparser:"total_inactive_file"` + TotalActiveFile int64 `pparser:"total_active_file"` + TotalUnevictable int64 `pparser:"total_unevictable"` + + UnknownFields map[string]int64 `pparser:"skip,unknown"` +} + +var cg1MemStatFieldIdx = pparser.NewLineKVFileParser(cg1MemoryStatContents{}, " ") + +type cg2MemoryStatContents struct { + Anon int64 `pparser:"anon"` + File int64 `pparser:"file"` + Kernel int64 `pparser:"kernel"` + KernelStack int64 `pparser:"kernel_stack"` + Pagetables int64 `pparser:"pagetables"` + SecondaryPagetables int64 
`pparser:"sec_pagetables"` + PerCPU int64 `pparser:"percpu"` + Sock int64 `pparser:"sock"` + VMAlloc int64 `pparser:"vmalloc"` + Shmem int64 `pparser:"shmem"` + Zswap int64 `pparser:"zswap"` + Zswapped int64 `pparser:"zswapped"` + FileMapped int64 `pparser:"file_mapped"` + FileDirty int64 `pparser:"file_dirty"` + FileWriteback int64 `pparser:"file_writeback"` + SwapCached int64 `pparser:"swapcached"` + AnonTHP int64 `pparser:"anon_thp"` + FileTHP int64 `pparser:"file_thp"` + ShmemTHP int64 `pparser:"shmem_thp"` + InactiveAnon int64 `pparser:"inactive_anon"` + ActiveAnon int64 `pparser:"active_anon"` + InactiveFile int64 `pparser:"inactive_file"` + ActiveFile int64 `pparser:"active_file"` + Unevictable int64 `pparser:"unevictable"` + SlabReclaimable int64 `pparser:"slab_reclaimable"` + SlabUnreclaimable int64 `pparser:"slab_unreclaimable"` + SlabTotal int64 `pparser:"slab"` + WorkingsetRefaultAnon int64 `pparser:"workingset_refault_anon"` + WorkingsetRefaultFile int64 `pparser:"workingset_refault_file"` + WorkingsetActivateAnon int64 `pparser:"workingset_activate_anon"` + WorkingsetActivateFile int64 `pparser:"workingset_activate_file"` + WorkingsetRestoreAnon int64 `pparser:"workingset_restore_anon"` + WorkingsetRestoreFile int64 `pparser:"workingset_restore_file"` + WorkingsetNodeReclaim int64 `pparser:"workingset_nodereclaim"` + PgScan int64 `pparser:"pgscan"` + PgSteal int64 `pparser:"pgsteal"` + PgScanKswapd int64 `pparser:"pgscan_kswapd"` + PgscanDirect int64 `pparser:"pgscan_direct"` + PgstealKswapd int64 `pparser:"pgsteal_kswapd"` + PgstealDirect int64 `pparser:"pgsteal_direct"` + PgFault int64 `pparser:"pgfault"` + PgMajFault int64 `pparser:"pgmajfault"` + PgRefill int64 `pparser:"pgrefill"` + PgActivate int64 `pparser:"pgactivate"` + PgDeactivate int64 `pparser:"pgdeactivate"` + PgLazyFree int64 `pparser:"pglazyfree"` + PgLazyFreed int64 `pparser:"pglazyfreed"` + ZswpIn int64 `pparser:"zswpin"` + ZswpOut int64 `pparser:"zswpout"` + ThpFaultAlloc int64 
`pparser:"thp_fault_alloc"` + ThpCollapseAlloc int64 `pparser:"thp_collapse_alloc"` + + UnknownFields map[string]int64 `pparser:"skip,unknown"` +} + +var cg2MemStatFieldIdx = pparser.NewLineKVFileParser(cg2MemoryStatContents{}, " ") + +type cg2MemEvents struct { + Low int64 `pparser:"low"` + High int64 `pparser:"high"` + Max int64 `pparser:"max"` + OOMs int64 `pparser:"oom"` + OOMKills int64 `pparser:"oom_kill"` + OOMGroupKill int64 `pparser:"oom_group_kill"` + + UnknownFields map[string]int64 `pparser:"skip,unknown"` +} + +var cg2MemEventsFieldIdx = pparser.NewLineKVFileParser(cg2MemEvents{}, " ") + +// second return value is the memory limit for this CGroup (-1 is none) +func getCGroupMemoryStatsSingle(memPath *cgresolver.CGroupPath) (MemoryStats, int64, error) { + switch memPath.Mode { + case cgresolver.CGModeV1: + f := os.DirFS(memPath.AbsPath) + ooms, oomErr := getV1CgroupOOMs() + if oomErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to look up OOMKills: %s", + oomErr) + } + + limitBytes, limitErr := readIntValFile(f, cgroupV1MemLimitFile) + if limitErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to read limit: %w", limitErr) + } + + usageBytes, usageErr := readIntValFile(f, cgroupV1MemUsageFile) + if usageErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to read memory usage: %w", usageErr) + } + + mstContents, readErr := os.ReadFile(filepath.Join(memPath.AbsPath, cgroupMemStatFile)) + if readErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to read memory.stat file for cgroup (%q): %w", + filepath.Join(memPath.AbsPath, cgroupMemStatFile), readErr) + } + cg1Stats := cg1MemoryStatContents{} + if parseErr := cg1MemStatFieldIdx.Parse(mstContents, &cg1Stats); parseErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to parse memory.stat file for cgroup (%q): %w", + filepath.Join(memPath.AbsPath, cgroupCpuStatFile), parseErr) + } + + ms := MemoryStats{ + Total: limitBytes, + Free: limitBytes - usageBytes, + 
Available: limitBytes - usageBytes + cg1Stats.TotalCache, + OOMKills: int64(ooms), + } + return ms, limitBytes, nil + case cgresolver.CGModeV2: + f := os.DirFS(memPath.AbsPath) + mstContents, memStatErr := fs.ReadFile(f, cgroupMemStatFile) + if memStatErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to read memory.stat: %w", memStatErr) + } + cg2Stats := cg2MemoryStatContents{} + if parseErr := cg2MemStatFieldIdx.Parse(mstContents, &cg2Stats); parseErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to parse memory.stat file for cgroup (%q): %w", + filepath.Join(memPath.AbsPath, cgroupMemStatFile), parseErr) + } + mevContents, memEventsErr := fs.ReadFile(f, cgroupV2MemEventsFile) + if memEventsErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to read memory.events: %w", memEventsErr) + } + cg2Events := cg2MemEvents{} + if parseErr := cg2MemEventsFieldIdx.Parse(mevContents, &cg2Events); parseErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to parse memory.events file for cgroup (%q): %w", + filepath.Join(memPath.AbsPath, cgroupV2MemEventsFile), parseErr) + } + + usageBytes, usageErr := readIntValFile(f, cgroupV2MemCurrentFile) + if usageErr != nil { + return MemoryStats{}, -1, fmt.Errorf("failed to parse memory.current file for cgroup : %w", usageErr) + } + limitBytes, limitReadErr := readIntValFile(f, cgroupV2MemLimitFile) + if limitReadErr != nil { + if !errors.Is(limitReadErr, fs.ErrNotExist) { + return MemoryStats{}, -1, fmt.Errorf("failed to read cgroup memory limit file %s", + limitReadErr) + } + limitBytes = -1 + } + + return MemoryStats{ + Total: limitBytes, + Free: limitBytes - usageBytes, + // TODO: verify that nothing here is getting double-counted + // subtract total usage from the limit, and add back some memory-categories that can be evicted. + // Notably, cached swap can be evicted immediately, as can any File memory that's not dirty or getting written back. 
+ // SlabReclaimable is kernel memory that can be freed under memory pressure. + Available: limitBytes - usageBytes + cg2Stats.SwapCached + (cg2Stats.File - cg2Stats.FileDirty - cg2Stats.FileWriteback) + cg2Stats.SlabReclaimable, + OOMKills: cg2Events.OOMGroupKill, + }, limitBytes, nil + default: + return MemoryStats{}, -1, fmt.Errorf("unknown cgroup type: %d", memPath.Mode) } - return limitBytes, nil } // GetCgroupMemoryStats queries the current process's memory cgroup's memory // usage/limits. func GetCgroupMemoryStats() (MemoryStats, error) { - memPath, cgroupFindErr := cgroups.GetOwnCgroupPath("memory") + memPath, cgroupFindErr := cgresolver.SelfSubsystemPath("memory") if cgroupFindErr != nil { return MemoryStats{}, fmt.Errorf("unable to find cgroup directory: %s", cgroupFindErr) } - mg := fs.MemoryGroup{} - st := cgroups.NewStats() - if err := mg.GetStats(memPath, st); err != nil { - return MemoryStats{}, fmt.Errorf("failed to query memory stats: %s", err) - } - msUsage := st.MemoryStats.Usage - ooms, oomErr := getCgroupOOMs(memPath) - if oomErr != nil { - return MemoryStats{}, fmt.Errorf("failed to look up OOMKills: %s", - oomErr) - } + minLimit := uint64(math.MaxUint64) + minLimCGMemStats := MemoryStats{} + leafCGReadErr := error(nil) + + allFailed := true + + for newDir := true; newDir; memPath, newDir = memPath.Parent() { + cgMemStats, cgLim, cgReadErr := getCGroupMemoryStatsSingle(&memPath) + if cgReadErr != nil { + if leafCGReadErr == nil && allFailed { + leafCGReadErr = cgReadErr + } + continue + } - ms := MemoryStats{ - Total: int64(msUsage.Limit), - Free: int64(msUsage.Limit) - int64(msUsage.Usage), - Available: int64(msUsage.Limit) - int64(msUsage.Usage) + - int64(st.MemoryStats.Cache), - OOMKills: int64(ooms), + allFailed = false + if cgLim != -1 && uint64(cgLim) < minLimit { + minLimit = uint64(cgLim) + minLimCGMemStats = cgMemStats + } } - return ms, nil + if allFailed { + return MemoryStats{}, leafCGReadErr + } + return minLimCGMemStats, nil } 
-// MemCgroupOOMControl contains the parsed contents of the cgroup's -// memory.oom_control file. -// Note that this struct is a linux-specific data-structure that should not be -// used in portable applications. -type MemCgroupOOMControl struct { +type memCgroupOOMControl struct { OomKillDisable int64 `pparser:"oom_kill_disable"` UnderOom int64 `pparser:"under_oom"` OomKill int64 `pparser:"oom_kill"` UnknownFields map[string]int64 `pparser:"skip,unknown"` } -// ReadCGroupOOMControl reads the oom_control file for the cgroup directory -// passed as an argument. Parsing the contents into a MemCgroupOOMControl -// struct. -// Note that this is a non-portable linux-specific function that should not be -// used in portable applications. -func ReadCGroupOOMControl(memCgroupPath string) (MemCgroupOOMControl, error) { - oomControlPath := filepath.Join(memCgroupPath, cgroupMemOOMControlFile) +var memCgroupOOMControlFieldIdx = pparser.NewLineKVFileParser(memCgroupOOMControl{}, " ") + +// getV1CgroupOOMs looks up the current number of oom kills for the current cgroup. +func getV1CgroupOOMs() (int32, error) { + memPath, cgroupFindErr := cgresolver.SelfSubsystemPath("memory") + if cgroupFindErr != nil { + return -1, fmt.Errorf("unable to find cgroup directory: %s", cgroupFindErr) + } + oomControlPath := filepath.Join(memPath.AbsPath, cgroupV1MemOOMControlFile) oomControlBytes, oomControlReadErr := os.ReadFile(oomControlPath) if oomControlReadErr != nil { - return MemCgroupOOMControl{}, fmt.Errorf( + return 0, fmt.Errorf( "failed to read contents of %q: %s", oomControlPath, oomControlReadErr) } - oomc := MemCgroupOOMControl{} + oomc := memCgroupOOMControl{} parseErr := memCgroupOOMControlFieldIdx.Parse(oomControlBytes, &oomc) if parseErr != nil { - return MemCgroupOOMControl{}, parseErr + return 0, parseErr + } + + // The oom_kill line was only added to the oom_control file in linux + // 4.13, so some systems (docker for Mac) don't have it. 
+ return int32(oomc.OomKill), nil +} + +type cg2CPUStatContents struct { + Usageμs int64 `pparser:"usage_usec"` + Userμs int64 `pparser:"user_usec"` + Sysμs int64 `pparser:"system_usec"` + TotalPeriods int64 `pparser:"nr_periods"` + ThrottledPeriods int64 `pparser:"nr_throttled"` + Throttledμs int64 `pparser:"throttled_usec"` + BurstCount int64 `pparser:"nr_bursts"` + Burstμs int64 `pparser:"burst_usec"` + UnknownFields map[string]int64 `pparser:"skip,unknown"` +} + +var cg2CPUStatContentsFieldIdx = pparser.NewLineKVFileParser(cg2CPUStatContents{}, " ") + +type cg1CPUStatContents struct { + TotalPeriods int64 `pparser:"nr_periods"` + ThrottledPeriods int64 `pparser:"nr_throttled"` + Throttledns int64 `pparser:"throttled_time"` + BurstCount int64 `pparser:"nr_bursts"` + Burstns int64 `pparser:"burst_time"` + Waitns int64 `pparser:"wait_sum"` + UnknownFields map[string]int64 `pparser:"skip,unknown"` +} + +var cg1CPUStatContentsFieldIdx = pparser.NewLineKVFileParser(cg1CPUStatContents{}, " ") + +type cg1CPUAcctStatContents struct { + UserTicks int64 `pparser:"user"` + SysTicks int64 `pparser:"system"` + UnknownFields map[string]int64 `pparser:"skip,unknown"` +} + +var cg1CPUAcctStatContentsFieldIdx = pparser.NewLineKVFileParser(cg1CPUAcctStatContents{}, " ") + +func readIntValFile(f fs.FS, path string) (int64, error) { + conts, readErr := fs.ReadFile(f, path) + if readErr != nil { + return -1, fmt.Errorf("failed to read %q: %w", path, readErr) + } + trimmedConts := bytes.TrimSpace(conts) + if bytes.Equal(trimmedConts, []byte("max")) { + return math.MaxInt64, nil + } + v, parseErr := strconv.ParseInt(string(trimmedConts), 10, 64) + if parseErr != nil { + return -1, fmt.Errorf("failed to parse %q (%q) as integer: %w", path, trimmedConts, parseErr) } - return oomc, nil + return v, nil } -var memCgroupOOMControlFieldIdx = pparser.NewLineKVFileParser(MemCgroupOOMControl{}, " ") +func cgroupV1ReadCPUAcctStats(f fs.FS) (procstats.CPUTime, error) { + cStatsBytes, readErr := 
fs.ReadFile(f, cgroupV1CpuAcctStatFile) + if readErr != nil { + return procstats.CPUTime{}, fmt.Errorf("failed to read cpuacct.stat file: %w", readErr) + } + cStats := cg1CPUAcctStatContents{} + if parseErr := cg1CPUAcctStatContentsFieldIdx.Parse(cStatsBytes, &cStats); parseErr != nil { + return procstats.CPUTime{}, fmt.Errorf("failed to parse cpuacct.stat: %w", parseErr) + } + return procstats.CPUTime{ + Utime: time.Duration(cStats.UserTicks) * 10 * time.Millisecond, + Stime: time.Duration(cStats.SysTicks) * 10 * time.Millisecond, + }, nil + +} + +// CGroupV1CPUUsage reads the CPU usage for a specific V1 cpuacct CGroup (and descendants) +// The fs.FS arg will usually be from os.DirFS, but may be any other fs.FS implementation. +func CGroupV1CPUUsage(f fs.FS) (procstats.CPUTime, error) { + userCPUNS, userReadErr := readIntValFile(f, cgroupV1CpuUserUsageFile) + if userReadErr != nil { + if errors.Is(userReadErr, fs.ErrNotExist) { + // fall back to reading just the cpuacct.stat file + return cgroupV1ReadCPUAcctStats(f) + } -// getCgroupOOMs looks up the current number of oom kills for the cgroup -// specified by the path in its argument. -func getCgroupOOMs(memCgroupPath string) (int32, error) { - oomc, readErr := ReadCGroupOOMControl(memCgroupPath) + return procstats.CPUTime{}, fmt.Errorf("failed to read userspace CPU-time: %w", userReadErr) + } + sysCPUNS, sysReadErr := readIntValFile(f, cgroupV1CpuSysUsageFile) + if sysReadErr != nil { + return procstats.CPUTime{}, fmt.Errorf("failed to read kernelspace CPU-time: %w", sysReadErr) + } + + return procstats.CPUTime{ + Utime: time.Duration(userCPUNS) * time.Nanosecond, + Stime: time.Duration(sysCPUNS) * time.Nanosecond, + }, nil +} + +// CGroupV2CPUUsage reads the CPU usage for a specific V2 cpu CGroup (and descendants) +// The fs.FS arg will usually be from os.DirFS, but may be any other fs.FS implementation. 
+func CGroupV2CPUUsage(f fs.FS) (CPUStats, error) { + cstContents, readErr := fs.ReadFile(f, cgroupCpuStatFile) if readErr != nil { - return 0, readErr + return CPUStats{}, fmt.Errorf("failed to read cpu.stat file for cgroup: %w", + readErr) } + cg2Stats := cg2CPUStatContents{} + if parseErr := cg2CPUStatContentsFieldIdx.Parse(cstContents, &cg2Stats); parseErr != nil { + return CPUStats{}, fmt.Errorf("failed to parse cpu.stat file for cgroup: %w", + readErr) + } + return CPUStats{ + Usage: procstats.CPUTime{ + Utime: time.Duration(cg2Stats.Userμs) * time.Microsecond, + Stime: time.Duration(cg2Stats.Sysμs) * time.Microsecond, + }, + ThrottledTime: time.Duration(cg2Stats.Throttledμs) * time.Microsecond, + }, nil +} - // The oom_kill line was only added to the oom_control file in linux - // 4.13, so some systems (docker for Mac) don't have it. - return int32(oomc.OomKill), nil +func getCGroupCPUStatsSingle(cpuPath *cgresolver.CGroupPath) (CPUStats, float64, error) { + lim, limErr := getCGroupCPULimitSingle(cpuPath) + if limErr != nil { + if !errors.Is(limErr, fs.ErrNotExist) { + return CPUStats{}, -1, fmt.Errorf("failed to read CPU limit: %w", limErr) + } + lim = -1.0 + } + switch cpuPath.Mode { + case cgresolver.CGModeV1: + cstContents, readErr := os.ReadFile(filepath.Join(cpuPath.AbsPath, cgroupCpuStatFile)) + if readErr != nil { + return CPUStats{}, -1, fmt.Errorf("failed to read cpu.stat file for cgroup (%q): %w", + filepath.Join(cpuPath.AbsPath, cgroupCpuStatFile), readErr) + } + cg1Stats := cg1CPUStatContents{} + if parseErr := cg1CPUStatContentsFieldIdx.Parse(cstContents, &cg1Stats); parseErr != nil { + return CPUStats{}, -1, fmt.Errorf("failed to parse cpu.stat file for cgroup (%q): %w", + filepath.Join(cpuPath.AbsPath, cgroupCpuStatFile), readErr) + } + cpuAcctPath, cgroupFindErr := cgresolver.SelfSubsystemPath("cpuacct") + if cgroupFindErr != nil { + return CPUStats{}, -1, fmt.Errorf("unable to find cgroup directory: %s", + cgroupFindErr) + } + f := 
os.DirFS(cpuAcctPath.AbsPath) + usage, usageErr := CGroupV1CPUUsage(f) + if usageErr != nil { + return CPUStats{}, -1, fmt.Errorf("failed to query usage: %w", usageErr) + } + return CPUStats{ + Usage: usage, + ThrottledTime: time.Duration(cg1Stats.Throttledns) * time.Nanosecond, + }, lim, nil + + case cgresolver.CGModeV2: + f := os.DirFS(cpuPath.AbsPath) + cpuStat, usageErr := CGroupV2CPUUsage(f) + return cpuStat, lim, usageErr + default: + return CPUStats{}, -1, fmt.Errorf("unknown cgroup type: %d", cpuPath.Mode) + } } // GetCgroupCPUStats queries the current process's memory cgroup's CPU // usage/limits. func GetCgroupCPUStats() (CPUStats, error) { - cpuPath, cgroupFindErr := cgroups.GetOwnCgroupPath("cpu") - if cgroupFindErr != nil { - return CPUStats{}, fmt.Errorf("unable to find cgroup directory: %s", - cgroupFindErr) - } - cpuAcctPath, cgroupFindErr := cgroups.GetOwnCgroupPath("cpuacct") + cpuPath, cgroupFindErr := cgresolver.SelfSubsystemPath("cpu") if cgroupFindErr != nil { return CPUStats{}, fmt.Errorf("unable to find cgroup directory: %s", cgroupFindErr) } - cg := fs.CpuGroup{} - st := cgroups.NewStats() - if err := cg.GetStats(cpuPath, st); err != nil { - return CPUStats{}, fmt.Errorf("failed to query CPU throttle stats: %s", err) + minLimit := math.Inf(+1) + minCPUStats := CPUStats{} + allFailed := true + leafCGReadErr := error(nil) + + cpuStatsPopulated := false + leafCPUStats := CPUStats{} + + for newDir := true; newDir; cpuPath, newDir = cpuPath.Parent() { + cgCPUStats, cgLim, cgReadErr := getCGroupCPUStatsSingle(&cpuPath) + if cgReadErr != nil { + if leafCGReadErr == nil && allFailed { + leafCGReadErr = cgReadErr + } + continue + } + if !cpuStatsPopulated { + leafCPUStats = cgCPUStats + cpuStatsPopulated = true + } + + allFailed = false + if (cgLim != -1 && cgLim != 0.0) && cgLim < minLimit { + minLimit = cgLim + minCPUStats = cgCPUStats + } } - cag := fs.CpuacctGroup{} - if err := cag.GetStats(cpuAcctPath, st); err != nil { - return CPUStats{}, 
fmt.Errorf("failed to query CPU acct stats: %s", err) + if allFailed { + return CPUStats{}, leafCGReadErr } - - cs := CPUStats{ - Usage: procstats.CPUTime{ - Utime: time.Duration(st.CpuStats.CpuUsage.UsageInUsermode) * - time.Nanosecond, - Stime: time.Duration(st.CpuStats.CpuUsage.UsageInKernelmode) * - time.Nanosecond, - }, - ThrottledTime: time.Duration(st.CpuStats.ThrottlingData.ThrottledTime) * - time.Nanosecond, + if math.IsInf(minLimit, +1) { + // if the limit is still infinite, return the first successfully read stats (the farthest out the leaf) + return leafCPUStats, nil } - - return cs, nil + return minCPUStats, nil } diff --git a/cgrouplimits/cgroup_test.go b/cgrouplimits/cgroup_test.go index 384468a..9001853 100644 --- a/cgrouplimits/cgroup_test.go +++ b/cgrouplimits/cgroup_test.go @@ -1,6 +1,10 @@ package cgrouplimits -import "testing" +import ( + "math" + "testing" + "time" +) func TestCgroupCPULimitsRead(t *testing.T) { limit, err := GetCgroupCPULimit() @@ -14,8 +18,8 @@ func TestCgroupCPULimitsRead(t *testing.T) { if limit < 0.0 { t.Errorf("unexpectedly negative limit: %g", limit) } - if limit > 10000.0 { - t.Errorf("unexpectedly large limit: %g", limit) + if limit > 10000.0 && !math.IsInf(limit, +1) { + t.Errorf("unexpectedly large limit (not infinite): %g", limit) } } @@ -26,7 +30,7 @@ func TestCgroupMemLimitsRead(t *testing.T) { } if err != nil { - t.Fatalf("failed to query CPU limit: %s", err) + t.Fatalf("failed to query Memory limit: %s", err) } if limit < 0 { t.Errorf("unexpectedly negative limit: %d", limit) @@ -35,3 +39,49 @@ func TestCgroupMemLimitsRead(t *testing.T) { t.Errorf("unexpectedly small limit (less than a page): %d", limit) } } + +func TestCgroupMemStatsRead(t *testing.T) { + stats, err := GetCgroupMemoryStats() + if err == ErrCGroupsNotSupported { + t.Skip("unsupported platform") + } + + if err != nil { + t.Fatalf("failed to query Memory usage: %s", err) + } + if stats.Total < 0 { + t.Errorf("unexpectedly negative usage: %d", 
stats.Total) + } + if stats.Total < 4096 { + t.Errorf("unexpectedly small usage (less than a page): %d", stats.Total) + } + if stats.OOMKills < 0 { + t.Errorf("unexpectedly negative OOM-kill count: %d", stats.OOMKills) + } +} + +func TestCgroupCPUStatsRead(t *testing.T) { + stats, err := GetCgroupCPUStats() + if err == ErrCGroupsNotSupported { + t.Skip("unsupported platform") + } + + if err != nil { + t.Fatalf("failed to query CPU usage: %s", err) + } + if stats.Usage.Stime < 0 { + t.Errorf("unexpectedly negative system usage: %s", stats.Usage.Stime) + } + if stats.Usage.Stime < time.Microsecond { + t.Errorf("unexpectedly small system usage: %s", stats.Usage.Stime) + } + if stats.Usage.Utime < 0 { + t.Errorf("unexpectedly negative user usage: %s", stats.Usage.Utime) + } + if stats.Usage.Utime < time.Microsecond { + t.Errorf("unexpectedly small user usage: %s", stats.Usage.Utime) + } + if stats.ThrottledTime < 0 { + t.Errorf("unexpectedly negative throttled time: %s", stats.ThrottledTime) + } +} diff --git a/cgrouplimits/cpu.go b/cgrouplimits/cpu.go index 0191fd6..964041a 100644 --- a/cgrouplimits/cpu.go +++ b/cgrouplimits/cpu.go @@ -1,3 +1,6 @@ +// Package cgrouplimits provides abstractions for getting resource usage on various +// platforms and environments. e.g. it supports running with and +// without cgroups (containers) as well as darwin. package cgrouplimits import ( @@ -35,7 +38,7 @@ type CPUStats struct { // CPUStat queries the current system-state for CPU usage and limits. // Limit is always filled in, other fields are only present if there's a // non-nil error. 
-// Currently only works within cgroups with memory-limits (CS-34) +// Currently only works within cgroups with cpu-limits (CS-34) func CPUStat() (CPUStats, error) { cgcpustats, err := GetCgroupCPUStats() // TODO(CS-34): implement a host-level fallback for the non-l-limit diff --git a/cgrouplimits/memory.go b/cgrouplimits/memory.go index 53b9388..101d41b 100644 --- a/cgrouplimits/memory.go +++ b/cgrouplimits/memory.go @@ -2,6 +2,7 @@ package cgrouplimits // MemoryStats encapsulates memory limits, usage and available. type MemoryStats struct { + // Total memory in the container/system Total int64 // Free treats data in the kernel-page-cache for the cgroup/system as // "used" diff --git a/go.mod b/go.mod index 17a6edb..7e68c0f 100644 --- a/go.mod +++ b/go.mod @@ -5,18 +5,12 @@ go 1.22.0 toolchain go1.22.10 require ( - github.com/opencontainers/runc v1.0.0-rc9 github.com/stretchr/testify v1.4.0 golang.org/x/sys v0.0.0-20220823224334-20c2bfdbfe24 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect - github.com/docker/go-units v0.4.0 // indirect - github.com/konsorten/go-windows-terminal-sequences v1.0.1 // indirect - github.com/opencontainers/runtime-spec v1.0.1 // indirect - github.com/pkg/errors v0.8.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/sirupsen/logrus v1.4.2 // indirect gopkg.in/yaml.v2 v2.2.2 // indirect ) diff --git a/go.sum b/go.sum index 73e2b7a..d416553 100644 --- a/go.sum +++ b/go.sum @@ -1,26 +1,11 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= -github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/konsorten/go-windows-terminal-sequences v1.0.1 
h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/opencontainers/runc v1.0.0-rc9 h1:/k06BMULKF5hidyoZymkoDCzdJzltZpz/UU4LguQVtc= -github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runtime-spec v1.0.1 h1:wY4pOY8fBdSIvs9+IDHC55thBuEulhzfSgKeC1yFvzQ= -github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220823224334-20c2bfdbfe24 h1:TyKJRhyo17yWxOMCTHKWrc5rddHORMlnZ/j57umaUd8= golang.org/x/sys v0.0.0-20220823224334-20c2bfdbfe24/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= From 368da166d3ce454bdb3f9cac9870d077746e3798 Mon Sep 17 00:00:00 2001 From: David 
Finkel Date: Fri, 6 Dec 2024 10:49:26 -0500 Subject: [PATCH 4/5] Add an unexported .eq method to CPUTime This helper simplifies a couple test-conditions. --- proc_read_linux_test.go | 2 +- proc_stats_linux.go | 3 ++- pstats.go | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/proc_read_linux_test.go b/proc_read_linux_test.go index a9c9ec7..bb2de32 100644 --- a/proc_read_linux_test.go +++ b/proc_read_linux_test.go @@ -91,7 +91,7 @@ func TestReadCPUUsage(t *testing.T) { if err != nil { t.Fatal(err) } - if ct == (CPUTime{}) { + if ct.eq(&CPUTime{}) { t.Errorf("want: , got: %+v", ct) } }) diff --git a/proc_stats_linux.go b/proc_stats_linux.go index fc13ed9..089044b 100644 --- a/proc_stats_linux.go +++ b/proc_stats_linux.go @@ -31,7 +31,7 @@ func init() { t := time.NewTicker(time.Second) defer t.Stop() var ct *CPUTime - for i := 0; i < 30 || (*ct == CPUTime{}); i++ { + for i := 0; i < 30 || ct.eq(&CPUTime{}); i++ { <-t.C b, err := os.ReadFile(self) if err != nil { @@ -142,6 +142,7 @@ func linuxParseCPUTime(b []byte) (r CPUTime, err error) { return r, fmt.Errorf("insufficient fields present in stat: %d", len(statFields)) } + utimeTicks, err := strconv.ParseInt(string(statFields[13]), 10, 64) if err != nil { return r, fmt.Errorf("failed to parse the utime column of stat: %s", diff --git a/pstats.go b/pstats.go index da55e97..e081e5e 100644 --- a/pstats.go +++ b/pstats.go @@ -45,6 +45,12 @@ func ProcessCPUTime(pid int) (CPUTime, error) { return readProcessCPUTime(pid) } +// eq reports if the two CPUTimes are equal. +func (c *CPUTime) eq(b *CPUTime) bool { + return c.Utime == b.Utime && + c.Stime == b.Stime +} + // MaxRSS returns the maximum RSS (High Water Mark) of the process with PID // pid. // This is a portable wrapper around platform-specific functions. 
From 869f7719d6f66427310946ba646b87c8566b234b Mon Sep 17 00:00:00 2001 From: David Finkel Date: Fri, 6 Dec 2024 10:53:54 -0500 Subject: [PATCH 5/5] actions: update to go 1.22/1.23 Also, bump actions/setup-go and actions/checkout to the latest available. Also bump staticcheck. --- .github/workflows/go.yml | 6 +++--- .github/workflows/staticcheck.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 6a97944..15c11c3 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -8,17 +8,17 @@ jobs: strategy: matrix: os: [macOS-latest, ubuntu-latest] - goversion: [1.17, 1.18, 1.19] + goversion: ['1.22', '1.23'] steps: - name: Set up Go ${{matrix.goversion}} on ${{matrix.os}} - uses: actions/setup-go@v3 + uses: actions/setup-go@v5 with: go-version: ${{matrix.goversion}} id: go - name: Check out code into the Go module directory - uses: actions/checkout@v1 + uses: actions/checkout@v4 - name: gofmt run: | diff --git a/.github/workflows/staticcheck.yml b/.github/workflows/staticcheck.yml index b7133b5..03adb39 100644 --- a/.github/workflows/staticcheck.yml +++ b/.github/workflows/staticcheck.yml @@ -6,9 +6,9 @@ jobs: name: "staticcheck" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - - uses: dominikh/staticcheck-action@v1.1.0 + - uses: dominikh/staticcheck-action@v1.3.1 with: - version: "2022.1.3" + version: "2024.1.1"