Skip to content
This repository has been archived by the owner on Nov 8, 2024. It is now read-only.

Update lxc config option and fix cgroup2 volume mount #38

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions lxc/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@ var (
hclspec.NewAttr("volumes_enabled", "bool", false),
hclspec.NewLiteral("true"),
),
"default_config": hclspec.NewDefault(
hclspec.NewAttr("default_config", "string", false),
hclspec.NewLiteral("\""+lxc.GlobalConfigItem("lxc.default_config")+"\""),
),
Comment on lines -50 to -53
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have this set in the plugin configuration and not the job spec because under the Nomad security model personas the LXC config file would be controlled by the System Administrator or Nomad Administrator, whereas the jobspec is controlled by the Nomad Operator (the person who runs nomad job run). Generally speaking it doesn't make sense to give the Nomad Operator the ability to pick and choose among files that are owned by the client.

Additionally, the appropriate file would be a property of the client, not all clients, so that belongs in the plugin configuration. Imagine the case where someone submits a job with a particular LXC config path that doesn't exist on the client it happens to get placed on.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Will rework.

"lxc_path": hclspec.NewAttr("lxc_path", "string", false),
"network_mode": hclspec.NewDefault(
hclspec.NewAttr("network_mode", "string", false),
Expand Down Expand Up @@ -88,6 +84,7 @@ var (
"verbosity": hclspec.NewAttr("verbosity", "string", false),
"volumes": hclspec.NewAttr("volumes", "list(string)", false),
"network_mode": hclspec.NewAttr("network_mode", "string", false),
"config_file": hclspec.NewAttr("config_file", "string", false),
})

// capabilities is returned by the Capabilities RPC and indicates what
Expand Down Expand Up @@ -138,8 +135,6 @@ type Config struct {

AllowVolumes bool `codec:"volumes_enabled"`

DefaultConfig string `codec:"default_config"`

LXCPath string `codec:"lxc_path"`

// default networking mode if not specified in task config
Expand All @@ -166,7 +161,7 @@ type TaskConfig struct {
Verbosity string `codec:"verbosity"`
Volumes []string `codec:"volumes"`
NetworkMode string `codec:"network_mode"`
DefaultConfig string `codec:"default_config"`
ConfigFile string `codec:"config_file"`
}

// TaskState is the state which is encoded in the handle returned in
Expand Down
31 changes: 1 addition & 30 deletions lxc/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ type taskHandle struct {
var (
LXCMeasuredCpuStats = []string{"System Mode", "User Mode", "Percent"}

LXCMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage", "Kernel Usage", "Kernel Max Usage"}
LXCMeasuredMemStats = []string{"RSS", "Cache", "Swap"}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You said in the PR description:

Also removed various stats no longer supported with cgroups v2 and some were removed in kernel 5.4+.

Those stats will still be supported on cgroups v1 machines. Unless all supported versions of LXC stopped supporting cgroups v2, we shouldn't remove these stats. I can't find any notes to this in their docs, but I'll admit their docs are also not very searchable 😀

Copy link
Contributor Author

@h0tw1r3 h0tw1r3 Jul 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed them due to provide a consistent view regardless of the system cgroup version. My goal was to support jobs running in a mixed cgroup environment. Provides a seamless path to cgroup v2 only systems.

If you would prefer to keep the stats (with no values for v2), I could ignore errors when fetching measured stats from LXC.
Or, define two different LXCMeasuredMemStats values depending on the cgroup version.

Open to any suggestions.

)

func (h *taskHandle) TaskStatus() *drivers.TaskStatus {
Expand Down Expand Up @@ -151,35 +151,6 @@ func (h *taskHandle) handleStats(ctx context.Context, ch chan *drivers.TaskResou
Measured: LXCMeasuredMemStats,
}

mu := h.container.CgroupItem("memory.max_usage_in_bytes")
for _, rawMemMaxUsage := range mu {
val, err := strconv.ParseUint(rawMemMaxUsage, 10, 64)
if err != nil {
h.logger.Error("failed to get max memory usage", "error", err)
continue
}
ms.MaxUsage = val
}
ku := h.container.CgroupItem("memory.kmem.usage_in_bytes")
for _, rawKernelUsage := range ku {
val, err := strconv.ParseUint(rawKernelUsage, 10, 64)
if err != nil {
h.logger.Error("failed to get kernel memory usage", "error", err)
continue
}
ms.KernelUsage = val
}

mku := h.container.CgroupItem("memory.kmem.max_usage_in_bytes")
for _, rawMaxKernelUsage := range mku {
val, err := strconv.ParseUint(rawMaxKernelUsage, 10, 64)
if err != nil {
h.logger.Error("failed tog get max kernel memory usage", "error", err)
continue
}
ms.KernelMaxUsage = val
}

taskResUsage := drivers.TaskResourceUsage{
ResourceUsage: &drivers.ResourceUsage{
CpuStats: cs,
Expand Down
19 changes: 11 additions & 8 deletions lxc/lxc.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,14 @@ func (d *Driver) initializeContainer(cfg *drivers.TaskConfig, taskConfig TaskCon
}

// use task specific config
defaultConfig := taskConfig.DefaultConfig
if defaultConfig == "" {
// but fallback to global config
defaultConfig = d.config.DefaultConfig
lxcConfigFile := taskConfig.ConfigFile
if taskConfig.ConfigFile == "" {
// fallback to global config
lxcConfigFile = lxc.GlobalConfigItem("lxc.default_config")
}
err = c.LoadConfigFile(defaultConfig)
err = c.LoadConfigFile(lxcConfigFile)
if err != nil {
d.logger.Warn("failed to load default config", "path", defaultConfig, "error", err)
d.logger.Warn("failed to load lxc config file", "path", lxcConfigFile, "error", err)
}

return c, nil
Expand Down Expand Up @@ -150,8 +150,11 @@ func (d *Driver) mountVolumes(c *lxc.Container, cfg *drivers.TaskConfig, taskCon
}

for _, cgroupDev := range devCgroupAllows {
if err := c.SetConfigItem("lxc.cgroup.devices.allow", cgroupDev); err != nil {
return fmt.Errorf("error setting cgroup permission %q error: %v", cgroupDev, err)
// Try unified, fallback to legacy
if err := c.SetConfigItem("lxc.cgroup2.devices.allow", cgroupDev); err != nil {
if err = c.SetConfigItem("lxc.cgroup.devices.allow", cgroupDev); err != nil {
return fmt.Errorf("error setting cgroup permission %q error: %v", cgroupDev, err)
}
}
}

Expand Down