From d50992f2e23787d1ac8ec67a17a91fbafe859c52 Mon Sep 17 00:00:00 2001 From: "Christopher M. Cantalupo" Date: Fri, 5 Jun 2026 16:28:48 -0700 Subject: [PATCH 1/2] resctrl-mon: revert CRI-O pidfile fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f7883b9e ("resctrl-mon: read container PID from CRI-O pidfile"). The readContainerPID() fallback reads container PIDs from CRI-O's internal storage at /run/containers/storage/overlay-containers/. The default Helm daemonset template does not mount this path, so the function always receives ENOENT and returns 0 — the code is a complete no-op in the shipped deployment and no information is leaked. However, the fallback cannot function without adding a hostPath volume for /run/containers/storage. That directory also contains the full OCI runtime spec (config.json) for every container on the node, including process environment variables and secrets. Mounting it grants the monitoring plugin read access to credentials of all host containers — an unacceptable privilege escalation for a read-only monitoring plugin. Rather than ship non-functional code whose only activation path is insecure, remove it entirely. The fallback was added to work around CRI-O <= 1.35 not populating Container.Pid in NRI events. The proper fix is upstream in CRI-O (commit 19d319695) and will be available in CRI-O >= 1.36. Until then, the plugin is effectively a no-op on affected CRI-O versions: mon_group directories are created but remain empty since no PIDs can be assigned. containerd users are unaffected — PID is always populated via NRI. Signed-off-by: Christopher M. Cantalupo --- cmd/plugins/resctrl-mon/plugin.go | 39 +++---------------------------- 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/cmd/plugins/resctrl-mon/plugin.go b/cmd/plugins/resctrl-mon/plugin.go index b90530db5..52b067b27 100644 --- a/cmd/plugins/resctrl-mon/plugin.go +++ b/cmd/plugins/resctrl-mon/plugin.go @@ -19,8 +19,6 @@ import ( "fmt" "os" "path/filepath" - "strconv" - "strings" "sync" "time" @@ -143,9 +141,6 @@ func (p *plugin) Synchronize(ctx context.Context, pods []*api.PodSandbox, contai continue } pid := int(ctr.GetPid()) - if pid == 0 { - pid = readContainerPID(ctr.GetId()) - } if pid > 0 { monGroupDir := p.state.getMonGroupDir(podUID) if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { @@ -275,23 +270,15 @@ func (p *plugin) PostStartContainer(ctx context.Context, pod *api.PodSandbox, ct return nil } - // Fallback: if the NRI event has no PID (CRI-O <= 1.35 does not - // populate Container.Pid), read it from the CRI-O pidfile. - if pid == 0 { - pid = readContainerPID(ctr.GetId()) - if pid > 0 { - log.Debugf("PostStartContainer %s: read pid %d from pidfile", ctrName, pid) - } - } - + // Fallback: write the init PID if StartContainer didn't. if pid > 0 { if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { log.Warnf("PostStartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) } else { - log.Infof("PostStartContainer %s: assigned pid %d to mon_group %s", ctrName, pid, monGroupDir) + log.Infof("PostStartContainer %s: fallback assigned pid %d to mon_group %s", ctrName, pid, monGroupDir) } } else { - log.Warnf("PostStartContainer %s: PID not available from NRI or pidfile", ctrName) + log.Warnf("PostStartContainer %s: PID still 0 after start, unexpected", ctrName) } return nil @@ -395,23 +382,3 @@ func getRDTClass(ctr *api.Container) string { func pprintCtr(pod *api.PodSandbox, ctr *api.Container) string { return fmt.Sprintf("%s/%s:%s", pod.GetNamespace(), pod.GetName(), ctr.GetName()) } - -// readContainerPID reads the container init PID from the CRI-O pidfile. -// CRI-O versions <= 1.35 do not populate Container.Pid in NRI events. -// As a fallback, we read the PID from the container's pidfile at -// /run/containers/storage/overlay-containers//userdata/pidfile. -// Returns 0 if the PID cannot be read (e.g., running under containerd). -func readContainerPID(containerID string) int { - pidfile := filepath.Join("/run/containers/storage/overlay-containers", containerID, "userdata/pidfile") - data, err := os.ReadFile(pidfile) - if err != nil { - log.Debugf("readContainerPID: cannot read %s: %v", pidfile, err) - return 0 - } - pid, err := strconv.Atoi(strings.TrimSpace(string(data))) - if err != nil { - log.Debugf("readContainerPID: invalid pid in %s: %v", pidfile, err) - return 0 - } - return pid -} From 94c0ccedf5bfabb78a61a887941412e88f55b313 Mon Sep 17 00:00:00 2001 From: "Christopher M. Cantalupo" Date: Fri, 5 Jun 2026 16:56:09 -0700 Subject: [PATCH 2/2] resctrl-mon: reject CRI-O versions that lack NRI PID support CRI-O versions before 1.36 do not populate Container.Pid in NRI events (the fix is CRI-O commit 19d319695, not yet in a release). Without a PID, the plugin cannot assign container tasks to monitoring groups, and empty mon_groups silently report zero for all metrics. Rather than degrade silently, fail hard during Configure when the runtime identifies as CRI-O with a version below 1.36. The error message directs the user to upgrade CRI-O or use containerd. This makes the failure mode explicit: the DaemonSet will enter CrashLoopBackOff with a clear log message rather than appearing healthy while producing no useful monitoring data. Signed-off-by: Christopher M. Cantalupo --- cmd/plugins/resctrl-mon/plugin.go | 40 ++++++++++++++++++++++++-- cmd/plugins/resctrl-mon/plugin_test.go | 37 ++++++++++++++++++++++++ deployment/helm/resctrl-mon/README.md | 6 ++-- docs/monitoring/resctrl-mon.md | 16 +++++++++-- 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/cmd/plugins/resctrl-mon/plugin.go b/cmd/plugins/resctrl-mon/plugin.go index 52b067b27..c627e9582 100644 --- a/cmd/plugins/resctrl-mon/plugin.go +++ b/cmd/plugins/resctrl-mon/plugin.go @@ -19,6 +19,8 @@ import ( "fmt" "os" "path/filepath" + "strconv" + "strings" "sync" "time" @@ -72,6 +74,9 @@ func newPlugin() *plugin { // Configure handles connecting to container runtime's NRI server. func (p *plugin) Configure(ctx context.Context, config, runtime, version string) (stub.EventMask, error) { log.Infof("Connected to %s %s...", runtime, version) + if err := checkRuntimeVersion(runtime, version); err != nil { + return 0, err + } if config != "" { log.Debugf("loading configuration from NRI server") if err := p.setConfig([]byte(config)); err != nil { @@ -270,15 +275,14 @@ func (p *plugin) PostStartContainer(ctx context.Context, pod *api.PodSandbox, ct return nil } - // Fallback: write the init PID if StartContainer didn't. if pid > 0 { if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { log.Warnf("PostStartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) } else { - log.Infof("PostStartContainer %s: fallback assigned pid %d to mon_group %s", ctrName, pid, monGroupDir) + log.Infof("PostStartContainer %s: assigned pid %d to mon_group %s", ctrName, pid, monGroupDir) } } else { - log.Warnf("PostStartContainer %s: PID still 0 after start, unexpected", ctrName) + log.Warnf("PostStartContainer %s: PID=0, cannot assign to mon_group (runtime did not provide PID via NRI)", ctrName) } return nil @@ -382,3 +386,33 @@ func getRDTClass(ctr *api.Container) string { func pprintCtr(pod *api.PodSandbox, ctr *api.Container) string { return fmt.Sprintf("%s/%s:%s", pod.GetNamespace(), pod.GetName(), ctr.GetName()) } + +// checkRuntimeVersion verifies that the container runtime provides PIDs via NRI. +// CRI-O versions before 1.36 do not populate Container.Pid in NRI events, +// making the plugin unable to assign tasks to monitoring groups. +func checkRuntimeVersion(runtime, version string) error { + if !strings.EqualFold(runtime, "cri-o") { + return nil + } + // Normalize: strip leading "v" and any pre-release/build suffix. + version = strings.TrimPrefix(version, "v") + if idx := strings.IndexAny(version, "-+"); idx != -1 { + version = version[:idx] + } + parts := strings.SplitN(version, ".", 3) + if len(parts) < 2 { + return fmt.Errorf("CRI-O version %q: unable to parse; require >= 1.36 for NRI PID support", version) + } + major, err := strconv.Atoi(parts[0]) + if err != nil { + return fmt.Errorf("CRI-O version %q: unable to parse major version: %w", version, err) + } + minor, err := strconv.Atoi(parts[1]) + if err != nil { + return fmt.Errorf("CRI-O version %q: unable to parse minor version: %w", version, err) + } + if major < 1 || (major == 1 && minor < 36) { + return fmt.Errorf("CRI-O %s does not provide container PIDs via NRI (requires >= 1.36)", version) + } + return nil +} diff --git a/cmd/plugins/resctrl-mon/plugin_test.go b/cmd/plugins/resctrl-mon/plugin_test.go index 272c91754..7633760cb 100644 --- a/cmd/plugins/resctrl-mon/plugin_test.go +++ b/cmd/plugins/resctrl-mon/plugin_test.go @@ -367,3 +367,40 @@ func TestStopContainer_RemovesStateOnRmdirFailure(t *testing.T) { require.NoError(t, err) assert.Equal(t, 0, p.state.podCount()) } + +func TestCheckRuntimeVersion(t *testing.T) { + tests := []struct { + name string + runtime string + version string + wantErr bool + }{ + {"containerd any version", "containerd", "2.0.0", false}, + {"cri-o 1.36.0", "cri-o", "1.36.0", false}, + {"cri-o 1.37.0", "cri-o", "1.37.0", false}, + {"cri-o 2.0.0", "cri-o", "2.0.0", false}, + {"cri-o 1.35.0 rejected", "cri-o", "1.35.0", true}, + {"cri-o 1.35.2 rejected", "cri-o", "1.35.2", true}, + {"cri-o 1.31.5 rejected", "cri-o", "1.31.5", true}, + {"cri-o 0.99.0 rejected", "cri-o", "0.99.0", true}, + {"CRI-O case insensitive", "CRI-O", "1.35.0", true}, + {"cri-o no patch", "cri-o", "1.36", false}, + {"cri-o unparsable", "cri-o", "latest", true}, + {"cri-o v prefix accepted", "cri-o", "v1.36.0", false}, + {"cri-o v prefix rejected", "cri-o", "v1.35.0", true}, + {"cri-o strips pre-release suffix", "cri-o", "1.36.0-rc1", false}, + {"cri-o strips pre-release old version", "cri-o", "1.35.0-beta.1", true}, + {"cri-o strips build metadata", "cri-o", "1.36.0+build123", false}, + {"cri-o strips v prefix and pre-release", "cri-o", "v1.35.0-alpha.0", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := checkRuntimeVersion(tt.runtime, tt.version) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/deployment/helm/resctrl-mon/README.md b/deployment/helm/resctrl-mon/README.md index 206e3eced..b77809acf 100644 --- a/deployment/helm/resctrl-mon/README.md +++ b/deployment/helm/resctrl-mon/README.md @@ -30,8 +30,10 @@ to support Application Energy Telemetry (AET) via Kepler passive mode. installation. - CRI-O - - At least [v1.26.0](https://github.com/cri-o/cri-o/releases/tag/v1.26.0) - release version to use the NRI feature + - At least [v1.36.0](https://github.com/cri-o/cri-o/releases/tag/v1.36.0) + release version. CRI-O >= 1.36 is required because earlier versions do + not provide container PIDs via NRI, which prevents the plugin from + assigning tasks to monitoring groups. - Enable NRI feature by following [these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table) detailed instructions. You can optionally enable the NRI in CRI-O using diff --git a/docs/monitoring/resctrl-mon.md b/docs/monitoring/resctrl-mon.md index 7cd6c3198..ec98a0aa3 100644 --- a/docs/monitoring/resctrl-mon.md +++ b/docs/monitoring/resctrl-mon.md @@ -86,8 +86,10 @@ RMID allocation is delegated entirely to the Linux kernel: ### Prerequisites -- Containerd v1.7+ or CRI-O v1.26+ -- Enable NRI in /etc/containerd/config.toml: +- Containerd v1.7+ or CRI-O v1.36+ +- Enable NRI in the container runtime: + + **containerd** — in `/etc/containerd/config.toml`: ```toml [plugins."io.containerd.nri.v1.nri"] @@ -100,6 +102,16 @@ RMID allocation is delegated entirely to the Linux kernel: socket_path = "/var/run/nri/nri.sock" ``` + **CRI-O** — in `/etc/crio/crio.conf.d/10-nri.conf` (or equivalent): + + ```toml + [crio.nri] + enable_nri = true + ``` + + See the [CRI-O NRI documentation](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table) + for additional options. + - Intel CPU with RDT monitoring support - resctrl filesystem mounted at `/sys/fs/resctrl`