[env_op_images] Add CRI-O pull verification to pulled-images report

nemarjan · nemarjan · commit 4282933558ae · 2026-04-15T15:30:12.000+01:00
Cross-reference the pulled-images report with CRI-O journal logs
from cluster nodes to confirm which images were actually pulled by
the container runtime. Runs automatically when kubeconfig is defined,
same as the pulled-images report itself.

Co-authored-by: Cursor <cursor@cursor.com>

Signed-off-by: nemarjan <nemarjan@redhat.com>
diff --git a/plugins/modules/verify_pulled_report_crio.py b/plugins/modules/verify_pulled_report_crio.py
@@ -0,0 +1,215 @@
+#!/usr/bin/python
+
+# Copyright: (c) 2026, Nemanja Marjanovic <nemarjan@redhat.com>
+# Apache License Version 2.0 (see LICENSE)
+
+from __future__ import absolute_import, division, print_function
+
+__metaclass__ = type
+
+DOCUMENTATION = r"""
+---
+module: verify_pulled_report_crio
+
+short_description: Enrich pulled_images_report with CRI-O pull evidence
+
+description:
+  - Reads the YAML produced by the env_op_images pulled-images report role task.
+  - "Parses CRI-O journal lines for C(msg=\"Pulled image: ...@sha256:...\")."
+  - Adds per-row verification fields using trusted mirror domains from
+    C(summary.mirror_rules).
+
+options:
+  report_path:
+    description: Path to C(pulled_images_report.yaml) (input).
+    required: true
+    type: str
+  output_path:
+    description: Path for the enriched YAML report (output).
+    required: true
+    type: str
+  log_paths:
+    description:
+      - Explicit list of log files to parse (e.g. per-node CRI-O logs).
+      - Combined with files found under I(log_dir) when set.
+    required: false
+    type: list
+    elements: str
+    default: []
+  log_dir:
+    description:
+      - Directory containing CRI-O log files matching I(log_glob).
+    required: false
+    type: str
+  log_glob:
+    description: Glob under I(log_dir). Used only when I(log_dir) is set.
+    required: false
+    default: "*.crio.log"
+    type: str
+
+author:
+  - Red Hat
+
+notes:
+  - Requires PyYAML on the controller (same as other cifmw.general modules).
+"""
+
+EXAMPLES = r"""
+- name: Enrich pulled report using fetched node logs
+  cifmw.general.verify_pulled_report_crio:
+    report_path: "{{ cifmw_env_op_images_pulled_report_path }}"
+    log_dir: "{{ cifmw_env_op_images_crio_logs_dir }}"
+    output_path: "{{ cifmw_env_op_images_verified_report_path }}"
+"""
+
+RETURN = r"""
+changed:
+  description: Whether the output file was written.
+  type: bool
+trusted_mirrors:
+  description: Hostnames extracted from mirror rules in the report summary.
+  type: list
+  elements: str
+log_files:
+  description: Number of log files read.
+  type: int
+entries_with_digest:
+  description: Image rows that had a sha256 digest in C(image_id).
+  type: int
+"""
+
+import glob
+import os
+import re
+
+import yaml
+from ansible.module_utils.basic import AnsibleModule
+
+# Matches a log entry of the form
+#   msg="Pulled image: <reference>@sha256:<hex>"
+# and captures the reference (everything before "@", no whitespace) as
+# ``actual_uri`` and the ``sha256:<hex>`` digest as ``id``.
+LOG_PATTERN = re.compile(
+    r'msg="Pulled image: (?P<actual_uri>[^@\s]+)@(?P<id>sha256:[a-f0-9]+)"'
+)
+
+
+def _collect_log_evidence(paths, module):
+    evidence = {}
+    for path in paths:
+        try:
+            with open(path, "r") as f:
+                for line in f:
+                    match = LOG_PATTERN.search(line)
+                    if match:
+                        evidence[match.group("id")] = match.group("actual_uri")
+        except IOError as exc:
+            module.fail_json(
+                msg="Cannot read CRI-O log file {0}: {1}".format(path, str(exc))
+            )
+    return evidence
+
+
+def run_module():
+    module_args = dict(
+        report_path=dict(type="str", required=True),
+        output_path=dict(type="str", required=True),
+        log_paths=dict(type="list", required=False, elements="str", default=[]),
+        log_dir=dict(type="str", required=False),
+        log_glob=dict(type="str", required=False, default="*.crio.log"),
+    )
+
+    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)
+
+    report_path = module.params["report_path"]
+    output_path = module.params["output_path"]
+    log_paths = module.params["log_paths"] or []
+    log_dir = module.params["log_dir"]
+    log_glob = module.params["log_glob"]
+
+    paths = list(log_paths)
+    if log_dir:
+        paths.extend(sorted(glob.glob(os.path.join(log_dir, log_glob))))
+
+    if not paths:
+        module.fail_json(
+            msg="No CRI-O log files: set log_paths and/or log_dir with matching files."
+        )
+
+    try:
+        with open(report_path, "r") as f:
+            data = yaml.safe_load(f)
+    except IOError as exc:
+        module.fail_json(
+            msg="Cannot read report {0}: {1}".format(report_path, str(exc))
+        )
+    except yaml.YAMLError as exc:
+        module.fail_json(msg="Invalid YAML in report: {0}".format(str(exc)))
+
+    if not isinstance(data, dict):
+        module.fail_json(msg="Report root must be a mapping (dict).")
+
+    trusted_mirrors = set()
+    summary_section = data.get("summary") or {}
+    for rule in summary_section.get("mirror_rules") or []:
+        if not isinstance(rule, dict):
+            continue
+        mirror_url = rule.get("mirror") or ""
+        if mirror_url:
+            domain = mirror_url.split("/")[0].strip()
+            if domain:
+                trusted_mirrors.add(domain)
+
+    log_evidence = _collect_log_evidence(paths, module)
+
+    images_list = data.get("images") or []
+    entries_with_digest = 0
+    for img in images_list:
+        if not isinstance(img, dict):
+            continue
+        image_id = img.get("image_id") or ""
+        sha_match = re.search(r"sha256:[a-f0-9]+", image_id)
+        if not sha_match:
+            continue
+        entries_with_digest += 1
+        img_sha = sha_match.group(0)
+
+        if img_sha in log_evidence:
+            actual_uri = log_evidence[img_sha]
+            actual_domain = actual_uri.split("/")[0].strip()
+            is_mirror_domain = actual_domain in trusted_mirrors
+            img["node_verified_image_origin"] = "mirror" if is_mirror_domain else "source"
+            img["log_evidence_uri"] = actual_uri
+            expected_domain = img.get("expected_pull_location") or ""
+            img["verification_status"] = (
+                "MATCH" if actual_domain == expected_domain else "MISMATCH"
+            )
+        else:
+            img["node_verified_image_origin"] = "cached/unknown"
+            img["verification_status"] = "NOT_FOUND_IN_LOGS"
+
+    result = dict(
+        changed=False,
+        trusted_mirrors=sorted(trusted_mirrors),
+        log_files=len(paths),
+        entries_with_digest=entries_with_digest,
+    )
+
+    if module.check_mode:
+        result["changed"] = True
+        module.exit_json(**result)
+
+    try:
+        with open(output_path, "w") as f:
+            yaml.dump(data, f, default_flow_style=False, sort_keys=False)
+    except IOError as exc:
+        module.fail_json(
+            msg="Cannot write verified report {0}: {1}".format(output_path, str(exc))
+        )
+
+    result["changed"] = True
+    module.exit_json(**result)
+
+
+def main():
+    """Run the module; called when this file is executed as a script."""
+    run_module()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/roles/env_op_images/README.md b/roles/env_op_images/README.md
@@ -4,6 +4,9 @@ A role to gather the container images used in the openstack deployment with spec
 ## Parameters
 * `cifmw_env_op_images_dir`: (String) Directory where the operator_images.yaml will be stored. Defaults to `~/ci-framework-data/artifacts`
 * `cifmw_env_op_images_file`: (String) Name of the file storing the operator images and tags. Defaults to `operator_images.yaml`
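+* `cifmw_env_op_images_pulled_report_file` / `cifmw_env_op_images_pulled_report_path`: (String) File name and full path of the pulled-images policy report (ICSP/IDMS + pod image refs). The file name defaults to `pulled_images_report.yaml`; the path defaults to `<cifmw_env_op_images_dir>/artifacts/<cifmw_env_op_images_pulled_report_file>`.
+* `cifmw_env_op_images_verified_report_file` / `cifmw_env_op_images_verified_report_path`: (String) File name and full path of the CRI-O-enriched report. After the pulled-images report has been generated, the role runs `oc adm node-logs NODE -u crio` for each node and then writes this file with digest-level CRI-O fields (`node_verified_image_origin`, `log_evidence_uri`, `verification_status`). The file name defaults to `pulled_images_report_verified.yaml`; the path defaults to `<cifmw_env_op_images_dir>/artifacts/<cifmw_env_op_images_verified_report_file>`.
+* `cifmw_env_op_images_crio_logs_dir`: (String) Directory where the per-node `*.crio.log` files used during verification are stored. Defaults to `<cifmw_env_op_images_dir>/artifacts/crio_logs`.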
 
 ## Examples
 ```YAML
diff --git a/roles/env_op_images/defaults/main.yml b/roles/env_op_images/defaults/main.yml
@@ -23,6 +23,21 @@ cifmw_env_op_images_file: operator_images.yaml
 cifmw_env_op_images_dryrun: false
 
 cifmw_env_op_images_pulled_report_file: pulled_images_report.yaml
+# Full path of the pulled-images report: used as the destination when
+# pulled_images_report.yml writes the report and as the input of the
+# CRI-O verification tasks.
+cifmw_env_op_images_pulled_report_path: >-
+  {{
+    (cifmw_env_op_images_dir, 'artifacts', cifmw_env_op_images_pulled_report_file)
+    | path_join
+  }}
+
+# File name and full path of the CRI-O-enriched copy of the report
+# (passed as output_path to the verify_pulled_report_crio module).
+cifmw_env_op_images_verified_report_file: pulled_images_report_verified.yaml
+cifmw_env_op_images_verified_report_path: >-
+  {{
+    (cifmw_env_op_images_dir, 'artifacts', cifmw_env_op_images_verified_report_file)
+    | path_join
+  }}
+# Directory that receives one `<node>.crio.log` file per cluster node.
+cifmw_env_op_images_crio_logs_dir: >-
+  {{ (cifmw_env_op_images_dir, 'artifacts', 'crio_logs') | path_join }}
+
 cifmw_env_op_images_pulled_report_namespaces:
   - "{{ cifmw_openstack_namespace | default('openstack') }}"
   - "{{ operator_namespace | default('openstack-operators') }}"
diff --git a/roles/env_op_images/tasks/main.yml b/roles/env_op_images/tasks/main.yml
@@ -158,3 +158,6 @@
 
 - name: Generate pulled images registry report
   ansible.builtin.include_tasks: pulled_images_report.yml
+
+# Included unconditionally: the tasks in that file are themselves guarded
+# by `cifmw_openshift_kubeconfig is defined`.
+- name: Verify pulled report against CRI-O node logs
+  ansible.builtin.include_tasks: verify_pulled_report_crio.yml
diff --git a/roles/env_op_images/tasks/pulled_images_report.yml b/roles/env_op_images/tasks/pulled_images_report.yml
@@ -192,11 +192,7 @@
           summary: "{{ _pulled_report_summary }}"
           images: "{{ _pulled_images_report }}"
       ansible.builtin.copy:
-        dest: >-
-          {{
-            (cifmw_env_op_images_dir, 'artifacts',
-             cifmw_env_op_images_pulled_report_file) | path_join
-          }}
+        dest: "{{ cifmw_env_op_images_pulled_report_path }}"
         content: "{{ _full_report | to_nice_yaml }}"
         mode: "0644"
 
diff --git a/roles/env_op_images/tasks/verify_pulled_report_crio.yml b/roles/env_op_images/tasks/verify_pulled_report_crio.yml
@@ -0,0 +1,102 @@
+---
+# Copyright Red Hat, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Cross-reference the pulled-images report (from pulled_images_report.yml)
+# with CRI-O journal logs from every cluster node to confirm which images
+# were actually pulled by the container runtime.
+
+# Fetch the CRI-O unit log of every node, then let the
+# verify_pulled_report_crio module annotate the pulled-images report with
+# what the runtime actually pulled. Skipped entirely without a kubeconfig.
+- name: Verify pulled report against CRI-O node logs
+  when:
+    - cifmw_openshift_kubeconfig is defined
+  environment:
+    KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}"
+    PATH: "{{ cifmw_path }}"
+  block:
+    - name: Check pulled images report exists
+      ansible.builtin.stat:
+        path: "{{ cifmw_env_op_images_pulled_report_path }}"
+      register: _verify_crio_pulled_stat
+
+    - name: Fail when pulled report is missing
+      when: not _verify_crio_pulled_stat.stat.exists | bool
+      ansible.builtin.fail:
+        msg: >-
+          Pulled report not found at {{ cifmw_env_op_images_pulled_report_path }}.
+          Run pulled_images_report first.
+
+    - name: Ensure CRI-O logs directory exists
+      ansible.builtin.file:
+        path: "{{ cifmw_env_op_images_crio_logs_dir }}"
+        state: directory
+        mode: "0755"
+
+    - name: List cluster nodes
+      ansible.builtin.command:
+        cmd: oc get nodes -o json
+      register: _verify_crio_nodes_json
+      changed_when: false
+
+    - name: Extract node names
+      when: _verify_crio_nodes_json.rc == 0
+      ansible.builtin.set_fact:
+        _verify_crio_node_names: >-
+          {{
+            (_verify_crio_nodes_json.stdout | from_json).get('items', [])
+            | map(attribute='metadata.name') | list
+          }}
+
+    # NOTE(review): "List cluster nodes" already fails on a non-zero rc, so
+    # this task only triggers if that task's failure handling is relaxed.
+    - name: Fail when oc get nodes did not succeed
+      when: _verify_crio_nodes_json.rc != 0
+      ansible.builtin.fail:
+        msg: >-
+          oc get nodes failed (rc={{ _verify_crio_nodes_json.rc }}); cannot fetch CRI-O logs.
+
+    # Filename is sanitised to avoid path-traversal with unusual node names.
+    # failed_when is disabled so that one unreachable node does not fail the
+    # whole task; per-node failures are reported by the next task instead.
+    - name: Fetch CRI-O unit logs per node
+      ansible.builtin.shell: >-
+        oc adm node-logs "{{ item }}" -u crio >
+        "{{ cifmw_env_op_images_crio_logs_dir }}/{{ item | regex_replace('[^A-Za-z0-9._-]+', '_') }}.crio.log"
+      loop: "{{ _verify_crio_node_names | default([]) }}"
+      register: _verify_crio_fetch
+      failed_when: false
+
+    # Non-fatal: some nodes may be unreachable (e.g. NotReady).
+    - name: Warn when node log fetch failed for a node
+      when: item.rc | default(0) != 0
+      ansible.builtin.debug:
+        msg: "oc adm node-logs failed for node (rc={{ item.rc | default('n/a') }}): {{ item.item | default('unknown') }}"
+      loop: "{{ _verify_crio_fetch.results | default([]) }}"
+      loop_control:
+        label: "{{ item.item | default('') }}"
+
+    # The shell redirection creates the file even when the fetch fails, so
+    # only files of at least one byte count as fetched logs.
+    - name: Find fetched CRI-O log files
+      ansible.builtin.find:
+        paths: "{{ cifmw_env_op_images_crio_logs_dir }}"
+        patterns: "*.crio.log"
+        size: "1"
+      register: _verify_crio_log_files
+
+    - name: Enrich pulled report with CRI-O evidence
+      when: _verify_crio_log_files.matched | int > 0
+      cifmw.general.verify_pulled_report_crio:
+        report_path: "{{ cifmw_env_op_images_pulled_report_path }}"
+        log_dir: "{{ cifmw_env_op_images_crio_logs_dir }}"
+        output_path: "{{ cifmw_env_op_images_verified_report_path }}"
+
+    - name: Fail when no CRI-O logs were written
+      when: _verify_crio_log_files.matched | int == 0
+      ansible.builtin.fail:
+        msg: >-
+          No non-empty *.crio.log files under {{ cifmw_env_op_images_crio_logs_dir }}.
+          Check cluster credentials and oc adm node-logs access.
diff --git a/tests/sanity/ignore.txt b/tests/sanity/ignore.txt
@@ -5,3 +5,4 @@ plugins/modules/tempest_list_skipped.py validate-modules:missing-gplv3-license #
 plugins/modules/cephx_key.py validate-modules:missing-gplv3-license # ignore license check
 plugins/modules/krb_request.py validate-modules:missing-gplv3-license # ignore license check
 plugins/modules/pem_read.py validate-modules:missing-gplv3-license # ignore license check
+plugins/modules/verify_pulled_report_crio.py validate-modules:missing-gplv3-license # ignore license check