|
| 1 | +#!/usr/bin/python |
| 2 | + |
| 3 | +# Copyright: (c) 2026, Nemanja Marjanovic <nemarjan@redhat.com> |
| 4 | +# Apache License Version 2.0 (see LICENSE) |
| 5 | + |
| 6 | +from __future__ import absolute_import, division, print_function |
| 7 | + |
| 8 | +__metaclass__ = type |
| 9 | + |
| 10 | +DOCUMENTATION = r""" |
| 11 | +--- |
| 12 | +module: verify_pulled_report_crio |
| 13 | +
|
| 14 | +short_description: Enrich pulled_images_report with CRI-O pull evidence |
| 15 | +
|
| 16 | +description: |
| 17 | + - Reads the YAML produced by the env_op_images pulled-images report role task. |
| 18 | + - "Parses CRI-O journal lines for C(msg=\"Pulled image: ...@sha256:...\")." |
| 19 | + - Adds per-row verification fields using trusted mirror domains from |
| 20 | + C(summary.mirror_rules). |
| 21 | + - When images carry a C(node) field, evidence is matched against the |
| 22 | + specific node's CRI-O log first. If the digest is only found on a |
| 23 | + different node the status is set to C(MISMATCH_CROSS_NODE). |
| 24 | + - Log files are expected to follow the C(<node-name>.crio.log) naming |
| 25 | + convention produced by the role task. |
| 26 | +
|
| 27 | +options: |
| 28 | + report_path: |
| 29 | + description: Path to C(pulled_images_report.yaml) (input). |
| 30 | + required: true |
| 31 | + type: str |
| 32 | + output_path: |
| 33 | + description: Path for the enriched YAML report (output). |
| 34 | + required: true |
| 35 | + type: str |
| 36 | + log_paths: |
| 37 | + description: |
| 38 | + - Explicit list of log files to parse (e.g. per-node CRI-O logs). |
| 39 | + - Combined with files found under I(log_dir) when set. |
| 40 | + required: false |
| 41 | + type: list |
| 42 | + elements: str |
| 43 | + default: [] |
| 44 | + log_dir: |
| 45 | + description: |
| 46 | + - Directory containing CRI-O log files matching I(log_glob). |
| 47 | + required: false |
| 48 | + type: str |
| 49 | + log_glob: |
| 50 | + description: Glob under I(log_dir). Used only when I(log_dir) is set. |
| 51 | + required: false |
| 52 | + default: "*.crio.log" |
| 53 | + type: str |
| 54 | +
|
| 55 | +author: |
| 56 | + - Nemanja Marjanovic (@nemarjan) |
| 57 | +
|
| 58 | +notes: |
| 59 | + - Requires PyYAML on the controller (same as other cifmw.general modules). |
| 60 | +""" |
| 61 | + |
| 62 | +EXAMPLES = r""" |
| 63 | +- name: Enrich pulled report using fetched node logs |
| 64 | + cifmw.general.verify_pulled_report_crio: |
| 65 | + report_path: "{{ cifmw_env_op_images_pulled_report_path }}" |
| 66 | + log_dir: "{{ cifmw_env_op_images_crio_logs_dir }}" |
| 67 | + output_path: "{{ cifmw_env_op_images_verified_report_path }}" |
| 68 | +""" |
| 69 | + |
| 70 | +RETURN = r""" |
| 71 | +changed: |
| 72 | + description: Whether the output file was written (in check mode, whether it would be written). |
| 73 | + type: bool |
| 74 | + returned: always |
| 75 | +trusted_mirrors: |
| 76 | + description: Hostnames extracted from mirror rules in the report summary. |
| 77 | + type: list |
| 78 | + elements: str |
| 79 | + returned: always |
| 80 | +log_files: |
| 81 | + description: Number of log files read. |
| 82 | + type: int |
| 83 | + returned: always |
| 84 | +entries_with_digest: |
| 85 | + description: Image rows that had a sha256 digest in C(image_id). |
| 86 | + type: int |
| 87 | + returned: always |
| 88 | +cross_node_entries: |
| 89 | + description: >- |
| 90 | + Image rows where evidence was found only on a different node |
| 91 | + than where the pod ran (C(MISMATCH_CROSS_NODE)). |
| 92 | + type: int |
| 93 | + returned: always |
| 94 | +nodes_with_evidence: |
| 95 | + description: >- |
| 96 | + Node names that had at least one C(Pulled image) log entry. |
| 97 | + type: list |
| 98 | + elements: str |
| 99 | + returned: always |
| 100 | +""" |
| 101 | + |
| 102 | +import glob |
| 103 | +import os |
| 104 | +import re |
| 105 | + |
| 106 | +import yaml |
| 107 | +from ansible.module_utils.basic import AnsibleModule |
| 108 | + |
# Matches the CRI-O journal fragment
#   msg="Pulled image: <pull-uri>@sha256:<hex>"
# and captures the pull URI (no "@" or whitespace) as ``actual_uri`` and the
# digest, including its "sha256:" prefix, as ``id``.
LOG_PATTERN = re.compile(
    r'msg="Pulled image: (?P<actual_uri>[^@\s]+)@(?P<id>sha256:[a-f0-9]+)"'
)

# Finds a "sha256:<hex>" digest anywhere inside a string (e.g. an image id).
SHA256_PATTERN = re.compile(r"sha256:[a-f0-9]+")
| 113 | + |
| 114 | + |
| 115 | +def _node_from_path(path): |
| 116 | + """Derive node name from the ``<node>.crio.log`` naming convention.""" |
| 117 | + basename = os.path.basename(path) |
| 118 | + suffix_pos = basename.find(".crio.log") |
| 119 | + if suffix_pos > 0: |
| 120 | + return basename[:suffix_pos] |
| 121 | + return os.path.splitext(basename)[0] |
| 122 | + |
| 123 | + |
| 124 | +def _domain_from_uri(uri): |
| 125 | + """Return the registry host (+ optional port) from an image URI.""" |
| 126 | + return uri.split("/")[0].strip() |
| 127 | + |
| 128 | + |
def _apply_evidence(img, actual_uri, evidence_node, trusted_mirrors):
    """Record log evidence on an image row and return the pull domain.

    Sets ``node_verified_image_origin`` (``"mirror"`` when the registry host
    of ``actual_uri`` is in ``trusted_mirrors``, otherwise ``"source"``),
    ``log_evidence_uri`` and ``log_evidence_node`` on ``img`` in place.

    Returns:
        The registry host extracted from ``actual_uri``.
    """
    pulled_from = _domain_from_uri(actual_uri)

    if pulled_from in trusted_mirrors:
        origin = "mirror"
    else:
        origin = "source"

    img["node_verified_image_origin"] = origin
    img["log_evidence_uri"] = actual_uri
    img["log_evidence_node"] = evidence_node
    return pulled_from
| 138 | + |
| 139 | + |
def _collect_log_evidence(paths, module):
    """Parse CRI-O logs into per-node and global evidence dicts.

    Args:
        paths: Log file paths; the node name is derived from each file name.
        module: Object providing ``fail_json(msg=...)``, called when a log
            file cannot be read.

    Returns:
        A ``(per_node, global_evidence)`` tuple where

        * ``per_node`` is ``{node_name: {sha256_digest: pull_uri}}`` (every
          path contributes a node key, even when no pull line was found), and
        * ``global_evidence`` is ``{sha256_digest: (pull_uri, node_name)}``.

        For a repeated digest the last matching line wins, both within a
        node and, for the global dict, across files in ``paths`` order.
    """
    per_node = {}
    global_evidence = {}
    for path in paths:
        node = _node_from_path(path)
        node_ev = per_node.setdefault(node, {})
        try:
            # Read bytes and decode leniently: journal output may contain
            # bytes that are not valid text, and a strict text-mode read
            # would abort with an uncaught UnicodeDecodeError.
            with open(path, "rb") as f:
                for raw_line in f:
                    # Cheap pre-filter; the pattern requires this literal.
                    if b"Pulled image" not in raw_line:
                        continue
                    line = raw_line.decode("utf-8", "replace")
                    match = LOG_PATTERN.search(line)
                    if not match:
                        continue
                    digest = match.group("id")
                    uri = match.group("actual_uri")
                    node_ev[digest] = uri
                    global_evidence[digest] = (uri, node)
        except (IOError, OSError) as exc:
            module.fail_json(
                msg="Cannot read CRI-O log file {0}: {1}".format(path, str(exc))
            )
    return per_node, global_evidence
| 167 | + |
| 168 | + |
def _dedupe_paths(paths):
    """Return ``paths`` without duplicates, keeping first-seen order."""
    seen = set()
    unique = []
    for path in paths:
        if path not in seen:
            seen.add(path)
            unique.append(path)
    return unique


def _trusted_mirror_domains(data):
    """Collect registry hosts from ``summary.mirror_rules[*].mirror``."""
    domains = set()
    summary_section = data.get("summary")
    if not isinstance(summary_section, dict):
        return domains
    rules = summary_section.get("mirror_rules")
    if not isinstance(rules, list):
        return domains
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        mirror_url = rule.get("mirror") or ""
        if mirror_url:
            domain = _domain_from_uri(mirror_url)
            if domain:
                domains.add(domain)
    return domains


def _match_status(img, actual_domain):
    """Compare the pull domain with the row's ``expected_pull_location``."""
    expected_domain = img.get("expected_pull_location") or ""
    return "MATCH" if actual_domain == expected_domain else "MISMATCH"


def _read_existing_output(path):
    """Return the current text of ``path``, or ``None`` if it is unreadable."""
    try:
        with open(path, "r") as f:
            return f.read()
    except (IOError, OSError, ValueError):
        # Missing, unreadable or undecodable: treat as "needs (re)writing".
        return None


def run_module():
    """Enrich the pulled-images report with CRI-O pull evidence.

    Reads the report named by ``report_path``, parses the CRI-O logs given by
    ``log_paths`` and/or ``log_dir`` + ``log_glob``, annotates every image row
    that carries a sha256 digest in ``image_id`` and writes the result to
    ``output_path``.

    Per-row ``verification_status`` values:

    * ``MATCH`` / ``MISMATCH`` - evidence found on the row's own node (or on
      any node when the row has no ``node``); the pull domain is compared
      with ``expected_pull_location``.
    * ``MISMATCH_CROSS_NODE`` - the row names a node but the digest was only
      found in another node's log.
    * ``NOT_FOUND_IN_LOGS`` - no log mentions the digest.

    ``changed`` is true only when the rendered report differs from what is
    already stored at ``output_path``; in check mode nothing is written.
    The function exits through ``module.exit_json`` / ``module.fail_json``.
    """
    module_args = dict(
        report_path=dict(type="str", required=True),
        output_path=dict(type="str", required=True),
        log_paths=dict(type="list", required=False, elements="str", default=[]),
        log_dir=dict(type="str", required=False),
        log_glob=dict(type="str", required=False, default="*.crio.log"),
    )

    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)

    report_path = module.params["report_path"]
    output_path = module.params["output_path"]
    log_paths = module.params["log_paths"] or []
    log_dir = module.params["log_dir"]
    log_glob = module.params["log_glob"]

    paths = list(log_paths)
    if log_dir:
        paths.extend(sorted(glob.glob(os.path.join(log_dir, log_glob))))
    # A file listed explicitly and also matched by the glob is parsed once.
    paths = _dedupe_paths(paths)

    if not paths:
        module.fail_json(
            msg="No CRI-O log files: set log_paths and/or log_dir with matching files."
        )

    try:
        # Binary mode lets PyYAML do the decoding, so undecodable input
        # surfaces as a YAMLError instead of an uncaught UnicodeDecodeError.
        with open(report_path, "rb") as f:
            data = yaml.safe_load(f)
    except (IOError, OSError) as exc:
        module.fail_json(
            msg="Cannot read report {0}: {1}".format(report_path, str(exc))
        )
    except yaml.YAMLError as exc:
        module.fail_json(msg="Invalid YAML in report: {0}".format(str(exc)))

    if not isinstance(data, dict):
        module.fail_json(msg="Report root must be a mapping (dict).")

    trusted_mirrors = _trusted_mirror_domains(data)

    per_node_evidence, global_evidence = _collect_log_evidence(paths, module)

    images_list = data.get("images")
    if not isinstance(images_list, list):
        images_list = []

    entries_with_digest = 0
    cross_node_entries = 0
    for img in images_list:
        if not isinstance(img, dict):
            continue
        image_id = img.get("image_id") or ""
        sha_match = SHA256_PATTERN.search(image_id)
        if not sha_match:
            continue
        entries_with_digest += 1
        img_sha = sha_match.group(0)
        img_node = img.get("node") or ""

        # Evidence from the log of the node the row says the image ran on.
        node_local = per_node_evidence.get(img_node, {}) if img_node else {}

        if img_sha in node_local:
            actual_domain = _apply_evidence(
                img, node_local[img_sha], img_node, trusted_mirrors
            )
            img["verification_status"] = _match_status(img, actual_domain)
        elif img_sha in global_evidence:
            actual_uri, evidence_node = global_evidence[img_sha]
            actual_domain = _apply_evidence(
                img, actual_uri, evidence_node, trusted_mirrors
            )
            if img_node:
                # The row names a node, yet only another node's log has it.
                img["verification_status"] = "MISMATCH_CROSS_NODE"
                cross_node_entries += 1
            else:
                img["verification_status"] = _match_status(img, actual_domain)
        else:
            img["node_verified_image_origin"] = "cached/unknown"
            img["log_evidence_uri"] = None
            img["log_evidence_node"] = None
            img["verification_status"] = "NOT_FOUND_IN_LOGS"

    nodes_with_evidence = sorted(n for n, ev in per_node_evidence.items() if ev)

    # Render first so the result can be compared with the file on disk; this
    # makes ``changed`` truthful and gives check mode a real prediction.
    rendered = yaml.dump(data, default_flow_style=False, sort_keys=False)
    changed = _read_existing_output(output_path) != rendered

    result = dict(
        changed=changed,
        trusted_mirrors=sorted(trusted_mirrors),
        log_files=len(paths),
        entries_with_digest=entries_with_digest,
        cross_node_entries=cross_node_entries,
        nodes_with_evidence=nodes_with_evidence,
    )

    if module.check_mode or not changed:
        module.exit_json(**result)

    try:
        with open(output_path, "w") as f:
            f.write(rendered)
    except (IOError, OSError) as exc:
        module.fail_json(
            msg="Cannot write verified report {0}: {1}".format(output_path, str(exc))
        )

    module.exit_json(**result)
| 291 | + |
| 292 | + |
def main():
    """Entry point used when Ansible executes this file as a module."""
    run_module()


if __name__ == "__main__":
    main()
0 commit comments