|
| 1 | +#!/usr/bin/python |
| 2 | + |
| 3 | +# Copyright: (c) 2026, Nemanja Marjanovic <nemarjan@redhat.com> |
| 4 | +# Apache License Version 2.0 (see LICENSE) |
| 5 | + |
| 6 | +from __future__ import absolute_import, division, print_function |
| 7 | + |
| 8 | +__metaclass__ = type |
| 9 | + |
# Ansible module documentation (YAML). Parsed by ansible-doc / validate-modules,
# so the indentation inside the string is significant.
DOCUMENTATION = r"""
---
module: verify_pulled_report_crio

short_description: Enrich pulled_images_report with CRI-O pull evidence

description:
  - Reads the YAML produced by the env_op_images pulled-images report role task.
  - "Parses CRI-O journal lines for C(msg=\"Pulled image: ...@sha256:...\")."
  - Adds per-row verification fields using trusted mirror domains from
    C(summary.mirror_rules).
  - Every image row whose C(image_id) contains a sha256 digest receives
    C(node_verified_image_origin) and C(verification_status). Rows whose digest
    is found in the logs also receive C(log_evidence_uri).
  - When the same digest appears in several log lines, the last line read wins.
  - Fails when neither I(log_paths) nor I(log_dir) yields at least one log file.

options:
  report_path:
    description: Path to C(pulled_images_report.yaml) (input).
    required: true
    type: str
  output_path:
    description: Path for the enriched YAML report (output).
    required: true
    type: str
  log_paths:
    description:
      - Explicit list of log files to parse (e.g. per-node CRI-O logs).
      - Combined with files found under I(log_dir) when set.
    required: false
    type: list
    elements: str
    default: []
  log_dir:
    description:
      - Directory containing CRI-O log files matching I(log_glob).
    required: false
    type: str
  log_glob:
    description: Glob under I(log_dir). Used only when I(log_dir) is set.
    required: false
    default: "*.crio.log"
    type: str

requirements:
  - PyYAML

author:
  - Red Hat

notes:
  - Requires PyYAML on the controller (same as other cifmw.general modules).
  - Supports check mode. The report is evaluated but I(output_path) is not
    written.
"""
| 56 | + |
# Usage examples (YAML) shown by ansible-doc; one per supported way of
# supplying the CRI-O logs (directory + glob, or an explicit file list).
EXAMPLES = r"""
- name: Enrich pulled report using fetched node logs
  cifmw.general.verify_pulled_report_crio:
    report_path: "{{ cifmw_env_op_images_pulled_report_path }}"
    log_dir: "{{ cifmw_env_op_images_crio_logs_dir }}"
    output_path: "{{ cifmw_env_op_images_verified_report_path }}"

- name: Enrich pulled report from an explicit list of log files
  cifmw.general.verify_pulled_report_crio:
    report_path: "{{ cifmw_env_op_images_pulled_report_path }}"
    log_paths:
      - /tmp/crio-logs/master-0.crio.log
      - /tmp/crio-logs/worker-0.crio.log
    output_path: "{{ cifmw_env_op_images_verified_report_path }}"
"""
| 64 | + |
# Return-value documentation (YAML). Each entry carries ``returned`` so that
# ansible-doc can state when the key is present in the module result.
RETURN = r"""
changed:
  description: Whether the output file was written (or would be written in check mode).
  returned: success
  type: bool
trusted_mirrors:
  description: Hostnames extracted from mirror rules in the report summary.
  returned: success
  type: list
  elements: str
log_files:
  description: Number of log files read.
  returned: success
  type: int
entries_with_digest:
  description: Image rows that had a sha256 digest in C(image_id).
  returned: success
  type: int
"""
| 80 | + |
| 81 | +import glob |
| 82 | +import os |
| 83 | +import re |
| 84 | + |
| 85 | +import yaml |
| 86 | +from ansible.module_utils.basic import AnsibleModule |
| 87 | + |
# Matches the CRI-O "Pulled image" message and captures the pulled reference
# (``actual_uri``: everything before the ``@``) and its digest (``id``).
LOG_PATTERN = re.compile(
    r'msg="Pulled image: (?P<actual_uri>[^@\s]+)@(?P<id>sha256:[a-f0-9]+)"'
)


def _collect_log_evidence(paths, module):
    """Scan CRI-O log files and map each pulled digest to the URI it came from.

    Files are read as bytes and decoded as UTF-8 with replacement characters,
    so stray non-UTF-8 bytes in a journal dump cannot abort the run with an
    unhandled ``UnicodeDecodeError``.

    :param paths: iterable of log file paths, read in the given order.
    :param module: object exposing ``fail_json(msg=...)`` (the AnsibleModule);
        it is only called when a file cannot be read.
    :returns: dict mapping ``"sha256:<hex>"`` to the image reference (without
        the digest) found in the matching log line. When a digest occurs more
        than once, the last occurrence read wins.
    """
    evidence = {}
    for path in paths:
        try:
            with open(path, "rb") as handle:
                for raw_line in handle:
                    # Undecodable bytes become U+FFFD instead of raising.
                    line = raw_line.decode("utf-8", "replace")
                    match = LOG_PATTERN.search(line)
                    if match:
                        evidence[match.group("id")] = match.group("actual_uri")
        except (IOError, OSError) as exc:
            module.fail_json(
                msg="Cannot read CRI-O log file {0}: {1}".format(path, str(exc))
            )
    return evidence
| 107 | + |
| 108 | + |
# Text types accepted for report fields: ``str`` plus ``unicode`` on Python 2.
_TEXT_TYPES = (str, type(u""))

# Digest embedded in an image row's ``image_id``.
_DIGEST_PATTERN = re.compile(r"sha256:[a-f0-9]+")


def _registry_host(reference):
    """Return the registry host (first path component) of an image reference.

    An optional ``scheme://`` prefix is dropped first, so
    ``https://host/ns`` yields ``host`` rather than ``https:``.
    Non-string input yields an empty string.
    """
    if not isinstance(reference, _TEXT_TYPES):
        return ""
    without_scheme = reference.strip().split("://", 1)[-1]
    return without_scheme.split("/", 1)[0].strip()


def _trusted_mirror_hosts(report):
    """Collect the hosts of every ``summary.mirror_rules[*].mirror`` entry.

    Malformed sections (non-mapping summary, non-list rules, non-mapping rule,
    non-string mirror) are ignored instead of raising.
    """
    summary = report.get("summary")
    if not isinstance(summary, dict):
        return set()
    rules = summary.get("mirror_rules")
    if not isinstance(rules, (list, tuple)):
        return set()
    hosts = set()
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        host = _registry_host(rule.get("mirror"))
        if host:
            hosts.add(host)
    return hosts


def _annotate_images(images, log_evidence, trusted_mirrors):
    """Add verification fields in place to each image row that has a digest.

    :param images: list of image rows (dicts); other element types are skipped.
    :param log_evidence: dict mapping digest to the URI seen in the logs.
    :param trusted_mirrors: set of registry hosts considered mirrors.
    :returns: number of rows whose ``image_id`` contained a sha256 digest.
    """
    entries_with_digest = 0
    for img in images:
        if not isinstance(img, dict):
            continue
        image_id = img.get("image_id") or ""
        if not isinstance(image_id, _TEXT_TYPES):
            continue
        sha_match = _DIGEST_PATTERN.search(image_id)
        if not sha_match:
            continue
        entries_with_digest += 1
        img_sha = sha_match.group(0)

        if img_sha not in log_evidence:
            img["node_verified_image_origin"] = "cached/unknown"
            img["verification_status"] = "NOT_FOUND_IN_LOGS"
            continue

        actual_uri = log_evidence[img_sha]
        actual_domain = _registry_host(actual_uri)
        is_mirror_domain = actual_domain in trusted_mirrors
        img["node_verified_image_origin"] = "mirror" if is_mirror_domain else "source"
        img["log_evidence_uri"] = actual_uri
        # NOTE(review): assumes expected_pull_location holds a bare registry
        # host comparable to actual_domain -- confirm against the report producer.
        expected_domain = img.get("expected_pull_location") or ""
        img["verification_status"] = (
            "MATCH" if actual_domain == expected_domain else "MISMATCH"
        )
    return entries_with_digest


def run_module():
    """Run the module: load the report, enrich it from CRI-O logs, write it.

    Steps:
      1. Build the log file list from ``log_paths`` plus the sorted glob
         matches under ``log_dir``; fail when the list is empty.
      2. Load the YAML report (root must be a mapping).
      3. Derive trusted mirror hosts and the digest -> URI evidence map.
      4. Annotate every image row that carries a sha256 digest.
      5. Serialize the report and write it to ``output_path`` only when the
         content differs from what is already on disk; ``changed`` reflects
         that. In check mode nothing is written.

    Exits through ``module.exit_json`` / ``module.fail_json``.
    """
    module_args = dict(
        report_path=dict(type="str", required=True),
        output_path=dict(type="str", required=True),
        log_paths=dict(type="list", required=False, elements="str", default=[]),
        log_dir=dict(type="str", required=False),
        log_glob=dict(type="str", required=False, default="*.crio.log"),
    )

    module = AnsibleModule(argument_spec=module_args, supports_check_mode=True)

    report_path = module.params["report_path"]
    output_path = module.params["output_path"]
    log_paths = module.params["log_paths"] or []
    log_dir = module.params["log_dir"]
    log_glob = module.params["log_glob"]

    paths = list(log_paths)
    if log_dir:
        # Sorted so the processing order (and thus last-wins evidence) is stable.
        paths.extend(sorted(glob.glob(os.path.join(log_dir, log_glob))))

    if not paths:
        module.fail_json(
            msg="No CRI-O log files: set log_paths and/or log_dir with matching files."
        )

    try:
        # Binary mode: PyYAML detects the encoding itself, independent of locale.
        with open(report_path, "rb") as f:
            data = yaml.safe_load(f)
    except (IOError, OSError) as exc:
        module.fail_json(
            msg="Cannot read report {0}: {1}".format(report_path, str(exc))
        )
    except yaml.YAMLError as exc:
        module.fail_json(msg="Invalid YAML in report: {0}".format(str(exc)))

    if not isinstance(data, dict):
        module.fail_json(msg="Report root must be a mapping (dict).")

    trusted_mirrors = _trusted_mirror_hosts(data)
    log_evidence = _collect_log_evidence(paths, module)

    images_list = data.get("images")
    if not isinstance(images_list, (list, tuple)):
        images_list = []
    entries_with_digest = _annotate_images(images_list, log_evidence, trusted_mirrors)

    result = dict(
        changed=False,
        trusted_mirrors=sorted(trusted_mirrors),
        log_files=len(paths),
        entries_with_digest=entries_with_digest,
    )

    rendered = yaml.dump(data, default_flow_style=False, sort_keys=False)
    if not isinstance(rendered, bytes):
        rendered = rendered.encode("utf-8")

    # Compare with the current file so an unchanged report is not rewritten.
    try:
        with open(output_path, "rb") as f:
            existing = f.read()
    except (IOError, OSError):
        existing = None  # missing or unreadable: treat as needing a write

    result["changed"] = existing != rendered

    if result["changed"] and not module.check_mode:
        try:
            with open(output_path, "wb") as f:
                f.write(rendered)
        except (IOError, OSError) as exc:
            module.fail_json(
                msg="Cannot write verified report {0}: {1}".format(output_path, str(exc))
            )

    module.exit_json(**result)
| 208 | + |
| 209 | + |
def main():
    """Module entry point; hands control to ``run_module``."""
    run_module()


# Run only when executed as a script (how Ansible invokes the module),
# not when imported.
if __name__ == "__main__":
    main()
0 commit comments