Skip to content

Commit b9c99f3

Browse files
committed
hotloop: Optimize retry metrics tracking
Moved retry metrics collection from conditional `set_fact` tasks into the Python modules themselves using `ansible_facts`. This eliminates the performance cost of evaluating and skipping tasks for every stage.

Changes:
- Modules now update the `hotloop_retry_metrics` fact directly via `ansible_facts`
- Removed conditional "Track retry metrics" tasks from all stage types
- Initialize the fact at role start for clean metrics per execution
- Removed redundant 'type' field from the metrics structure
- Metrics summary now always displays (shows "0" when no retries)

Assisted-By: Claude Code/claude-4.5-sonnet
Signed-off-by: Harald Jensås <hjensas@redhat.com>
1 parent 48be961 commit b9c99f3

7 files changed

Lines changed: 123 additions & 51 deletions

File tree

roles/hotloop/library/hotloop_oc_apply_file.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,22 @@
5353
- The timeout for the oc apply command
5454
type: int
5555
default: 60
56+
stage_name:
57+
description:
58+
- The name of the stage for retry metrics tracking
59+
type: str
60+
required: false
61+
resource_identifier:
62+
description:
63+
- The resource identifier for retry metrics (e.g., original manifest path from config)
64+
type: str
65+
required: false
66+
hotloop_retry_metrics:
67+
description:
68+
- Current list of retry metrics to append to
69+
type: list
70+
required: false
71+
default: []
5672
5773
author:
5874
- Harald Jensås <hjensas@redhat.com>
@@ -216,6 +232,31 @@ def save_failed_manifest(file, rc, outs, errs, timeout):
216232
return failed_base
217233

218234

235+
def add_retry_metrics_fact(
    result, current_metrics, stage_name, resource_identifier, retry_count, retry_time
):
    """Add retry metrics to ansible_facts in the result.

    A new list is built from ``current_metrics`` plus one entry describing
    this apply, and stored as the ``hotloop_retry_metrics`` fact on
    ``result``. The list passed in is never mutated.

    :param result: The module result dictionary to update (modified in place).
    :param current_metrics: Current list of retry metrics. ``None`` is treated
        as an empty list, since an optional module parameter that was not
        supplied can arrive as ``None``.
    :param stage_name: The name of the stage.
    :param resource_identifier: The resource identifier (e.g., manifest file path).
    :param retry_count: Number of retries that occurred.
    :param retry_time: Total time spent in retries.
    """
    entry = {
        "stage": stage_name,
        "file": resource_identifier,
        "retry_count": retry_count,
        "retry_time": retry_time,
    }

    # Copy instead of concatenating in place: tolerates None and keeps the
    # caller's list untouched.
    metrics = list(current_metrics or [])
    metrics.append(entry)

    # Merge into any facts already present on the result rather than
    # replacing the whole ansible_facts dictionary.
    facts = dict(result.get("ansible_facts") or {})
    facts["hotloop_retry_metrics"] = metrics
    result["ansible_facts"] = facts
258+
259+
219260
def no_diff(file):
220261
"""Check if the file is different from the previously applied version.
221262
@@ -247,6 +288,9 @@ def run_module():
247288

248289
file = module.params["file"]
249290
timeout = module.params["timeout"]
291+
stage_name = module.params.get("stage_name")
292+
resource_identifier = module.params.get("resource_identifier", file)
293+
hotloop_retry_metrics = module.params.get("hotloop_retry_metrics", [])
250294

251295
try:
252296

@@ -292,6 +336,15 @@ def run_module():
292336
msg += " (WARNING: {count} retries after {time}s due to transient errors)".format(
293337
count=retry_count, time=retry_time
294338
)
339+
# Update ansible_facts with retry metrics
340+
add_retry_metrics_fact(
341+
result,
342+
hotloop_retry_metrics,
343+
stage_name,
344+
resource_identifier,
345+
retry_count,
346+
retry_time,
347+
)
295348
result["msg"] = msg
296349
result["success"] = True
297350
result["changed"] = True

roles/hotloop/library/hotloop_oc_apply_kustomize.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,22 @@
5050
- The timeout for the oc apply command
5151
type: int
5252
default: 60
53+
stage_name:
54+
description:
55+
- The name of the stage for retry metrics tracking
56+
type: str
57+
required: false
58+
resource_identifier:
59+
description:
60+
- The resource identifier for retry metrics (e.g., original directory path from config)
61+
type: str
62+
required: false
63+
hotloop_retry_metrics:
64+
description:
65+
- Current list of retry metrics to append to
66+
type: list
67+
required: false
68+
default: []
5369
5470
author:
5571
- Harald Jensås <hjensas@redhat.com>
@@ -173,6 +189,31 @@ def apply_kustomize(directory, timeout=60):
173189
return rc, outs, errs, out_lines, err_lines
174190

175191

192+
def add_retry_metrics_fact(
    result, current_metrics, stage_name, directory, retry_count, retry_time
):
    """Add retry metrics to ansible_facts in the result.

    A new list is built from ``current_metrics`` plus one entry describing
    this kustomize apply, and stored as the ``hotloop_retry_metrics`` fact on
    ``result``. The list passed in is never mutated.

    :param result: The module result dictionary to update (modified in place).
    :param current_metrics: Current list of retry metrics. ``None`` is treated
        as an empty list, since an optional module parameter that was not
        supplied can arrive as ``None``.
    :param stage_name: The name of the stage.
    :param directory: The kustomize directory path.
    :param retry_count: Number of retries that occurred.
    :param retry_time: Total time spent in retries.
    """
    entry = {
        "stage": stage_name,
        "directory": directory,
        "retry_count": retry_count,
        "retry_time": retry_time,
    }

    # Copy instead of concatenating in place: tolerates None and keeps the
    # caller's list untouched.
    metrics = list(current_metrics or [])
    metrics.append(entry)

    # Merge into any facts already present on the result rather than
    # replacing the whole ansible_facts dictionary.
    facts = dict(result.get("ansible_facts") or {})
    facts["hotloop_retry_metrics"] = metrics
    result["ansible_facts"] = facts
215+
216+
176217
def validate_directory(directory):
177218
"""Validate the directory parameter.
178219
@@ -235,6 +276,9 @@ def run_module():
235276

236277
directory = module.params["directory"]
237278
timeout = module.params["timeout"]
279+
stage_name = module.params.get("stage_name")
280+
resource_identifier = module.params.get("resource_identifier", directory)
281+
hotloop_retry_metrics = module.params.get("hotloop_retry_metrics", [])
238282

239283
try:
240284
# Validate directory parameter
@@ -273,6 +317,15 @@ def run_module():
273317
msg = f"Kustomize directory {directory} applied"
274318
if retry_count > 0:
275319
msg += f" (WARNING: {retry_count} retries after {retry_time}s due to transient errors)"
320+
# Update ansible_facts with retry metrics
321+
add_retry_metrics_fact(
322+
result,
323+
hotloop_retry_metrics,
324+
stage_name,
325+
resource_identifier,
326+
retry_count,
327+
retry_time,
328+
)
276329
result["msg"] = msg
277330
result["success"] = True
278331
result["changed"] = True

roles/hotloop/tasks/kustomize.yml

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,6 @@
7575
hotloop_oc_apply_kustomize:
7676
directory: "{{ _kustomize_apply_dir }}"
7777
timeout: "{{ item.kustomize.timeout | default(60) }}"
78-
register: kustomize_result
79-
80-
- name: "Stage: {{ item.name }} :: Track retry metrics"
81-
ansible.builtin.set_fact:
82-
hotloop_retry_metrics: >-
83-
{{
84-
hotloop_retry_metrics | default([]) + [{
85-
'stage': item.name,
86-
'type': 'kustomize',
87-
'directory': item.kustomize.directory,
88-
'retry_count': kustomize_result.retry_count | default(0),
89-
'retry_time': kustomize_result.retry_time | default(0)
90-
}]
91-
}}
92-
when: kustomize_result.retry_count | default(0) | int > 0
78+
stage_name: "{{ item.name }}"
79+
resource_identifier: "{{ item.kustomize.directory }}"
80+
hotloop_retry_metrics: "{{ hotloop_retry_metrics }}"

roles/hotloop/tasks/main.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
# License for the specific language governing permissions and limitations
1515
# under the License.
1616

17+
- name: Initialize retry metrics
18+
ansible.builtin.set_fact:
19+
hotloop_retry_metrics: []
20+
1721
- name: Assert config is defined
1822
ansible.builtin.assert:
1923
that:
@@ -114,7 +118,6 @@
114118
label: "{{ item.name }}"
115119

116120
- name: Display retry metrics summary
117-
when: hotloop_retry_metrics is defined
118121
ansible.builtin.include_tasks: retry_metrics.yml
119122

120123
- name: Remove temporary hotloop work directory

roles/hotloop/tasks/retry_metrics.yml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,19 @@
1818
vars:
1919
_total_retries: >-
2020
{{
21-
hotloop_retry_metrics
21+
hotloop_retry_metrics | default([])
2222
| map(attribute='retry_count')
2323
| map('int')
2424
| sum
2525
}}
2626
_total_retry_time: >-
2727
{{
28-
hotloop_retry_metrics
28+
hotloop_retry_metrics | default([])
2929
| map(attribute='retry_time')
3030
| map('int')
3131
| sum
3232
}}
33-
_stages_with_retries: "{{ hotloop_retry_metrics | length }}"
33+
_stages_with_retries: "{{ hotloop_retry_metrics | default([]) | length }}"
3434
ansible.builtin.debug:
3535
msg: |
3636
@@ -42,9 +42,8 @@
4242
Total Retry Attempts: {{ _total_retries }}
4343
Total Retry Time: {{ _total_retry_time }}s
4444
45-
{% for metric in hotloop_retry_metrics %}
45+
{% for metric in hotloop_retry_metrics | default([]) %}
4646
┌─ Stage: {{ metric.stage }}
47-
│ Type: {{ metric.type }}
4847
│ Resource: {{ metric.file | default(metric.directory | default('N/A')) }}
4948
│ Retries: {{ metric.retry_count }}
5049
│ Time: {{ metric.retry_time }}s

roles/hotloop/tasks/static_manifest.yml

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -83,18 +83,6 @@
8383
item.manifest | ansible.builtin.basename
8484
] | ansible.builtin.path_join
8585
}}
86-
register: _manifest_apply_result
87-
88-
- name: "Stage: {{ item.name }} :: Track retry metrics"
89-
ansible.builtin.set_fact:
90-
hotloop_retry_metrics: >-
91-
{{
92-
hotloop_retry_metrics | default([]) + [{
93-
'stage': item.name,
94-
'type': 'manifest',
95-
'file': item.manifest,
96-
'retry_count': _manifest_apply_result.retry_count | default(0),
97-
'retry_time': _manifest_apply_result.retry_time | default(0)
98-
}]
99-
}}
100-
when: _manifest_apply_result.retry_count | default(0) | int > 0
86+
stage_name: "{{ item.name }}"
87+
resource_identifier: "{{ item.manifest }}"
88+
hotloop_retry_metrics: "{{ hotloop_retry_metrics }}"

roles/hotloop/tasks/template_manifest.yml

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -88,18 +88,6 @@
8888
item.j2_manifest | ansible.builtin.basename | ansible.builtin.splitext | first
8989
] | ansible.builtin.path_join
9090
}}
91-
register: _j2_manifest_apply_result
92-
93-
- name: "Stage: {{ item.name }} :: Track retry metrics"
94-
ansible.builtin.set_fact:
95-
hotloop_retry_metrics: >-
96-
{{
97-
hotloop_retry_metrics | default([]) + [{
98-
'stage': item.name,
99-
'type': 'j2_manifest',
100-
'file': item.j2_manifest,
101-
'retry_count': _j2_manifest_apply_result.retry_count | default(0),
102-
'retry_time': _j2_manifest_apply_result.retry_time | default(0)
103-
}]
104-
}}
105-
when: _j2_manifest_apply_result.retry_count | default(0) | int > 0
91+
stage_name: "{{ item.name }}"
92+
resource_identifier: "{{ item.j2_manifest }}"
93+
hotloop_retry_metrics: "{{ hotloop_retry_metrics }}"

0 commit comments

Comments
 (0)