Skip to content

Commit f622f23

Browse files
committed
contest: hw: treat unreachability after kexec as crash
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 74582b6 commit f622f23

1 file changed

Lines changed: 17 additions & 6 deletions

File tree

contest/hw/hwksft.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,7 @@ def test(binfo, rinfo, cbarg): # pylint: disable=unused-argument
187187
tree_path, kernel_version, filters=filters)
188188
set_log_file(None)
189189

190-
for attempt in range(max_crash_retries + 1):
190+
for attempt in range(max_crash_retries):
191191
attempt_sfx = f'-{attempt}' if attempt > 0 else ''
192192

193193
# Record SOL position before kexec
@@ -197,15 +197,26 @@ def test(binfo, rinfo, cbarg): # pylint: disable=unused-argument
197197
sol_start_ids[mid] = sol.get('last_id', 0)
198198

199199
# 6. kexec into new kernel
200+
kexec_failed = False
200201
with open(os.path.join(results_path, f'deploy{attempt_sfx}'), 'a',
201202
encoding='utf-8') as fp:
202203
set_log_file(fp)
203-
kexec_machine(config, machine_ips, reservation_id, mc=mc)
204+
try:
205+
kexec_machine(config, machine_ips, reservation_id, mc=mc)
206+
except TimeoutError:
207+
print(f"kexec: machine not reachable after kexec, "
208+
"treating as crash")
209+
kexec_failed = True
204210
set_log_file(None)
205211

206-
# 7. Wait for hw-worker with crash monitoring
207-
wait_result = wait_for_results(config, mc, reservation_id,
208-
machine_ids, machine_ips)
212+
if kexec_failed:
213+
wait_result = WaitResult(ok=False,
214+
error='machine not reachable after kexec',
215+
needs_power_cycle=True)
216+
else:
217+
# 7. Wait for hw-worker with crash monitoring
218+
wait_result = wait_for_results(config, mc, reservation_id,
219+
machine_ids, machine_ips)
209220

210221
# 8. Grab debug artifacts for this attempt.
211222
# If machine is hung (needs_power_cycle), it may still
@@ -241,7 +252,7 @@ def test(binfo, rinfo, cbarg): # pylint: disable=unused-argument
241252

242253
# Do the reboot even if we are about to give up, otherwise
243254
# if machine is hung we won't be able to fetch results
244-
if attempt >= max_crash_retries:
255+
if attempt >= max_crash_retries - 1:
245256
print(f"Max crash retries ({max_crash_retries}) reached, giving up")
246257
break
247258

0 commit comments

Comments
 (0)