Skip to content

Commit 9d2a859

Browse files
committed
fix: wait_for_condition attempt count
Fix off-by-one error in wait_for_condition attempt numbering where the sequence was 1, 1, 2, 3... instead of the correct 1, 2, 3, 4... The bug occurred because the code read step_details.attempt directly, which represents completed attempts, not the current attempt number. The current attempt should always be checkpointed_attempts + 1. Changes: - Update attempt calculation in wait_for_condition.py - Add explanatory comment about checkpoint semantics - Update test assertion to expect attempt=4 when checkpoint has 3 - Add comprehensive test for monotonic attempt sequence closes #288
1 parent 58f8224 commit 9d2a859

2 files changed

Lines changed: 124 additions & 3 deletions

File tree

src/aws_durable_execution_sdk_python/operation/wait_for_condition.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,10 +170,11 @@ def execute(self, checkpointed_result: CheckpointedResult) -> T:
170170
else:
171171
current_state = self.config.initial_state
172172

173-
# Get attempt number
173+
# Get attempt number - current attempt is checkpointed attempts + 1
174+
# The checkpoint stores completed attempts, so the current attempt being executed is one more
174175
attempt: int = 1
175176
if checkpointed_result.operation and checkpointed_result.operation.step_details:
176-
attempt = checkpointed_result.operation.step_details.attempt
177+
attempt = checkpointed_result.operation.step_details.attempt + 1
177178

178179
try:
179180
# Execute the check function with the injected logger

tests/operation/wait_for_condition_test.py

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,127 @@ def wait_strategy(state, attempt):
596596
context_logger=mock_logger,
597597
)
598598

599-
assert captured_attempt == 3
599+
assert captured_attempt == 4
600+
601+
602+
def test_wait_for_condition_attempt_sequence_is_monotonic():
603+
"""Test that attempt numbers form a monotonically increasing sequence: 1, 2, 3, 4...
604+
605+
This test validates the fix for the attempt counting bug where:
606+
- First execution (no checkpoint): attempt = 1
607+
- After first retry (checkpoint.attempt = 1): attempt = 2
608+
- After second retry (checkpoint.attempt = 2): attempt = 3
609+
- After third retry (checkpoint.attempt = 3): attempt = 4
610+
611+
The current attempt should always be: checkpointed_attempts + 1
612+
"""
613+
mock_state = Mock(spec=ExecutionState)
614+
mock_state.durable_execution_arn = "arn:aws:test"
615+
616+
mock_logger = Mock(spec=Logger)
617+
mock_logger.with_log_info.return_value = mock_logger
618+
619+
op_id = OperationIdentifier("op1", None, "test_wait")
620+
621+
def check_func(state, context):
622+
return state + 1
623+
624+
captured_attempts = []
625+
626+
def wait_strategy(state, attempt):
627+
captured_attempts.append(attempt)
628+
return WaitForConditionDecision.stop_polling()
629+
630+
config = WaitForConditionConfig(initial_state=5, wait_strategy=wait_strategy)
631+
632+
# Test 1: First execution (no checkpoint exists)
633+
mock_state.get_checkpoint_result.return_value = (
634+
CheckpointedResult.create_not_found()
635+
)
636+
637+
wait_for_condition_handler(
638+
state=mock_state,
639+
operation_identifier=op_id,
640+
check=check_func,
641+
config=config,
642+
context_logger=mock_logger,
643+
)
644+
645+
assert captured_attempts[-1] == 1, "First execution should have attempt=1"
646+
647+
# Test 2: After first retry (checkpoint has attempt=1)
648+
operation = Operation(
649+
operation_id="op1",
650+
operation_type=OperationType.STEP,
651+
status=OperationStatus.STARTED,
652+
step_details=StepDetails(result=json.dumps(10), attempt=1),
653+
)
654+
mock_result = CheckpointedResult.create_from_operation(operation)
655+
mock_state.get_checkpoint_result.return_value = mock_result
656+
657+
wait_for_condition_handler(
658+
state=mock_state,
659+
operation_identifier=op_id,
660+
check=check_func,
661+
config=config,
662+
context_logger=mock_logger,
663+
)
664+
665+
assert (
666+
captured_attempts[-1] == 2
667+
), "After first retry (checkpoint.attempt=1), current attempt should be 2"
668+
669+
# Test 3: After second retry (checkpoint has attempt=2)
670+
operation = Operation(
671+
operation_id="op1",
672+
operation_type=OperationType.STEP,
673+
status=OperationStatus.STARTED,
674+
step_details=StepDetails(result=json.dumps(10), attempt=2),
675+
)
676+
mock_result = CheckpointedResult.create_from_operation(operation)
677+
mock_state.get_checkpoint_result.return_value = mock_result
678+
679+
wait_for_condition_handler(
680+
state=mock_state,
681+
operation_identifier=op_id,
682+
check=check_func,
683+
config=config,
684+
context_logger=mock_logger,
685+
)
686+
687+
assert (
688+
captured_attempts[-1] == 3
689+
), "After second retry (checkpoint.attempt=2), current attempt should be 3"
690+
691+
# Test 4: After third retry (checkpoint has attempt=3)
692+
operation = Operation(
693+
operation_id="op1",
694+
operation_type=OperationType.STEP,
695+
status=OperationStatus.STARTED,
696+
step_details=StepDetails(result=json.dumps(10), attempt=3),
697+
)
698+
mock_result = CheckpointedResult.create_from_operation(operation)
699+
mock_state.get_checkpoint_result.return_value = mock_result
700+
701+
wait_for_condition_handler(
702+
state=mock_state,
703+
operation_identifier=op_id,
704+
check=check_func,
705+
config=config,
706+
context_logger=mock_logger,
707+
)
708+
709+
assert (
710+
captured_attempts[-1] == 4
711+
), "After third retry (checkpoint.attempt=3), current attempt should be 4"
712+
713+
# Verify the complete sequence is monotonically increasing
714+
assert captured_attempts == [
715+
1,
716+
2,
717+
3,
718+
4,
719+
], f"Expected [1, 2, 3, 4] but got {captured_attempts}"
600720

601721

602722
def test_wait_for_condition_state_passed_to_strategy():

0 commit comments

Comments
 (0)