Skip to content

Commit e5824f1

Browse files
committed
fix: BatchResult completion reason logic
Fix BatchResult.from_items() to match JavaScript SDK behavior. The previous implementation incorrectly checked conditions in the wrong order, causing incorrect completion reasons for concurrent operations with failure tolerance configurations. The checkpointed completion reason is preserved during replay, so existing executions are unaffected. Code with conditional logic based on completion_reason might see different values after this update. Example: With 3 items (1 success, 2 failures) and tolerated_failure_count=1: - Before: ALL_COMPLETED (incorrect - all items finished) - After: FAILURE_TOLERANCE_EXCEEDED (correct - tolerance breached) Changes: - Extract completion reason logic to _get_completion_reason() method - Check failure tolerance BEFORE checking if all completed - Implement proper fail-fast when no completion config provided - Add comprehensive unit tests covering all edge cases This ensures tolerance checks take precedence over success criteria, preventing operations from incorrectly reporting ALL_COMPLETED when failure tolerance has been exceeded. closes #280
1 parent e4bfd00 commit e5824f1

2 files changed

Lines changed: 271 additions & 57 deletions

File tree

src/aws_durable_execution_sdk_python/concurrency/models.py

Lines changed: 88 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,85 @@ def from_dict(
114114
completion_reason = CompletionReason(completion_reason_value)
115115
return cls(batch_items, completion_reason)
116116

117+
@staticmethod
118+
def _get_completion_reason(
119+
failure_count: int,
120+
success_count: int,
121+
completed_count: int,
122+
total_count: int,
123+
completion_config: CompletionConfig | None,
124+
) -> CompletionReason:
125+
"""
126+
Determine completion reason based on completion counts.
127+
128+
Logic order:
129+
1. Check failure tolerance FIRST (before checking if all completed)
130+
2. Check if all completed
131+
3. Check if minimum successful reached
132+
4. Default to ALL_COMPLETED
133+
134+
Args:
135+
failure_count: Number of failed items
136+
success_count: Number of succeeded items
137+
completed_count: Total completed (succeeded + failed)
138+
total_count: Total number of items
139+
completion_config: Optional completion configuration
140+
141+
Returns:
142+
CompletionReason enum value
143+
"""
144+
# STEP 1: Check tolerance first, before checking if all completed
145+
146+
# Handle fail-fast behavior (no completion config or empty completion config)
147+
if completion_config is None:
148+
if failure_count > 0:
149+
return CompletionReason.FAILURE_TOLERANCE_EXCEEDED
150+
else:
151+
# Check if completion config has any criteria set
152+
has_any_completion_criteria = (
153+
completion_config.min_successful is not None
154+
or completion_config.tolerated_failure_count is not None
155+
or completion_config.tolerated_failure_percentage is not None
156+
)
157+
158+
if not has_any_completion_criteria:
159+
# Empty completion config - fail fast on any failure
160+
if failure_count > 0:
161+
return CompletionReason.FAILURE_TOLERANCE_EXCEEDED
162+
else:
163+
# Check specific tolerance thresholds
164+
if (
165+
completion_config.tolerated_failure_count is not None
166+
and failure_count > completion_config.tolerated_failure_count
167+
):
168+
return CompletionReason.FAILURE_TOLERANCE_EXCEEDED
169+
170+
if (
171+
completion_config.tolerated_failure_percentage is not None
172+
and total_count > 0
173+
):
174+
failure_percentage = (failure_count / total_count) * 100
175+
if (
176+
failure_percentage
177+
> completion_config.tolerated_failure_percentage
178+
):
179+
return CompletionReason.FAILURE_TOLERANCE_EXCEEDED
180+
181+
# STEP 2: Check if all completed
182+
if completed_count == total_count:
183+
return CompletionReason.ALL_COMPLETED
184+
185+
# STEP 3: Check if minimum successful reached
186+
if (
187+
completion_config is not None
188+
and completion_config.min_successful is not None
189+
and success_count >= completion_config.min_successful
190+
):
191+
return CompletionReason.MIN_SUCCESSFUL_REACHED
192+
193+
# STEP 4: Default
194+
return CompletionReason.ALL_COMPLETED
195+
117196
@classmethod
118197
def from_items(
119198
cls,
@@ -123,12 +202,8 @@ def from_items(
123202
"""
124203
Infer completion reason based on batch item statuses and completion config.
125204
126-
This follows the same logic as the TypeScript implementation:
127-
- If all items completed: ALL_COMPLETED
128-
- If minSuccessful threshold met and not all completed: MIN_SUCCESSFUL_REACHED
129-
- Otherwise: FAILURE_TOLERANCE_EXCEEDED
205+
This follows the same logic as the TypeScript implementation.
130206
"""
131-
132207
statuses = (item.status for item in items)
133208
counts = Counter(statuses)
134209
succeeded_count = counts.get(BatchItemStatus.SUCCEEDED, 0)
@@ -138,18 +213,14 @@ def from_items(
138213
completed_count = succeeded_count + failed_count
139214
total_count = started_count + completed_count
140215

141-
# If all items completed (no started items), it's ALL_COMPLETED
142-
if completed_count == total_count:
143-
completion_reason = CompletionReason.ALL_COMPLETED
144-
elif ( # If we have completion config and minSuccessful threshold is met
145-
completion_config
146-
and (min_successful := completion_config.min_successful) is not None
147-
and succeeded_count >= min_successful
148-
):
149-
completion_reason = CompletionReason.MIN_SUCCESSFUL_REACHED
150-
else:
151-
# Otherwise, assume failure tolerance was exceeded
152-
completion_reason = CompletionReason.FAILURE_TOLERANCE_EXCEEDED
216+
# Determine completion reason using the same logic as JavaScript SDK
217+
completion_reason = cls._get_completion_reason(
218+
failure_count=failed_count,
219+
success_count=succeeded_count,
220+
completed_count=completed_count,
221+
total_count=total_count,
222+
completion_config=completion_config,
223+
)
153224

154225
return cls(items, completion_reason)
155226

0 commit comments

Comments
 (0)