-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathexecution.py
More file actions
364 lines (315 loc) · 14.2 KB
/
execution.py
File metadata and controls
364 lines (315 loc) · 14.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
from __future__ import annotations
import json
from datetime import UTC, datetime
from enum import Enum
from threading import Lock
from typing import Any
from uuid import uuid4
from aws_durable_execution_sdk_python.execution import (
DurableExecutionInvocationOutput,
InvocationStatus,
)
from aws_durable_execution_sdk_python.lambda_service import (
CallbackDetails,
ErrorObject,
ExecutionDetails,
Operation,
OperationStatus,
OperationType,
OperationUpdate,
StepDetails,
)
from aws_durable_execution_sdk_python_testing.exceptions import (
IllegalStateException,
InvalidParameterValueException,
)
# Testing-package model and token types
from aws_durable_execution_sdk_python_testing.model import (
StartDurableExecutionInput,
)
from aws_durable_execution_sdk_python_testing.token import (
CheckpointToken,
CallbackToken,
)
class ExecutionStatus(Enum):
    """Status values reported for an execution in API responses.

    Every member's value is the same upper-case string as its name.
    """

    # Reported by Execution.current_status() while the execution is not complete.
    RUNNING = "RUNNING"

    # Close statuses recorded by the Execution.complete_* methods.
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
    STOPPED = "STOPPED"
    TIMED_OUT = "TIMED_OUT"
class Execution:
"""Execution state."""
def __init__(
self,
durable_execution_arn: str,
start_input: StartDurableExecutionInput,
operations: list[Operation],
):
self.durable_execution_arn: str = durable_execution_arn
# operation is frozen, it won't mutate - no need to clone/deep-copy
self.start_input: StartDurableExecutionInput = start_input
self.operations: list[Operation] = operations
self.updates: list[OperationUpdate] = []
self.used_tokens: set[str] = set()
# TODO: this will need to persist/rehydrate depending on inmemory vs sqllite store
self._token_sequence: int = 0
self._state_lock: Lock = Lock()
self.is_complete: bool = False
self.result: DurableExecutionInvocationOutput | None = None
self.consecutive_failed_invocation_attempts: int = 0
self.close_status: ExecutionStatus | None = None
@property
def token_sequence(self) -> int:
"""Get current token sequence value."""
return self._token_sequence
def current_status(self) -> ExecutionStatus:
"""Get execution status."""
if not self.is_complete:
return ExecutionStatus.RUNNING
if not self.close_status:
msg: str = "close_status must be set when execution is complete"
raise IllegalStateException(msg)
return self.close_status
@staticmethod
def new(input: StartDurableExecutionInput) -> Execution: # noqa: A002
# make a nicer arn
# Pattern: arn:(aws[a-zA-Z-]*)?:lambda:[a-z]{2}(-gov)?-[a-z]+-\d{1}:\d{12}:durable-execution:[a-zA-Z0-9-_\.]+:[a-zA-Z0-9-_\.]+:[a-zA-Z0-9-_\.]+
# Example: arn:aws:lambda:us-east-1:123456789012:durable-execution:myDurableFunction:myDurableExecutionName:ce67da72-3701-4f83-9174-f4189d27b0a5
return Execution(
durable_execution_arn=str(uuid4()), start_input=input, operations=[]
)
def to_dict(self) -> dict[str, Any]:
"""Serialize execution to dictionary."""
return {
"DurableExecutionArn": self.durable_execution_arn,
"StartInput": self.start_input.to_dict(),
"Operations": [op.to_dict() for op in self.operations],
"Updates": [update.to_dict() for update in self.updates],
"UsedTokens": list(self.used_tokens),
"TokenSequence": self._token_sequence,
"IsComplete": self.is_complete,
"Result": self.result.to_dict() if self.result else None,
"ConsecutiveFailedInvocationAttempts": self.consecutive_failed_invocation_attempts,
"CloseStatus": self.close_status.value if self.close_status else None,
}
@classmethod
def from_dict(cls, data: dict[str, Any]) -> Execution:
"""Deserialize execution from dictionary."""
# Reconstruct start_input
start_input = StartDurableExecutionInput.from_dict(data["StartInput"])
# Reconstruct operations
operations = [Operation.from_dict(op_data) for op_data in data["Operations"]]
# Create execution
execution = cls(
durable_execution_arn=data["DurableExecutionArn"],
start_input=start_input,
operations=operations,
)
# Set additional fields
execution.updates = [
OperationUpdate.from_dict(update_data) for update_data in data["Updates"]
]
execution.used_tokens = set(data["UsedTokens"])
execution._token_sequence = data["TokenSequence"] # noqa: SLF001
execution.is_complete = data["IsComplete"]
execution.result = (
DurableExecutionInvocationOutput.from_dict(data["Result"])
if data["Result"]
else None
)
execution.consecutive_failed_invocation_attempts = data[
"ConsecutiveFailedInvocationAttempts"
]
close_status_str = data.get("CloseStatus")
execution.close_status = (
ExecutionStatus(close_status_str) if close_status_str else None
)
return execution
def start(self) -> None:
if self.start_input.invocation_id is None:
msg: str = "invocation_id is required"
raise InvalidParameterValueException(msg)
with self._state_lock:
self.operations.append(
Operation(
operation_id=self.start_input.invocation_id,
parent_id=None,
name=self.start_input.execution_name,
start_timestamp=datetime.now(UTC),
operation_type=OperationType.EXECUTION,
status=OperationStatus.STARTED,
execution_details=ExecutionDetails(
input_payload=json.dumps(self.start_input.input)
),
)
)
def get_operation_execution_started(self) -> Operation:
if not self.operations:
msg: str = "execution not started."
raise IllegalStateException(msg)
return self.operations[0]
def get_new_checkpoint_token(self) -> str:
"""Generate a new checkpoint token with incremented sequence"""
with self._state_lock:
self._token_sequence += 1
new_token_sequence = self._token_sequence
token = CheckpointToken(
execution_arn=self.durable_execution_arn,
token_sequence=new_token_sequence,
)
token_str = token.to_str()
self.used_tokens.add(token_str)
return token_str
def get_navigable_operations(self) -> list[Operation]:
"""Get list of operations, but exclude child operations where the parent has already completed."""
return self.operations
def get_assertable_operations(self) -> list[Operation]:
"""Get list of operations, but exclude the EXECUTION operations"""
# TODO: this excludes EXECUTION at start, but can there be an EXECUTION at the end if there was a checkpoint with large payload?
return self.operations[1:]
def has_pending_operations(self, execution: Execution) -> bool:
"""True if execution has pending operations."""
for operation in execution.operations:
if (
operation.operation_type == OperationType.STEP
and operation.status == OperationStatus.PENDING
) or (
operation.operation_type
in [
OperationType.WAIT,
OperationType.CALLBACK,
OperationType.CHAINED_INVOKE,
]
and operation.status == OperationStatus.STARTED
):
return True
return False
def complete_success(self, result: str | None) -> None:
"""Complete execution successfully (DecisionType.COMPLETE_WORKFLOW_EXECUTION)."""
self.result = DurableExecutionInvocationOutput(
status=InvocationStatus.SUCCEEDED, result=result
)
self.is_complete = True
self.close_status = ExecutionStatus.SUCCEEDED
self._end_execution(OperationStatus.SUCCEEDED)
def complete_fail(self, error: ErrorObject) -> None:
"""Complete execution with failure (DecisionType.FAIL_WORKFLOW_EXECUTION)."""
self.result = DurableExecutionInvocationOutput(
status=InvocationStatus.FAILED, error=error
)
self.is_complete = True
self.close_status = ExecutionStatus.FAILED
self._end_execution(OperationStatus.FAILED)
def complete_timeout(self, error: ErrorObject) -> None:
"""Complete execution with timeout."""
self.result = DurableExecutionInvocationOutput(
status=InvocationStatus.FAILED, error=error
)
self.is_complete = True
self.close_status = ExecutionStatus.TIMED_OUT
self._end_execution(OperationStatus.TIMED_OUT)
def complete_stopped(self, error: ErrorObject) -> None:
"""Complete execution as terminated (TerminateWorkflowExecutionV2Request)."""
self.result = DurableExecutionInvocationOutput(
status=InvocationStatus.FAILED, error=error
)
self.is_complete = True
self.close_status = ExecutionStatus.STOPPED
self._end_execution(OperationStatus.STOPPED)
def find_operation(self, operation_id: str) -> tuple[int, Operation]:
"""Find operation by ID, return index and operation."""
for i, operation in enumerate(self.operations):
if operation.operation_id == operation_id:
return i, operation
msg: str = f"Attempting to update state of an Operation [{operation_id}] that doesn't exist"
raise IllegalStateException(msg)
def find_callback_operation(self, callback_id: str) -> tuple[int, Operation]:
"""Find callback operation by callback_id, return index and operation."""
for i, operation in enumerate(self.operations):
if (
operation.operation_type == OperationType.CALLBACK
and operation.callback_details
and operation.callback_details.callback_id == callback_id
):
return i, operation
msg: str = f"Callback operation with callback_id [{callback_id}] not found"
raise IllegalStateException(msg)
def complete_wait(self, operation_id: str) -> Operation:
"""Complete WAIT operation when timer fires."""
index, operation = self.find_operation(operation_id)
# Validate
if operation.status != OperationStatus.STARTED:
msg_wait_not_started: str = f"Attempting to transition a Wait Operation[{operation_id}] to SUCCEEDED when it's not STARTED"
raise IllegalStateException(msg_wait_not_started)
if operation.operation_type != OperationType.WAIT:
msg_not_wait: str = (
f"Expected WAIT operation, got {operation.operation_type}"
)
raise IllegalStateException(msg_not_wait)
# Thread-safe increment sequence and operation update
with self._state_lock:
self._token_sequence += 1
# Build and assign updated operation
self.operations[index] = operation.create_succeeded(
end_timestamp=datetime.now(UTC)
)
return self.operations[index]
def complete_retry(self, operation_id: str) -> Operation:
"""Complete STEP retry when timer fires."""
index, operation = self.find_operation(operation_id)
# Validate
if operation.status != OperationStatus.PENDING:
msg_step_not_pending: str = f"Attempting to transition a Step Operation[{operation_id}] to READY when it's not PENDING"
raise IllegalStateException(msg_step_not_pending)
if operation.operation_type != OperationType.STEP:
msg_not_step: str = (
f"Expected STEP operation, got {operation.operation_type}"
)
raise IllegalStateException(msg_not_step)
# Thread-safe increment sequence and operation update
with self._state_lock:
self._token_sequence += 1
updated_operation = operation.create_completed_retry()
# Assign
self.operations[index] = updated_operation
return updated_operation
def complete_callback_success(
self, callback_id: str, result: bytes | None = None
) -> Operation:
"""Complete CALLBACK operation with success."""
index, operation = self.find_callback_operation(callback_id)
if operation.status != OperationStatus.STARTED:
msg: str = f"Callback operation [{callback_id}] is not in STARTED state"
raise IllegalStateException(msg)
with self._state_lock:
self._token_sequence += 1
self.operations[index] = operation.create_callback_result(
result=result.decode() if result else None,
end_timestamp=datetime.now(UTC),
)
return self.operations[index]
def complete_callback_failure(
self, callback_id: str, error: ErrorObject
) -> Operation:
"""Complete CALLBACK operation with failure."""
index, operation = self.find_callback_operation(callback_id)
if operation.status != OperationStatus.STARTED:
msg: str = f"Callback operation [{callback_id}] is not in STARTED state"
raise IllegalStateException(msg)
with self._state_lock:
self._token_sequence += 1
self.operations[index] = operation.create_callback_failure(
error=error,
end_timestamp=datetime.now(UTC),
)
return self.operations[index]
def _end_execution(self, status: OperationStatus) -> None:
"""Set the end_timestamp on the main EXECUTION operation when execution completes."""
execution_op: Operation = self.get_operation_execution_started()
if execution_op.operation_type == OperationType.EXECUTION:
with self._state_lock:
self.operations[0] = execution_op.create_execution_end(
status=status,
end_timestamp=datetime.now(UTC),
)