|
9 | 9 |
|
10 | 10 | #include <stdio.h> |
11 | 11 | #include <stdlib.h> |
| 12 | +#include <string.h> |
| 13 | +#include <stdbool.h> |
| 14 | + |
| 15 | +#include "../../include/object.h" |
| 16 | +#include "../../include/value.h" |
| 17 | + |
// ----------------------------------------------------------------------------
// FAULT TOLERANCE: SUPERVISOR TREES (Erlang/OTP Style)
// ----------------------------------------------------------------------------
12 | 21 |
|
/**
 * Restart policy a supervisor applies when one of its children fails.
 * The names follow the Erlang/OTP supervisor restart strategies.
 */
typedef enum {
    STRATEGY_ONE_FOR_ONE,  // Restart only the child that failed
    STRATEGY_ALL_FOR_ONE,  // Restart every child of the supervisor
    STRATEGY_REST_FOR_ONE  // OTP "rest for one"; see the failure handler for
                           // what this file actually implements for it
} RestartStrategy;
| 27 | + |
| 28 | +typedef struct ChildSpec { |
| 29 | + int id; |
| 30 | + ObjTask* task; // The monitored task |
| 31 | + // ObjSupervisor* sub_supervisor; // Recursive |
| 32 | + int max_retries; |
| 33 | + int current_retries; |
| 34 | + struct ChildSpec* next; |
| 35 | +} ChildSpec; |
18 | 36 |
|
19 | | -typedef struct { |
| 37 | +typedef struct Supervisor { |
20 | 38 | int id; |
21 | | - RecoveryStrategy strategy; |
22 | | - int maxRetries; |
23 | | - int currentRetries; |
24 | | - // Checkpoint state would go here |
25 | | -} ResilientContext; |
| 39 | + RestartStrategy strategy; |
| 40 | + ChildSpec* children; // Linked list of children |
| 41 | + struct Supervisor* parent; // Up-link for escalation |
| 42 | +} Supervisor; |
26 | 43 |
|
27 | | -// Global supervisor state |
28 | | -static ResilientContext *activeContexts[100]; |
29 | | -static int contextCount = 0; |
| 44 | +// Global Root Supervisor |
| 45 | +static Supervisor rootSupervisor; |
| 46 | +static bool initialized = false; |
30 | 47 |
|
31 | 48 | void initSupervisor() { |
| 49 | + rootSupervisor.id = 0; |
| 50 | + rootSupervisor.strategy = STRATEGY_ONE_FOR_ONE; |
| 51 | + rootSupervisor.children = NULL; |
| 52 | + rootSupervisor.parent = NULL; |
| 53 | + initialized = true; |
32 | 54 | printf("[Supervisor] Initialized Autonomic Self-Healing Subsystem.\n"); |
33 | | - contextCount = 0; |
34 | 55 | } |
35 | 56 |
|
36 | | -void registerResilientBlock(int id, const char *strategyStr, int retryCount) { |
37 | | - printf("[Supervisor] Registering Resilient Block ID: %d, Strategy: %s\n", id, strategyStr); |
| 57 | +static ChildSpec* find_child(Supervisor* sup, int task_id) { |
| 58 | + ChildSpec* curr = sup->children; |
| 59 | + while (curr) { |
| 60 | + if (curr->id == task_id) return curr; |
| 61 | + curr = curr->next; |
| 62 | + } |
| 63 | + return NULL; |
| 64 | +} |
| 65 | + |
| 66 | +void registerTask(int taskId, ObjTask* task, int maxRetries) { |
| 67 | + if (!initialized) initSupervisor(); |
| 68 | + |
| 69 | + // In full impl, we'd specify which supervisor to attach to. |
| 70 | + // Default to Root. |
38 | 71 |
|
39 | | - ResilientContext *ctx = malloc(sizeof(ResilientContext)); |
40 | | - ctx->id = id; |
41 | | - ctx->maxRetries = retryCount; |
42 | | - ctx->currentRetries = 0; |
| 72 | + ChildSpec* child = malloc(sizeof(ChildSpec)); |
| 73 | + child->id = taskId; |
| 74 | + child->task = task; |
| 75 | + child->max_retries = maxRetries; |
| 76 | + child->current_retries = 0; |
| 77 | + child->next = rootSupervisor.children; |
| 78 | + rootSupervisor.children = child; |
43 | 79 |
|
44 | | - // Parse strategy |
45 | | - // In real impl, use enum parsing |
46 | | - ctx->strategy = STRATEGY_RESTART; |
| 80 | + printf("[Supervisor] Monitoring Task %d (Retries: %d)\n", taskId, maxRetries); |
| 81 | +} |
47 | 82 |
|
48 | | - activeContexts[contextCount++] = ctx; |
| 83 | +// Restart a specific child logic (Stub) |
| 84 | +static void restart_child(ChildSpec* child) { |
| 85 | + printf("[Supervisor] RESTARTING Child %d...\n", child->id); |
| 86 | + // In real VM: |
| 87 | + // 1. Reset Task IP/Stack |
| 88 | + // 2. Scheduler Enqueue(child->task) |
| 89 | + child->current_retries++; |
| 90 | + // Stub: |
| 91 | + // child->task->completed = false; |
| 92 | + // scheduler_enqueue(child->task); |
49 | 93 | } |
50 | 94 |
|
51 | | -void notifyPanic(int code, const char *message) { |
52 | | - printf("[Supervisor] ALERT: Panic caught! Code: %d, Message: %s\n", code, message); |
53 | | - |
54 | | - // Find active context |
55 | | - if (contextCount > 0) { |
56 | | - ResilientContext *current = activeContexts[contextCount - 1]; |
57 | | - if (current->currentRetries < current->maxRetries) { |
58 | | - printf("[Supervisor] Attempting Recovery: RESTART (%d/%d)\n", current->currentRetries + 1, current->maxRetries); |
59 | | - current->currentRetries++; |
60 | | - // Signal VM to Jump back to start of block (Checkpoint) |
| 95 | +static void handle_failure(Supervisor* sup, ChildSpec* failedChild) { |
| 96 | + printf("[Supervisor] Handling Failure for Child %d using Strategy %d\n", |
| 97 | + failedChild->id, sup->strategy); |
| 98 | + |
| 99 | + if (failedChild->current_retries >= failedChild->max_retries) { |
| 100 | + printf("[Supervisor] Child %d exceeded max retries (%d). ESCALATING.\n", |
| 101 | + failedChild->id, failedChild->max_retries); |
| 102 | + // Escalate to parent |
| 103 | + if (sup->parent) { |
| 104 | + // propagate panic up |
61 | 105 | } else { |
62 | | - printf("[Supervisor] Recovery Failed. Escalating...\n"); |
63 | | - // Escalate |
| 106 | + printf("[Supervisor] Root Supervisor Gave Up. SYSTEM CRASH.\n"); |
| 107 | + exit(1); |
64 | 108 | } |
| 109 | + return; |
| 110 | + } |
| 111 | + |
| 112 | + switch (sup->strategy) { |
| 113 | + case STRATEGY_ONE_FOR_ONE: |
| 114 | + restart_child(failedChild); |
| 115 | + break; |
| 116 | + |
| 117 | + case STRATEGY_ALL_FOR_ONE: |
| 118 | + // Restart ALL children |
| 119 | + { |
| 120 | + ChildSpec* curr = sup->children; |
| 121 | + while (curr) { |
| 122 | + restart_child(curr); |
| 123 | + curr = curr->next; |
| 124 | + } |
| 125 | + } |
| 126 | + break; |
| 127 | + |
| 128 | + default: |
| 129 | + break; |
| 130 | + } |
| 131 | +} |
| 132 | + |
| 133 | +// Hook called by VM exception handler |
| 134 | +void notifyPanic(int taskId, const char *message) { |
| 135 | + if (!initialized) { |
| 136 | + printf("Panic before supervisor init: %s\n", message); |
| 137 | + exit(1); |
| 138 | + } |
| 139 | + |
| 140 | + printf("[Supervisor] ALERT: Task %d Panicked! Msg: %s\n", taskId, message); |
| 141 | + |
| 142 | + // Find who owns this task |
| 143 | + // Simplified: Search root |
| 144 | + ChildSpec* match = find_child(&rootSupervisor, taskId); |
| 145 | + if (match) { |
| 146 | + handle_failure(&rootSupervisor, match); |
65 | 147 | } else { |
66 | | - printf("[Supervisor] No active resilient context. System Crash.\n"); |
67 | | - exit(1); |
| 148 | + printf("[Supervisor] Unsupervised Task %d crashed. Ignoring.\n", taskId); |
68 | 149 | } |
69 | 150 | } |
| 151 | + |
/**
 * Compatibility shim for callers of the old resilient-block API.
 *
 * Maps the old call onto registerTask(): the block id is used as the task id
 * and no task object is attached (NULL).
 *
 * @param id           Block identifier, reused as the task id.
 * @param strategyStr  Accepted for source compatibility; currently ignored.
 * @param retryCount   Forwarded as the task's maximum retry count.
 */
void registerResilientBlock(int id, const char *strategyStr, int retryCount) {
    (void)strategyStr;  // not interpreted yet
    registerTask(id, NULL, retryCount);
}
0 commit comments