Skip to content

Commit 8c1beab

Browse files
committed
adding shared risk groups
1 parent 00b812a commit 8c1beab

2 files changed

Lines changed: 242 additions & 86 deletions

File tree

ngraph/failure_policy.py

Lines changed: 125 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
11
from dataclasses import dataclass, field
22
from typing import Any, Dict, List, Literal
33
from random import random, sample
4+
from collections import defaultdict, deque
45

56

67
@dataclass
78
class FailureCondition:
89
"""
910
A single condition for matching an entity's attribute with an operator and value.
1011
11-
Example usage (YAML-ish):
12+
Example usage (YAML):
1213
13-
.. code-block:: yaml
14-
15-
conditions:
16-
- attr: "capacity"
17-
operator: "<"
18-
value: 100
14+
conditions:
15+
- attr: "capacity"
16+
operator: "<"
17+
value: 100
1918
2019
Attributes:
2120
attr (str):
22-
The name of the attribute to inspect (e.g. "type", "capacity").
21+
The name of the attribute to inspect (e.g., "type", "capacity").
2322
operator (str):
24-
The comparison operator: "==", "!=", "<", "<=", ">", ">=".
23+
The comparison operator: "==", "!=", "<", "<=", ">", ">=", "contains",
24+
"not_contains", "any_value", or "no_value".
2525
value (Any):
26-
The value to compare against (e.g. "node", 100, True, etc.).
26+
The value to compare against (e.g., "node", 100, True, etc.).
2727
"""
2828

2929
attr: str
@@ -34,35 +34,24 @@ class FailureCondition:
3434
@dataclass
3535
class FailureRule:
3636
"""
37-
A single rule defining how to match entities and then select them for failure.
38-
39-
* conditions: list of conditions
40-
* logic: how to combine conditions ("and", "or", "any")
41-
* rule_type: how to pick from matched entities ("random", "choice", "all")
42-
* probability: used by "random" (a float in [0,1])
43-
* count: used by "choice" (e.g. pick 2)
44-
45-
When multiple FailureRules appear in a FailurePolicy, the final
46-
set of failures is the **union** of all entities selected by each rule.
37+
Defines how to match entities and then select them for failure.
4738
4839
Attributes:
4940
conditions (List[FailureCondition]):
5041
A list of conditions to filter matching entities.
5142
logic (Literal["and", "or", "any"]):
52-
- "and": All conditions must be true.
53-
- "or": At least one condition is true.
54-
- "any": Skip condition checks; everything is matched.
43+
- "and": All conditions must be true for a match.
44+
- "or": At least one condition is true for a match.
45+
- "any": Skip condition checks and match all entities.
5546
rule_type (Literal["random", "choice", "all"]):
5647
The selection strategy among the matched set:
57-
- "random": Each matched entity is chosen independently
58-
with probability = `probability`.
59-
- "choice": Pick exactly `count` items from the matched set
60-
(randomly sampled).
48+
- "random": Each matched entity is chosen with probability=`probability`.
49+
- "choice": Pick exactly `count` items (random sample).
6150
- "all": Select every matched entity.
6251
probability (float):
63-
Probability in [0,1], used only if `rule_type="random"`.
52+
Probability in [0,1], used if `rule_type="random"`.
6453
count (int):
65-
Number of matched entities to pick, used only if `rule_type="choice"`.
54+
Number of entities to pick if `rule_type="choice"`.
6655
"""
6756

6857
conditions: List[FailureCondition] = field(default_factory=list)
@@ -73,29 +62,33 @@ class FailureRule:
7362

7463
def __post_init__(self) -> None:
7564
"""
76-
Validate certain fields after initialization.
65+
Validate the probability if rule_type is 'random'.
7766
"""
7867
if self.rule_type == "random":
79-
if not (0.0 <= self.probability <= 1.0):
68+
if not 0.0 <= self.probability <= 1.0:
8069
raise ValueError(
81-
f"probability={self.probability} must be within [0,1] for rule_type='random'."
70+
f"probability={self.probability} must be within [0,1] "
71+
f"for rule_type='random'."
8272
)
8373

8474

8575
@dataclass
8676
class FailurePolicy:
8777
"""
88-
A container for multiple FailureRules and arbitrary metadata in `attrs`.
78+
A container for multiple FailureRules plus optional metadata in `attrs`.
8979
90-
The method :meth:`apply_failures` merges nodes and links into a single
91-
dictionary (by their unique ID), then applies each rule in turn. The final
92-
result is the union of all failures from each rule.
80+
The main entry point is `apply_failures`, which:
81+
1) Merges all nodes and links into a single entity dictionary.
82+
2) Applies each FailureRule, collecting a set of failed entity IDs.
83+
3) Optionally expands failures to include entities sharing a
84+
'shared_risk_group' with any entity that failed.
9385
9486
Attributes:
9587
rules (List[FailureRule]):
9688
A list of FailureRules to apply.
9789
attrs (Dict[str, Any]):
9890
Arbitrary metadata about this policy (e.g. "name", "description").
91+
If `fail_shared_risk_groups=True`, then shared-risk expansion is used.
9992
"""
10093

10194
rules: List[FailureRule] = field(default_factory=list)
@@ -107,28 +100,29 @@ def apply_failures(
107100
links: Dict[str, Dict[str, Any]],
108101
) -> List[str]:
109102
"""
110-
Identify which entities (nodes or links) fail, given the defined rules.
111-
Returns a combined list (union) of all entity IDs that fail.
103+
Identify which entities fail given the defined rules, then optionally
104+
expand by shared-risk groups.
112105
113106
Args:
114-
nodes: A mapping of node_name -> node.attrs (must have "type"="node").
115-
links: A mapping of link_id -> link.attrs (must have "type"="link").
107+
nodes: Dict[node_name, node_attributes]. Must have 'type'="node".
108+
links: Dict[link_id, link_attributes]. Must have 'type'="link".
116109
117110
Returns:
118-
A list of failed entity IDs (node names or link IDs).
111+
A list of failed entity IDs (union of all rule matches).
119112
"""
120-
# Merge nodes and links into a single map of entity_id -> entity_attrs
121-
# Example: { "SEA": {...}, "SEA-DEN-xxx": {...} }
122113
all_entities = {**nodes, **links}
123-
124114
failed_entities = set()
125115

126-
# Apply each rule, union all selected entities
116+
# 1) Collect matched failures from each rule
127117
for rule in self.rules:
128118
matched = self._match_entities(all_entities, rule.conditions, rule.logic)
129119
selected = self._select_entities(matched, all_entities, rule)
130120
failed_entities.update(selected)
131121

122+
# 2) Optionally expand failures by shared-risk group
123+
if self.attrs.get("fail_shared_risk_groups", False):
124+
self._expand_shared_risk_groups(failed_entities, all_entities)
125+
132126
return list(failed_entities)
133127

134128
def _match_entities(
@@ -138,17 +132,24 @@ def _match_entities(
138132
logic: str,
139133
) -> List[str]:
140134
"""
141-
Find which entity IDs satisfy the given conditions
142-
combined by 'and'/'or' logic (or 'any' to skip checks).
135+
Return all entity IDs matching the given conditions based on 'and'/'or'/'any' logic.
143136
144137
Args:
145-
all_entities: Mapping of entity_id -> attribute dict.
146-
conditions: List of FailureCondition to apply.
138+
all_entities: {entity_id: attributes}.
139+
conditions: List of FailureCondition to evaluate.
147140
logic: "and", "or", or "any".
148141
149142
Returns:
150-
A list of entity IDs that match according to the logic.
143+
A list of matching entity IDs.
151144
"""
145+
if logic == "any":
146+
# Skip condition checks; everything matches.
147+
return list(all_entities.keys())
148+
149+
if not conditions:
150+
# If zero conditions, we match nothing unless logic='any'.
151+
return []
152+
152153
matched = []
153154
for entity_id, attr_dict in all_entities.items():
154155
if self._evaluate_conditions(attr_dict, conditions, logic):
@@ -162,32 +163,22 @@ def _evaluate_conditions(
162163
logic: str,
163164
) -> bool:
164165
"""
165-
Check if the given entity meets all or any of the conditions, or if logic='any'.
166+
Evaluate multiple conditions on a single entity. All or any condition(s)
167+
must pass, depending on 'logic'.
166168
167169
Args:
168-
entity_attrs: Attributes dict for one entity (node or link).
169-
conditions: List of FailureCondition.
170-
logic: "and", "or", or "any".
170+
entity_attrs: Attribute dict for one entity.
171+
conditions: List of FailureCondition to test.
172+
logic: "and" or "or".
171173
172174
Returns:
173175
True if conditions pass, else False.
174176
"""
175-
if logic == "any":
176-
# 'any' means skip condition checks and always match
177-
return True
178-
if not conditions:
179-
# If we have zero conditions, we treat this as no match unless logic='any'
180-
return False
177+
if logic not in ("and", "or"):
178+
raise ValueError(f"Unsupported logic: {logic}")
181179

182-
# Evaluate each condition
183180
results = [_evaluate_condition(entity_attrs, c) for c in conditions]
184-
185-
if logic == "and":
186-
return all(results)
187-
elif logic == "or":
188-
return any(results)
189-
else:
190-
raise ValueError(f"Unsupported logic: {logic}")
181+
return all(results) if logic == "and" else any(results)
191182

192183
@staticmethod
193184
def _select_entities(
@@ -196,44 +187,78 @@ def _select_entities(
196187
rule: FailureRule,
197188
) -> List[str]:
198189
"""
199-
From the matched set, pick which entities fail according to rule_type.
190+
From the matched IDs, pick which entities fail under the given rule_type.
200191
201192
Args:
202-
entity_ids: IDs that matched the rule's conditions.
203-
all_entities: Full entity dictionary (for potential future use).
204-
rule: The FailureRule specifying random/choice/all selection.
193+
entity_ids: Matched entity IDs from _match_entities.
194+
all_entities: Full entity map (unused now, but available if needed).
195+
rule: The FailureRule specifying 'random', 'choice', or 'all'.
205196
206197
Returns:
207-
The final list of entity IDs that fail under this rule.
198+
A list of selected entity IDs to fail.
208199
"""
200+
if not entity_ids:
201+
return []
202+
209203
if rule.rule_type == "random":
210-
# Each entity is chosen with probability=rule.probability
211-
return [ent_id for ent_id in entity_ids if random() < rule.probability]
204+
return [eid for eid in entity_ids if random() < rule.probability]
212205
elif rule.rule_type == "choice":
213-
# Sample exactly 'count' from the matched set (or fewer if matched < count)
214206
count = min(rule.count, len(entity_ids))
215-
# Use sorted(...) for deterministic results
216207
return sample(sorted(entity_ids), k=count)
217208
elif rule.rule_type == "all":
218209
return entity_ids
219210
else:
220211
raise ValueError(f"Unsupported rule_type: {rule.rule_type}")
221212

213+
def _expand_shared_risk_groups(
214+
self, failed_entities: set[str], all_entities: Dict[str, Dict[str, Any]]
215+
) -> None:
216+
"""
217+
Expand the 'failed_entities' set so that if an entity has
218+
shared_risk_group=X, all other entities with the same group also fail.
219+
220+
This is done iteratively until no new failures are found.
221+
222+
Args:
223+
failed_entities: Set of entity IDs already marked as failed.
224+
all_entities: Map of entity_id -> attributes (which may contain 'shared_risk_group').
225+
"""
226+
# Pre-compute SRG -> entity IDs mapping for efficiency
227+
srg_map = defaultdict(set)
228+
for eid, attrs in all_entities.items():
229+
srg = attrs.get("shared_risk_group")
230+
if srg:
231+
srg_map[srg].add(eid)
232+
233+
queue = deque(failed_entities)
234+
while queue:
235+
current = queue.popleft()
236+
current_srg = all_entities[current].get("shared_risk_group")
237+
if not current_srg:
238+
continue
239+
240+
# All entities in the same SRG should fail
241+
for other_eid in srg_map[current_srg]:
242+
if other_eid not in failed_entities:
243+
failed_entities.add(other_eid)
244+
queue.append(other_eid)
245+
222246

223247
def _evaluate_condition(entity: Dict[str, Any], cond: FailureCondition) -> bool:
224248
"""
225-
Evaluate one FailureCondition (attr, operator, value) against entity attributes.
249+
Evaluate a single FailureCondition against an entity's attributes.
250+
251+
Operators supported:
252+
==, !=, <, <=, >, >=, contains, not_contains, any_value, no_value
226253
227254
Args:
228-
entity: The entity's attributes (e.g., node.attrs or link.attrs).
255+
entity: Entity attributes (e.g., node.attrs or link.attrs).
229256
cond: FailureCondition specifying (attr, operator, value).
230257
231258
Returns:
232259
True if the condition passes, else False.
233-
234-
Raises:
235-
ValueError: If the operator is not recognized.
236260
"""
261+
has_attr = cond.attr in entity
237262
derived_value = entity.get(cond.attr, None)
238263
op = cond.operator
239264

@@ -242,12 +267,26 @@ def _evaluate_condition(entity: Dict[str, Any], cond: FailureCondition) -> bool:
242267
elif op == "!=":
243268
return derived_value != cond.value
244269
elif op == "<":
245-
return derived_value < cond.value
270+
return (derived_value is not None) and (derived_value < cond.value)
246271
elif op == "<=":
247-
return derived_value <= cond.value
272+
return (derived_value is not None) and (derived_value <= cond.value)
248273
elif op == ">":
249-
return derived_value > cond.value
274+
return (derived_value is not None) and (derived_value > cond.value)
250275
elif op == ">=":
251-
return derived_value >= cond.value
276+
return (derived_value is not None) and (derived_value >= cond.value)
277+
elif op == "contains":
278+
if derived_value is None:
279+
return False
280+
return cond.value in derived_value
281+
elif op == "not_contains":
282+
if derived_value is None:
283+
return True
284+
return cond.value not in derived_value
285+
elif op == "any_value":
286+
# Pass if the attribute key exists, even if the value is None
287+
return has_attr
288+
elif op == "no_value":
289+
# Pass if the attribute key is missing or the value is None
290+
return (not has_attr) or (derived_value is None)
252291
else:
253292
raise ValueError(f"Unsupported operator: {op}")

0 commit comments

Comments
 (0)