11from dataclasses import dataclass , field
22from typing import Any , Dict , List , Literal
33from random import random , sample
4+ from collections import defaultdict , deque
45
56
67@dataclass
78class FailureCondition :
89 """
910 A single condition for matching an entity's attribute with an operator and value.
1011
11- Example usage (YAML-ish ):
12+ Example usage (YAML):
1213
13- .. code-block:: yaml
14-
15- conditions:
16- - attr: "capacity"
17- operator: "<"
18- value: 100
14+ conditions:
15+ - attr: "capacity"
16+ operator: "<"
17+ value: 100
1918
2019 Attributes:
2120 attr (str):
22- The name of the attribute to inspect (e.g. "type", "capacity").
21+ The name of the attribute to inspect (e.g., "type", "capacity").
2322 operator (str):
24- The comparison operator: "==", "!=", "<", "<=", ">", ">=".
23+ The comparison operator: "==", "!=", "<", "<=", ">", ">=", "contains",
24+ "not_contains", "any_value", or "no_value".
2525 value (Any):
26- The value to compare against (e.g. "node", 100, True, etc.).
26+ The value to compare against (e.g., "node", 100, True, etc.).
2727 """
2828
2929 attr : str
@@ -34,35 +34,24 @@ class FailureCondition:
3434@dataclass
3535class FailureRule :
3636 """
37- A single rule defining how to match entities and then select them for failure.
38-
39- * conditions: list of conditions
40- * logic: how to combine conditions ("and", "or", "any")
41- * rule_type: how to pick from matched entities ("random", "choice", "all")
42- * probability: used by "random" (a float in [0,1])
43- * count: used by "choice" (e.g. pick 2)
44-
45- When multiple FailureRules appear in a FailurePolicy, the final
46- set of failures is the **union** of all entities selected by each rule.
37+ Defines how to match entities and then select them for failure.
4738
4839 Attributes:
4940 conditions (List[FailureCondition]):
5041 A list of conditions to filter matching entities.
5142 logic (Literal["and", "or", "any"]):
52- - "and": All conditions must be true.
53- - "or": At least one condition is true.
54- - "any": Skip condition checks; everything is matched .
43+ - "and": All conditions must be true for a match .
44+ - "or": At least one condition is true for a match .
45+ - "any": Skip condition checks and match all entities .
5546 rule_type (Literal["random", "choice", "all"]):
5647 The selection strategy among the matched set:
57- - "random": Each matched entity is chosen independently
58- with probability = `probability`.
59- - "choice": Pick exactly `count` items from the matched set
60- (randomly sampled).
48+ - "random": Each matched entity is chosen with probability=`probability`.
49+ - "choice": Pick exactly `count` items (random sample).
6150 - "all": Select every matched entity.
6251 probability (float):
63- Probability in [0,1], used only if `rule_type="random"`.
52+ Probability in [0,1], used if `rule_type="random"`.
6453 count (int):
65- Number of matched entities to pick, used only if `rule_type="choice"`.
54+ Number of entities to pick if `rule_type="choice"`.
6655 """
6756
6857 conditions : List [FailureCondition ] = field (default_factory = list )
@@ -73,29 +62,33 @@ class FailureRule:
7362
7463 def __post_init__ (self ) -> None :
7564 """
76- Validate certain fields after initialization .
65+ Validate the probability if rule_type is 'random' .
7766 """
7867 if self .rule_type == "random" :
79- if not ( 0.0 <= self .probability <= 1.0 ) :
68+ if not 0.0 <= self .probability <= 1.0 :
8069 raise ValueError (
81- f"probability={ self .probability } must be within [0,1] for rule_type='random'."
70+ f"probability={ self .probability } must be within [0,1] "
71+ f"for rule_type='random'."
8272 )
8373
8474
8575@dataclass
8676class FailurePolicy :
8777 """
88- A container for multiple FailureRules and arbitrary metadata in `attrs`.
78+ A container for multiple FailureRules plus optional metadata in `attrs`.
8979
90- The method :meth:`apply_failures` merges nodes and links into a single
91- dictionary (by their unique ID), then applies each rule in turn. The final
92- result is the union of all failures from each rule.
80+ The main entry point is `apply_failures`, which:
81+ 1) Merges all nodes and links into a single entity dictionary.
82+ 2) Applies each FailureRule, collecting a set of failed entity IDs.
83+ 3) Optionally expands failures to include entities sharing a
84+ 'shared_risk_group' with any entity that failed.
9385
9486 Attributes:
9587 rules (List[FailureRule]):
9688 A list of FailureRules to apply.
9789 attrs (Dict[str, Any]):
9890 Arbitrary metadata about this policy (e.g. "name", "description").
91+ If `fail_shared_risk_groups=True`, then shared-risk expansion is used.
9992 """
10093
10194 rules : List [FailureRule ] = field (default_factory = list )
@@ -107,28 +100,29 @@ def apply_failures(
107100 links : Dict [str , Dict [str , Any ]],
108101 ) -> List [str ]:
109102 """
110- Identify which entities (nodes or links) fail, given the defined rules.
111- Returns a combined list (union) of all entity IDs that fail .
103+ Identify which entities fail given the defined rules, then optionally
104+ expand by shared-risk groups .
112105
113106 Args:
114- nodes: A mapping of node_name -> node.attrs (must have " type" ="node") .
115- links: A mapping of link_id -> link.attrs (must have " type" ="link") .
107+ nodes: Dict[ node_name, node_attributes]. Must have ' type' ="node".
108+ links: Dict[ link_id, link_attributes]. Must have ' type' ="link".
116109
117110 Returns:
118- A list of failed entity IDs (node names or link IDs ).
111+ A list of failed entity IDs (union of all rule matches ).
119112 """
120- # Merge nodes and links into a single map of entity_id -> entity_attrs
121- # Example: { "SEA": {...}, "SEA-DEN-xxx": {...} }
122113 all_entities = {** nodes , ** links }
123-
124114 failed_entities = set ()
125115
126- # Apply each rule, union all selected entities
116+ # 1) Collect matched failures from each rule
127117 for rule in self .rules :
128118 matched = self ._match_entities (all_entities , rule .conditions , rule .logic )
129119 selected = self ._select_entities (matched , all_entities , rule )
130120 failed_entities .update (selected )
131121
122+ # 2) Optionally expand failures by shared-risk group
123+ if self .attrs .get ("fail_shared_risk_groups" , False ):
124+ self ._expand_shared_risk_groups (failed_entities , all_entities )
125+
132126 return list (failed_entities )
133127
134128 def _match_entities (
@@ -138,17 +132,24 @@ def _match_entities(
138132 logic : str ,
139133 ) -> List [str ]:
140134 """
141- Find which entity IDs satisfy the given conditions
142- combined by 'and'/'or' logic (or 'any' to skip checks).
135+ Return all entity IDs matching the given conditions based on 'and'/'or'/'any' logic.
143136
144137 Args:
145- all_entities: Mapping of entity_id -> attribute dict .
146- conditions: List of FailureCondition to apply .
138+ all_entities: { entity_id: attributes} .
139+ conditions: List of FailureCondition to evaluate .
147140 logic: "and", "or", or "any".
148141
149142 Returns:
150- A list of entity IDs that match according to the logic .
143+ A list of matching entity IDs.
151144 """
145+ if logic == "any" :
146+ # Skip condition checks; everything matches.
147+ return list (all_entities .keys ())
148+
149+ if not conditions :
150+ # If zero conditions, we match nothing unless logic='any'.
151+ return []
152+
152153 matched = []
153154 for entity_id , attr_dict in all_entities .items ():
154155 if self ._evaluate_conditions (attr_dict , conditions , logic ):
@@ -162,32 +163,22 @@ def _evaluate_conditions(
162163 logic : str ,
163164 ) -> bool :
164165 """
165- Check if the given entity meets all or any of the conditions, or if logic='any'.
166+ Evaluate multiple conditions on a single entity. All or any condition(s)
167+ must pass, depending on 'logic'.
166168
167169 Args:
168- entity_attrs: Attributes dict for one entity (node or link) .
169- conditions: List of FailureCondition.
170- logic: "and", "or", or "any ".
170+ entity_attrs: Attribute dict for one entity.
171+ conditions: List of FailureCondition to test .
172+ logic: "and" or "or ".
171173
172174 Returns:
173175 True if conditions pass, else False.
174176 """
175- if logic == "any" :
176- # 'any' means skip condition checks and always match
177- return True
178- if not conditions :
179- # If we have zero conditions, we treat this as no match unless logic='any'
180- return False
177+ if logic not in ("and" , "or" ):
178+ raise ValueError (f"Unsupported logic: { logic } " )
181179
182- # Evaluate each condition
183180 results = [_evaluate_condition (entity_attrs , c ) for c in conditions ]
184-
185- if logic == "and" :
186- return all (results )
187- elif logic == "or" :
188- return any (results )
189- else :
190- raise ValueError (f"Unsupported logic: { logic } " )
181+ return all (results ) if logic == "and" else any (results )
191182
192183 @staticmethod
193184 def _select_entities (
@@ -196,44 +187,78 @@ def _select_entities(
196187 rule : FailureRule ,
197188 ) -> List [str ]:
198189 """
199- From the matched set , pick which entities fail according to rule_type.
190+ From the matched IDs , pick which entities fail under the given rule_type.
200191
201192 Args:
202- entity_ids: IDs that matched the rule's conditions .
203- all_entities: Full entity dictionary (for potential future use ).
204- rule: The FailureRule specifying random/ choice/all selection .
193+ entity_ids: Matched entity IDs from _match_entities .
194+ all_entities: Full entity map (unused now, but available if needed ).
195+ rule: The FailureRule specifying ' random', ' choice', or 'all' .
205196
206197 Returns:
207- The final list of entity IDs that fail under this rule .
198+ A list of selected entity IDs to fail.
208199 """
200+ if not entity_ids :
201+ return []
202+
209203 if rule .rule_type == "random" :
210- # Each entity is chosen with probability=rule.probability
211- return [ent_id for ent_id in entity_ids if random () < rule .probability ]
204+ return [eid for eid in entity_ids if random () < rule .probability ]
212205 elif rule .rule_type == "choice" :
213- # Sample exactly 'count' from the matched set (or fewer if matched < count)
214206 count = min (rule .count , len (entity_ids ))
215- # Use sorted(...) for deterministic results
216207 return sample (sorted (entity_ids ), k = count )
217208 elif rule .rule_type == "all" :
218209 return entity_ids
219210 else :
220211 raise ValueError (f"Unsupported rule_type: { rule .rule_type } " )
221212
213+ def _expand_shared_risk_groups (
214+ self , failed_entities : set [str ], all_entities : Dict [str , Dict [str , Any ]]
215+ ) -> None :
216+ """
217+ Expand the 'failed_entities' set so that if an entity has
218+ shared_risk_group=X, all other entities with the same group also fail.
219+
220+ This is done iteratively until no new failures are found.
221+
222+ Args:
223+ failed_entities: Set of entity IDs already marked as failed.
224+ all_entities: Map of entity_id -> attributes (which may contain 'shared_risk_group').
225+ """
226+ # Pre-compute SRG -> entity IDs mapping for efficiency
227+ srg_map = defaultdict (set )
228+ for eid , attrs in all_entities .items ():
229+ srg = attrs .get ("shared_risk_group" )
230+ if srg :
231+ srg_map [srg ].add (eid )
232+
233+ queue = deque (failed_entities )
234+ while queue :
235+ current = queue .popleft ()
236+ current_srg = all_entities [current ].get ("shared_risk_group" )
237+ if not current_srg :
238+ continue
239+
240+ # All entities in the same SRG should fail
241+ for other_eid in srg_map [current_srg ]:
242+ if other_eid not in failed_entities :
243+ failed_entities .add (other_eid )
244+ queue .append (other_eid )
245+
222246
223247def _evaluate_condition (entity : Dict [str , Any ], cond : FailureCondition ) -> bool :
224248 """
225- Evaluate one FailureCondition (attr, operator, value) against entity attributes.
249+ Evaluate a single FailureCondition against an entity's attributes.
250+
251+ Operators supported:
252+ ==, !=, <, <=, >, >=, contains, not_contains, any_value, no_value
226253
227254 Args:
228- entity: The entity's attributes (e.g., node.attrs or link.attrs).
255+ entity: Entity attributes (e.g., node.attrs or link.attrs).
229256 cond: FailureCondition specifying (attr, operator, value).
230257
231258 Returns:
232259 True if the condition passes, else False.
233-
234- Raises:
235- ValueError: If the operator is not recognized.
236260 """
261+ has_attr = cond .attr in entity
237262 derived_value = entity .get (cond .attr , None )
238263 op = cond .operator
239264
@@ -242,12 +267,26 @@ def _evaluate_condition(entity: Dict[str, Any], cond: FailureCondition) -> bool:
242267 elif op == "!=" :
243268 return derived_value != cond .value
244269 elif op == "<" :
245- return derived_value < cond .value
270+ return ( derived_value is not None ) and ( derived_value < cond .value )
246271 elif op == "<=" :
247- return derived_value <= cond .value
272+ return ( derived_value is not None ) and ( derived_value <= cond .value )
248273 elif op == ">" :
249- return derived_value > cond .value
274+ return ( derived_value is not None ) and ( derived_value > cond .value )
250275 elif op == ">=" :
251- return derived_value >= cond .value
276+ return (derived_value is not None ) and (derived_value >= cond .value )
277+ elif op == "contains" :
278+ if derived_value is None :
279+ return False
280+ return cond .value in derived_value
281+ elif op == "not_contains" :
282+ if derived_value is None :
283+ return True
284+ return cond .value not in derived_value
285+ elif op == "any_value" :
286+ # Pass if the attribute key exists, even if the value is None
287+ return has_attr
288+ elif op == "no_value" :
289+ # Pass if the attribute key is missing or the value is None
290+ return (not has_attr ) or (derived_value is None )
252291 else :
253292 raise ValueError (f"Unsupported operator: { op } " )
0 commit comments