-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy path__init__.py
More file actions
673 lines (541 loc) · 20.4 KB
/
__init__.py
File metadata and controls
673 lines (541 loc) · 20.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
# SPDX-License-Identifier: Apache-2.0
import atexit
import threading
import time
import uuid
import os
from contextlib import redirect_stdout, redirect_stderr
from loguru import logger
import yaml
from osism import settings
# Lazily-initialized module-level singletons. The public names `redis`, `nb`
# and `secondary_nb_list` are resolved on demand by the module-level
# __getattr__ at the bottom of this file.
_redis = None  # shared Redis client, created by _init_redis()
_nb = None  # primary NetBox API connection, created by _init_nb()
_secondary_nb_list = None  # secondary NetBox connections, created lazily
_nb_initialized = False  # True once _init_nb() ran (even if _nb is None)
_secondary_nb_initialized = False  # True once _init_secondary_nb_list() ran
_cleanup_registered = False  # True once the atexit session cleanup hook is set
def _init_redis():
    """Return the shared Redis client, creating it on first use.

    The connection is verified with a PING right after creation so that
    configuration problems surface immediately rather than on the first
    real command.
    """
    global _redis
    if _redis is not None:
        return _redis
    from redis import Redis

    _redis = Redis(
        host=settings.REDIS_HOST,
        port=settings.REDIS_PORT,
        db=settings.REDIS_DB,
        socket_keepalive=True,
    )
    _redis.ping()
    return _redis
def _init_nb():
    """Return the primary NetBox connection, creating it on first access.

    The result is cached (it may be None when NetBox is not configured),
    so the connection attempt happens at most once per process.
    """
    global _nb, _nb_initialized
    if _nb_initialized:
        return _nb
    _nb = get_netbox_connection(
        settings.NETBOX_URL, settings.NETBOX_TOKEN, settings.IGNORE_SSL_ERRORS
    )
    _nb_initialized = True
    return _nb
def _init_secondary_nb_list():
    """Return the list of secondary NetBox connections (lazy, cached).

    Parses the NETBOX_SECONDARIES setting (a YAML list of mappings),
    validates every entry and opens one NetBox connection per entry.
    On any parse or validation error the problem is logged and an empty
    list is cached instead, so callers never see the exception.
    """
    global _secondary_nb_list, _secondary_nb_initialized
    if not _secondary_nb_initialized:
        try:
            secondary_nb_settings_list = yaml.safe_load(settings.NETBOX_SECONDARIES)
            # The only keys accepted in each NETBOX_SECONDARIES mapping.
            supported_secondary_nb_keys = [
                "NETBOX_URL",
                "NETBOX_TOKEN",
                "IGNORE_SSL_ERRORS",
                "NETBOX_NAME",
                "NETBOX_SITE",
            ]
            result = []
            if type(secondary_nb_settings_list) is not list:
                raise TypeError(
                    f"Setting NETBOX_SECONDARIES needs to be an array of mappings containing supported NetBox API configuration: {supported_secondary_nb_keys}"
                )
            for secondary_nb_settings in secondary_nb_settings_list:
                if type(secondary_nb_settings) is not dict:
                    raise TypeError(
                        f"Elements in setting NETBOX_SECONDARIES need to be mappings containing supported NetBox API configuration: {supported_secondary_nb_keys}"
                    )
                # Reject unknown keys early so typos do not go unnoticed.
                for key in list(secondary_nb_settings.keys()):
                    if key not in supported_secondary_nb_keys:
                        raise ValueError(
                            f"Unknown key in element of setting NETBOX_SECONDARIES. Supported keys: {supported_secondary_nb_keys}"
                        )
                # NETBOX_URL and a non-blank NETBOX_TOKEN are mandatory.
                if (
                    "NETBOX_URL" not in secondary_nb_settings
                    or not secondary_nb_settings["NETBOX_URL"]
                ):
                    raise ValueError(
                        "All NETBOX_URL values in the elements of setting NETBOX_SECONDARIES need to be valid NetBox URLs"
                    )
                if (
                    "NETBOX_TOKEN" not in secondary_nb_settings
                    or not str(secondary_nb_settings["NETBOX_TOKEN"]).strip()
                ):
                    raise ValueError(
                        "All NETBOX_TOKEN values in the elements of setting NETBOX_SECONDARIES need to be valid NetBox tokens"
                    )
                secondary_nb = get_netbox_connection(
                    secondary_nb_settings["NETBOX_URL"],
                    str(secondary_nb_settings["NETBOX_TOKEN"]).strip(),
                    secondary_nb_settings.get("IGNORE_SSL_ERRORS", True),
                )
                # Optional metadata is attached to the connection object so
                # consumers can identify the NetBox instance later.
                if "NETBOX_NAME" in secondary_nb_settings:
                    secondary_nb.netbox_name = secondary_nb_settings["NETBOX_NAME"]
                if "NETBOX_SITE" in secondary_nb_settings:
                    secondary_nb.netbox_site = secondary_nb_settings["NETBOX_SITE"]
                result.append(secondary_nb)
            _secondary_nb_list = result
        except (yaml.YAMLError, TypeError, ValueError) as exc:
            logger.error(f"Error parsing settings NETBOX_SECONDARIES: {exc}")
            _secondary_nb_list = []
        _secondary_nb_initialized = True
    return _secondary_nb_list
class RedisSemaphore:
    """Redis-based distributed semaphore for limiting concurrent operations.

    This implementation uses Redis sorted sets to track active holders and
    enforce a maximum concurrency limit. Holder entries are scored with their
    acquisition time; entries older than 60 seconds are treated as expired
    and removed, so a crashed holder cannot block the semaphore forever.

    NOTE(review): the capacity check (ZCARD) and the registration (ZADD) are
    two separate commands, so under heavy contention the limit can briefly be
    exceeded — this is a best-effort limiter, not a strict mutex.
    """

    def __init__(self, redis_client, key, maxsize, timeout=None):
        """Initialize the semaphore.

        Args:
            redis_client: Redis client instance
            key: Redis key for this semaphore
            maxsize: Maximum number of concurrent holders
            timeout: Optional timeout for acquisition in seconds
        """
        self.redis = redis_client
        self.key = f"semaphore:{key}"
        self.maxsize = maxsize
        self.timeout = timeout
        self.identifier = None  # set while this instance holds the semaphore

    def acquire(self, timeout=None):
        """Acquire the semaphore.

        Args:
            timeout: Optional timeout in seconds (overrides instance timeout)

        Returns:
            bool: True if acquired, False if timeout
        """
        timeout = timeout or self.timeout or 10
        identifier = str(uuid.uuid4())
        end_time = time.time() + timeout
        while time.time() < end_time:
            # Recompute `now` on every attempt. Previously it was computed
            # once before the loop, so after waiting the holder was
            # registered with a stale score and would be expired by other
            # processes earlier than its real 60-second window.
            now = time.time()
            # Clean up holders older than the 60-second expiry window.
            self.redis.zremrangebyscore(self.key, 0, now - 60)
            # Try to acquire
            if self.redis.zcard(self.key) < self.maxsize:
                self.redis.zadd(self.key, {identifier: now})
                self.identifier = identifier
                return True
            time.sleep(0.01)  # Wait 10ms before retry
        return False

    def release(self):
        """Release the semaphore."""
        if self.identifier:
            self.redis.zrem(self.key, self.identifier)
            self.identifier = None

    def __enter__(self):
        """Context manager entry."""
        if not self.acquire():
            raise TimeoutError(f"Could not acquire semaphore {self.key}")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.release()
        return False
# Cache slot for the lazily-built adapter class (created at most once).
_TimeoutHTTPAdapterClass = None


def _get_timeout_http_adapter_class():
    """Build (once) and return the HTTPAdapter subclass that applies a
    default timeout to every outgoing request.

    The class is created lazily so that `requests` is only imported when a
    session is actually needed, and cached in a module-level slot so that
    repeated calls return the very same class object.
    """
    global _TimeoutHTTPAdapterClass
    if _TimeoutHTTPAdapterClass is None:
        from requests.adapters import HTTPAdapter

        class _TimeoutHTTPAdapter(HTTPAdapter):
            """HTTPAdapter that sets a default timeout for all requests."""

            def __init__(
                self,
                timeout=None,
                pool_connections=10,
                pool_maxsize=10,
                *args,
                **kwargs,
            ):
                self.timeout = timeout
                super().__init__(
                    pool_connections=pool_connections,
                    pool_maxsize=pool_maxsize,
                    *args,
                    **kwargs,
                )

            def send(self, request, **kwargs):
                # Only fill in the timeout when the caller did not pass one.
                if kwargs.get("timeout") is None and self.timeout is not None:
                    kwargs["timeout"] = self.timeout
                return super().send(request, **kwargs)

        _TimeoutHTTPAdapterClass = _TimeoutHTTPAdapter
    return _TimeoutHTTPAdapterClass
class NetBoxSessionManager:
    """Manages a shared HTTP session for all NetBox connections.

    This class implements lazy initialization of a single shared session
    to prevent file descriptor exhaustion from multiple session instances.
    """

    _session = None
    # Created eagerly at class definition time. The previous lazy creation
    # ("if cls._lock is None: cls._lock = threading.Lock()") was itself racy:
    # two threads could each create their own lock and both enter the
    # critical section. `threading` is imported at module level already, so
    # eager creation costs nothing.
    _lock = threading.Lock()

    @classmethod
    def get_session(cls, ignore_ssl_errors=False, timeout=20):
        """Get or create the shared session (lazy initialization).

        Args:
            ignore_ssl_errors: Whether to ignore SSL certificate errors
            timeout: Request timeout in seconds (default: 20)

        Returns:
            requests.Session: The shared session instance

        Note:
            ignore_ssl_errors and timeout only take effect for the call that
            actually creates the session; later calls return the existing
            session unchanged.
        """
        if cls._session is None:
            # Imported lazily so merely importing this module does not pull
            # in requests/urllib3.
            import requests
            import urllib3

            with cls._lock:
                # Double-checked locking: another thread may have created the
                # session while we waited for the lock.
                if cls._session is None:
                    session = requests.Session()
                    AdapterClass = _get_timeout_http_adapter_class()
                    adapter = AdapterClass(
                        timeout=timeout, pool_connections=10, pool_maxsize=10
                    )
                    session.mount("http://", adapter)
                    session.mount("https://", adapter)
                    if ignore_ssl_errors:
                        urllib3.disable_warnings()
                        session.verify = False
                    # Publish only after the session is fully configured so
                    # unlocked readers never observe a half-built session.
                    cls._session = session
        return cls._session

    @classmethod
    def close_session(cls):
        """Close the shared session and release resources."""
        if cls._session is not None:
            cls._session.close()
            cls._session = None
def cleanup_netbox_sessions():
    """Cleanup function to close all NetBox sessions.

    Registered with atexit by get_netbox_connection() so the shared HTTP
    session is closed on interpreter shutdown.
    """
    NetBoxSessionManager.close_session()
def get_netbox_connection(
    netbox_url, netbox_token, ignore_ssl_errors=False, timeout=20
):
    """Create a NetBox API connection with shared session.

    Args:
        netbox_url: NetBox URL
        netbox_token: NetBox API token
        ignore_ssl_errors: Whether to ignore SSL certificate errors
        timeout: Request timeout in seconds (default: 20)

    Returns:
        pynetbox.api instance or None
    """
    import pynetbox

    # Without both a URL and a token no connection can be established.
    if not (netbox_url and netbox_token):
        return None

    nb = pynetbox.api(netbox_url, token=netbox_token)
    if nb:
        global _cleanup_registered
        if not _cleanup_registered:
            # Close the shared session on interpreter exit; register the
            # hook only once per process.
            atexit.register(cleanup_netbox_sessions)
            _cleanup_registered = True
        # Use shared session instead of creating new one
        nb.http_session = NetBoxSessionManager.get_session(
            ignore_ssl_errors=ignore_ssl_errors, timeout=timeout
        )
    return nb
def get_openstack_connection():
    """Open an OpenStack connection using the ambient configuration.

    Returns:
        The established OpenStack connection object.

    Raises:
        RuntimeError: when required authentication options are missing.
    """
    import keystoneauth1
    import openstack

    try:
        return openstack.connect()
    except keystoneauth1.exceptions.auth_plugins.MissingRequiredOptions as e:
        raise RuntimeError(
            "OpenStack connection failed: missing required authentication options"
        ) from e
def get_ansible_vault_password():
    """Fetch and decrypt the Ansible vault password.

    The Fernet key is read from /share/ansible_vault_password.key and used
    to decrypt the `ansible_vault_password` value stored in Redis.

    Returns:
        str: the decrypted vault password

    Raises:
        ValueError: when the password is missing from Redis or is empty.
        Exception: any underlying error (file, Fernet, Redis) is logged
            and re-raised unchanged.
    """
    from cryptography.fernet import Fernet

    keyfile = "/share/ansible_vault_password.key"
    try:
        with open(keyfile, "r") as fp:
            key = fp.read()
        f = Fernet(key)
        r = _init_redis()
        encrypted_ansible_vault_password = r.get("ansible_vault_password")
        if encrypted_ansible_vault_password is None:
            raise ValueError("Ansible vault password is not set in Redis")
        ansible_vault_password = f.decrypt(encrypted_ansible_vault_password)
        password = ansible_vault_password.decode("utf-8")
        if not password or password.strip() == "":
            raise ValueError(
                "Ansible vault password is empty or contains only whitespace"
            )
        return password
    except Exception:
        logger.error("Unable to get ansible vault password")
        # Bare `raise` re-raises the active exception with its original
        # traceback intact (clearer than `raise exc`).
        raise
# https://stackoverflow.com/questions/2361426/get-the-first-item-from-an-iterable-that-matches-a-condition
def first(iterable, condition=lambda x: True):
    """
    Return the first item of `iterable` that satisfies `condition`.

    Without an explicit condition, simply returns the first item of
    the iterable.

    Raises `StopIteration` if no item satisfying the condition is found.

    >>> first( (1,2,3), condition=lambda x: x % 2 == 0)
    2
    >>> first(range(3, 100))
    3
    >>> first( () )
    Traceback (most recent call last):
    ...
    StopIteration
    """
    for item in iterable:
        if condition(item):
            return item
    raise StopIteration
def fetch_task_output(
    task_id,
    timeout=int(os.environ.get("OSISM_TASK_TIMEOUT", 300)),
    enable_play_recap=False,
):
    """Stream the output of a task from its Redis stream to stdout.

    Blocks reading the `task_id` stream and prints stdout messages as they
    arrive; every received message extends the deadline by `timeout`
    seconds, so the timeout is an inactivity timeout.

    Args:
        task_id: ID of the task whose stream is read
        timeout: Inactivity timeout in seconds. Defaults to the
            OSISM_TASK_TIMEOUT environment variable (300 when unset); the
            value is wrapped in int() because os.environ.get returns a
            string — without the conversion, `timeout * 1000` performed
            string repetition and `time.time() + timeout` raised TypeError
            whenever the environment variable was set.
        enable_play_recap: When True, log an informational notice once an
            Ansible "PLAY RECAP" appears in the output.

    Returns:
        int: the return code reported by the task (0 when none was reported)

    Raises:
        TimeoutError: when no message arrives within `timeout` seconds.
    """
    r = _init_redis()
    rc = 0
    stoptime = time.time() + timeout
    last_id = 0
    while time.time() < stoptime:
        data = r.xread({str(task_id): last_id}, count=1, block=(timeout * 1000))
        if data:
            # Activity on the stream pushes the inactivity deadline out.
            stoptime = time.time() + timeout
            messages = data[0]
            for message_id, message in messages[1]:
                last_id = message_id.decode()
                message_type = message[b"type"].decode()
                message_content = message[b"content"].decode()
                logger.debug(f"Processing message {last_id} of type {message_type}")
                # Messages are consumed exactly once: delete after reading.
                r.xdel(str(task_id), last_id)
                if message_type == "stdout":
                    print(message_content, end="")
                    if enable_play_recap and "PLAY RECAP" in message_content:
                        logger.info(
                            "Play has been completed. There may now be a delay until "
                            "all logs have been written."
                        )
                        logger.info("Please wait and do not abort execution.")
                elif message_type == "rc":
                    rc = int(message_content)
                elif message_type == "action" and message_content == "quit":
                    r.close()
                    return rc
    raise TimeoutError
def push_task_output(task_id, line):
    """Append one line of task stdout to the task's Redis stream."""
    _init_redis().xadd(task_id, {"type": "stdout", "content": line})
def finish_task_output(task_id, rc=None):
    """Finalize a task's Redis stream.

    Publishes the task's return code (when one was given) followed by a
    "quit" action that tells fetch_task_output() to stop reading.

    Args:
        task_id: ID of the task stream to finalize
        rc: Optional return code to publish. Checked with `is not None`
            (rather than truthiness) so that an explicit rc of 0 is
            published as well; readers default to 0 when no rc message is
            present, so this also remains compatible with old behavior.
    """
    r = _init_redis()
    if rc is not None:
        r.xadd(task_id, {"type": "rc", "content": rc})
    r.xadd(task_id, {"type": "action", "content": "quit"})
def revoke_task(task_id):
    """
    Revoke a running Celery task.

    Args:
        task_id (str): The ID of the task to revoke

    Returns:
        bool: True if revocation was successful, False otherwise
    """
    try:
        from celery import Celery
        from osism.tasks import Config

        celery_app = Celery("task")
        celery_app.config_from_object(Config)
        # terminate=True also kills the worker process executing the task.
        celery_app.control.revoke(task_id, terminate=True)
    except Exception as e:
        logger.error(f"Failed to revoke task {task_id}: {e}")
        return False
    return True
def create_redlock(key, auto_release_time=3600):
    """
    Create a Redlock instance with output suppression during initialization.

    Args:
        key (str): The lock key
        auto_release_time (int): Auto release time in seconds (default: 3600)

    Returns:
        Redlock: The configured Redlock instance
    """
    import logging

    # Permanently silence the pottery logger so Redlock initialization does
    # not spam the log output.
    logging.getLogger("pottery").setLevel(logging.CRITICAL)

    # The pottery import itself may write to stdout/stderr; swallow both.
    with open(os.devnull, "w") as devnull:
        with redirect_stdout(devnull), redirect_stderr(devnull):
            from pottery import Redlock

            return Redlock(
                key=key,
                masters={_init_redis()},
                auto_release_time=auto_release_time,
            )
def create_netbox_semaphore(netbox_url, max_connections=None):
    """
    Create a Redis semaphore for limiting concurrent NetBox connections.

    Args:
        netbox_url (str): The NetBox URL to create a semaphore for
        max_connections (int): Maximum concurrent connections (default: from
            settings.NETBOX_MAX_CONNECTIONS)

    Returns:
        RedisSemaphore: The configured semaphore instance
    """
    import hashlib

    limit = (
        settings.NETBOX_MAX_CONNECTIONS
        if max_connections is None
        else max_connections
    )
    # Hash the URL (md5 used only as a short fingerprint, not for security)
    # so each NetBox instance gets its own semaphore key.
    digest = hashlib.md5(netbox_url.encode()).hexdigest()
    return RedisSemaphore(
        key=f"netbox_semaphore_{digest[:8]}",
        maxsize=limit,
        redis_client=_init_redis(),
        timeout=30,
    )
def set_task_lock(user=None, reason=None):
    """
    Set task lock to prevent new tasks from starting.

    Args:
        user (str): User who set the lock (optional)
        reason (str): Reason for the lock (optional)

    Returns:
        bool: True if lock was set successfully
    """
    try:
        import json
        from datetime import datetime

        payload = {
            "locked": True,
            "timestamp": datetime.now().isoformat(),
            "user": user or settings.OPERATOR_USER,
            "reason": reason,
        }
        _init_redis().set("osism:task_lock", json.dumps(payload))
        return True
    except Exception as e:
        logger.error(f"Failed to set task lock: {e}")
        return False
def remove_task_lock():
    """
    Remove task lock to allow new tasks to start.

    Returns:
        bool: True if lock was removed successfully
    """
    try:
        _init_redis().delete("osism:task_lock")
    except Exception as e:
        logger.error(f"Failed to remove task lock: {e}")
        return False
    return True
def is_task_locked():
    """
    Check if tasks are currently locked.

    Returns:
        dict: Lock status information or None if not locked
    """
    try:
        import json

        raw = _init_redis().get("osism:task_lock")
        # Missing key means nobody holds the lock.
        return json.loads(raw.decode("utf-8")) if raw else None
    except Exception as e:
        logger.error(f"Failed to check task lock status: {e}")
        return None
def check_ansible_facts(max_age=None):
    """Check if Ansible facts exist in Redis and are not stale.

    Scans Redis for ansible_facts* keys and checks the
    ansible_date_time.epoch field to determine freshness. Problems are
    reported as warnings only; this function never raises.

    Args:
        max_age: Maximum age in seconds (default: settings.FACTS_MAX_AGE)
    """
    import json

    if max_age is None:
        max_age = settings.FACTS_MAX_AGE
    try:
        r = _init_redis()
        # Find all ansible_facts keys (SCAN avoids blocking Redis the way
        # KEYS would).
        keys = []
        cursor = 0
        while True:
            cursor, batch = r.scan(cursor, match="ansible_facts*", count=100)
            keys.extend(batch)
            if cursor == 0:
                break
    except Exception as e:
        logger.warning(f"Could not check Ansible facts freshness: {e}")
        return
    if not keys:
        logger.warning(
            "No Ansible facts found in Redis cache. "
            "Run 'osism sync facts' to gather facts."
        )
        return
    now = time.time()
    stale_hosts = []
    for key in keys:
        data = None
        try:
            key_str = key.decode() if isinstance(key, bytes) else key
            hostname = key_str.replace("ansible_facts", "", 1)
            data = r.get(key)
            if not data:
                continue
            facts = json.loads(data)
            date_time = facts.get("ansible_date_time", {})
            epoch = date_time.get("epoch")
            if epoch is None:
                logger.debug(
                    f"Host '{hostname}': facts missing ansible_date_time.epoch"
                )
                continue
            age = now - float(epoch)
            if age > max_age:
                stale_hosts.append((hostname, int(age)))
        except (json.JSONDecodeError, ValueError, TypeError):
            truncated_value = data
            if isinstance(truncated_value, (bytes, str)):
                truncated_value = truncated_value[:200]
            # `logger` here is loguru, which formats with str.format and has
            # no exc_info keyword; the previous stdlib-logging style call
            # ("... %r: %r", key, value, exc_info=True) logged the literal
            # placeholders and dropped the values and traceback. Use an
            # f-string and opt(exception=True) instead.
            logger.opt(exception=True).debug(
                f"Skipping malformed ansible_facts entry for key {key!r}: "
                f"{truncated_value!r}"
            )
            continue
    if stale_hosts:
        logger.warning(
            f"Ansible facts in Redis are stale for {len(stale_hosts)} host(s) "
            f"(older than {max_age} seconds). "
            f"Run 'osism sync facts' to update facts."
        )
        for hostname, age in stale_hosts:
            logger.warning(f"  Host '{hostname}': facts are {age} seconds old")
def check_task_lock_and_exit():
    """
    Check if tasks are locked and exit with error message if they are.
    Used by commands that should not run when tasks are locked.
    """
    lock_info = is_task_locked()
    if not lock_info or not lock_info.get("locked"):
        return
    user = lock_info.get("user", "unknown")
    timestamp = lock_info.get("timestamp", "unknown")
    logger.error(f"Tasks are currently locked by {user} at {timestamp}")
    reason = lock_info.get("reason")
    if reason:
        logger.error(f"Reason: {reason}")
    logger.error("Use 'osism unlock' to remove the lock")
    exit(1)
def __getattr__(name):
    """Resolve the lazy module attributes `redis`, `nb` and
    `secondary_nb_list` on first access and cache the result in the module
    namespace so subsequent lookups bypass this hook entirely.
    """
    lazy_initializers = {
        "redis": _init_redis,
        "nb": _init_nb,
        "secondary_nb_list": _init_secondary_nb_list,
    }
    initializer = lazy_initializers.get(name)
    if initializer is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    value = initializer()
    globals()[name] = value
    return value