Skip to content

Commit e46dfe3

Browse files
committed
Add baremetal node adoption
Being able to adopt nodes is a necessity when losing the ironic DB or rebuilding the metalbox during upgrades or disaster recovery. The mechanism allows forcing adoption by specifying the `--adopt` option during baremetal sync. Nodes will automatically be adopted if they have their `provision_state` set to `active` in NetBox. Signed-off-by: Jan Horstmann <horstmann@osism.tech>
1 parent a84614c commit e46dfe3

3 files changed

Lines changed: 95 additions & 26 deletions

File tree

osism/commands/netbox.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ def get_parser(self, prog_name):
3131
type=int,
3232
help="Timeout for a scheduled task that has not been executed yet",
3333
)
34+
parser.add_argument(
35+
"--adopt",
36+
help="Adopt nodes rather than moving them to available\n"
37+
"Note: nodes are also adopted implicitly when the NetBox"
38+
"custom field 'provision_state' is set to 'active'.",
39+
action="store_true",
40+
)
3441
parser.add_argument(
3542
"--force",
3643
help="Force update of baremetal nodes (Used to update non-comparable items like passwords)",
@@ -69,6 +76,7 @@ def take_action(self, parsed_args):
6976

7077
task = conductor.sync_ironic.delay(
7178
node_name=node_name,
79+
adopt=parsed_args.adopt,
7280
force=parsed_args.force,
7381
dry_run=parsed_args.dry_run,
7482
skip_kernel_params=parsed_args.skip_kernel_params,

osism/tasks/conductor/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def sync_netbox(self, node_name=None, netbox_filter=None):
5353
def sync_ironic(
5454
self,
5555
node_name=None,
56+
adopt=False,
5657
force=False,
5758
dry_run=False,
5859
skip_kernel_params=None,
@@ -65,6 +66,7 @@ def sync_ironic(
6566
self.request.id,
6667
get_ironic_parameters,
6768
node_name,
69+
adopt,
6870
force,
6971
dry_run,
7072
skip_kernel_params=skip_kernel_params or [],

osism/tasks/conductor/ironic.py

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,9 @@ def _prettify_for_display(obj):
352352
return result
353353

354354

355-
def _sync_ironic_device(request_id, device, node_attributes, ports_attributes, force):
355+
def _sync_ironic_device(
356+
request_id, device, node_attributes, ports_attributes, adopt, force
357+
):
356358
osism_utils.push_task_output(request_id, f"Processing device {device.name}\n")
357359
node = openstack.baremetal_node_show(device.name, ignore_missing=True)
358360
if not node:
@@ -411,6 +413,9 @@ def _sync_ironic_device(request_id, device, node_attributes, ports_attributes, f
411413
)
412414
openstack.baremetal_port_delete(node_port["id"])
413415

416+
# NOTE: Adopt nodes with provisioning state active in NetBox or if explicitly requested
417+
is_adoption = adopt or device.custom_fields.get("provision_state", None) == "active"
418+
414419
node_validation = openstack.baremetal_node_validate(node["uuid"])
415420
if node_validation["management"].result:
416421
osism_utils.push_task_output(
@@ -430,8 +435,8 @@ def _sync_ironic_device(request_id, device, node_attributes, ports_attributes, f
430435
request_id,
431436
f"Baremetal node for {device.name} is manageable\n",
432437
)
433-
# NOTE: Ironic keeps the power state found during enroll. We set the node power state to off in order to have a defined state for all newly synced nodes
434-
if node["power_state"] != "power off":
438+
if not is_adoption and node["power_state"] != "power off":
439+
# NOTE: Ironic keeps the power state found during enroll. We set the node power state to off in order to have a defined state for all newly synced nodes
435440
osism_utils.push_task_output(
436441
request_id,
437442
f"Setting power state to 'power off' for {device.name}\n",
@@ -449,40 +454,73 @@ def _sync_ironic_device(request_id, device, node_attributes, ports_attributes, f
449454
request_id,
450455
f"Validation of boot interface successful for baremetal node for {device.name}\n",
451456
)
452-
if node["provision_state"] == "manageable":
457+
if is_adoption and node["provision_state"] == "available":
458+
# Note: Prepare adoption of available nodes by moving them to manageable
453459
osism_utils.push_task_output(
454460
request_id,
455-
f"Transitioning baremetal node to available state for {device.name}\n",
461+
f"Prepare adoption of available baremetal node by transitioning to manageable state for {device.name}\n",
456462
)
457-
if node["automated_clean"]:
458-
# NOTE: Skip automated cleaning on transition from manageable to available. We are waiting for the transition and do not want to wait on cleaning at this point
459-
node = openstack.baremetal_node_update(
460-
node["uuid"], dict(automated_clean=False)
463+
node = openstack.baremetal_node_set_provision_state(
464+
node["uuid"], "manage"
465+
)
466+
node = openstack.baremetal_node_wait_for_nodes_provision_state(
467+
node["uuid"], "manageable"
468+
)
469+
osism_utils.push_task_output(
470+
request_id,
471+
f"Baremetal node for {device.name} is manageable\n",
472+
)
473+
if node["provision_state"] == "manageable":
474+
if is_adoption:
475+
osism_utils.push_task_output(
476+
request_id,
477+
f"Adopting baremetal node for {device.name}\n",
461478
)
462-
try:
463-
openstack.baremetal_node_set_boot_device(
464-
node["uuid"], "cdrom", persistent=False
479+
node = openstack.baremetal_node_set_provision_state(
480+
node["uuid"], "adopt"
481+
)
482+
node = openstack.baremetal_node_wait_for_nodes_provision_state(
483+
node["uuid"], "active"
465484
)
466-
except Exception:
467485
osism_utils.push_task_output(
468486
request_id,
469-
f"Could not set boot device to cdrom for {device.name}, continuing\n",
487+
f"Baremetal node for {device.name} is active\n",
470488
)
471-
node = openstack.baremetal_node_set_provision_state(
472-
node["uuid"], "provide"
473-
)
474-
node = openstack.baremetal_node_wait_for_nodes_provision_state(
475-
node["uuid"], "available"
476-
)
489+
else:
490+
osism_utils.push_task_output(
491+
request_id,
492+
f"Transitioning baremetal node to available state for {device.name}\n",
493+
)
494+
if node["automated_clean"]:
495+
# NOTE: Skip automated cleaning on transition from manageable to available. We are waiting for the transition and do not want to wait on cleaning at this point
496+
node = openstack.baremetal_node_update(
497+
node["uuid"], dict(automated_clean=False)
498+
)
499+
try:
500+
openstack.baremetal_node_set_boot_device(
501+
node["uuid"], "cdrom", persistent=False
502+
)
503+
except Exception:
504+
osism_utils.push_task_output(
505+
request_id,
506+
f"Could not set boot device to cdrom for {device.name}, continuing\n",
507+
)
508+
node = openstack.baremetal_node_set_provision_state(
509+
node["uuid"], "provide"
510+
)
511+
node = openstack.baremetal_node_wait_for_nodes_provision_state(
512+
node["uuid"], "available"
513+
)
514+
osism_utils.push_task_output(
515+
request_id,
516+
f"Baremetal node for {device.name} is available\n",
517+
)
518+
477519
if not node["automated_clean"]:
478520
# NOTE: Activate automated cleaning, so that future actions will trigger it
479521
node = openstack.baremetal_node_update(
480522
node["uuid"], dict(automated_clean=True)
481523
)
482-
osism_utils.push_task_output(
483-
request_id,
484-
f"Baremetal node for {device.name} is available\n",
485-
)
486524
else:
487525
osism_utils.push_task_output(
488526
request_id,
@@ -517,7 +555,7 @@ def _sync_ironic_device(request_id, device, node_attributes, ports_attributes, f
517555

518556

519557
def _sync_ironic_device_dry_run(
520-
request_id, device, node_attributes, ports_attributes, force, template_vars
558+
request_id, device, node_attributes, ports_attributes, adopt, force, template_vars
521559
):
522560
# Collect actual secret values for string-level masking
523561
secret_values = set()
@@ -553,6 +591,20 @@ def _indent_json(obj):
553591
request_id,
554592
f"[DRY RUN] Would CREATE port with MAC {port_attributes['address']} for {device.name}\n",
555593
)
594+
osism_utils.push_task_output(
595+
request_id,
596+
f"[DRY RUN] Would try to transition node to `manageable` for {device.name}\n",
597+
)
598+
if adopt or device.custom_fields["provision_state"] == "active":
599+
osism_utils.push_task_output(
600+
request_id,
601+
f"[DRY RUN] Would try to adopt node for {device.name}\n",
602+
)
603+
else:
604+
osism_utils.push_task_output(
605+
request_id,
606+
f"[DRY RUN] Would try to transition node to `available` for {device.name}\n",
607+
)
556608
else:
557609
# NOTE: Check whether the baremetal node needs to be updated
558610
node_updates = {}
@@ -617,6 +669,7 @@ def sync_ironic(
617669
request_id,
618670
get_ironic_parameters,
619671
node_name=None,
672+
adopt=False,
620673
force=False,
621674
dry_run=False,
622675
skip_kernel_params=None,
@@ -773,6 +826,7 @@ def sync_ironic(
773826
device,
774827
node_attributes,
775828
ports_attributes,
829+
adopt,
776830
force,
777831
template_vars,
778832
)
@@ -784,7 +838,12 @@ def sync_ironic(
784838
if lock.acquire(timeout=120):
785839
try:
786840
_sync_ironic_device(
787-
request_id, device, node_attributes, ports_attributes, force
841+
request_id,
842+
device,
843+
node_attributes,
844+
ports_attributes,
845+
adopt,
846+
force,
788847
)
789848
except Exception as exc:
790849
osism_utils.push_task_output(

0 commit comments

Comments
 (0)