diff --git a/crates/api-db/src/switch.rs b/crates/api-db/src/switch.rs index 40c1fcb638..9b2510279a 100644 --- a/crates/api-db/src/switch.rs +++ b/crates/api-db/src/switch.rs @@ -22,6 +22,7 @@ use carbide_uuid::switch::SwitchId; use chrono::prelude::*; use config_version::{ConfigVersion, Versioned}; use health_report::{HealthReport, HealthReportApplyMode}; +use mac_address::MacAddress; use model::controller_outcome::PersistentStateHandlerOutcome; use model::metadata::Metadata; use model::rack::RackFirmwareUpgradeStatus; @@ -459,34 +460,6 @@ pub async fn update(switch: &Switch, txn: &mut PgConnection) -> Result DatabaseResult> { - let sql = r#" - SELECT - es.serial_number, - es.bmc_mac_address, - mia.address as ip_address - FROM expected_switches es - JOIN machine_interfaces mi ON mi.mac_address = es.bmc_mac_address - JOIN machine_interface_addresses mia ON mia.interface_id = mi.id - JOIN network_segments ns ON ns.id = mi.segment_id - WHERE ns.network_segment_type = 'underlay' - "#; - - sqlx::query_as(sql) - .fetch_all(txn) - .await - .map_err(|err| DatabaseError::new("list_switch_bmc_info", err)) -} - /// Resolve SwitchIds to BMC IPs via the FK path: /// switches.bmc_mac_address -> expected_switches.bmc_mac_address /// -> machine_interfaces -> machine_interface_addresses (underlay) -> IP @@ -611,38 +584,6 @@ pub async fn update_metadata( } } -#[derive(Debug, sqlx::FromRow)] -pub struct SwitchBmcRow { - pub switch_id: SwitchId, - pub bmc_mac: MacAddress, - pub bmc_ip: IpAddr, -} - -/// Resolve SwitchIds to BMC MAC + IP via machine_interfaces. -pub async fn find_bmc_info_by_switch_ids( - db: impl crate::db_read::DbReader<'_>, - switch_ids: &[SwitchId], -) -> DatabaseResult> { - let sql = r#" - SELECT DISTINCT ON (mi.switch_id) - mi.switch_id, - mi.mac_address AS bmc_mac, - mia.address AS bmc_ip - FROM machine_interfaces mi - JOIN machine_interface_addresses mia ON mia.interface_id = mi.id - JOIN network_segments ns ON ns.id = mi.segment_id - WHERE mi.switch_id = ANY($1) - AND ns.network_segment_type = 'underlay' - ORDER BY mi.switch_id - "#; - - sqlx::query_as(sql) - .bind(switch_ids) - .fetch_all(db) - .await - .map_err(|err| DatabaseError::new("switch::find_bmc_info_by_switch_ids", err)) -} - /// A switch resolved by its BMC MAC address, along with the rack it belongs /// to. Used by the Component Manager state controller wrapper to build a /// rack-level `MaintenanceScope` for the switches it's been asked to act on. diff --git a/crates/api/src/api.rs b/crates/api/src/api.rs index 88c43a8a3f..6c597e4a1f 100644 --- a/crates/api/src/api.rs +++ b/crates/api/src/api.rs @@ -899,6 +899,13 @@ impl Forge for Api { crate::handlers::credential::get_bmc_credentals(self, request).await } + async fn get_switch_nvos_credentials( + &self, + request: Request, + ) -> Result, Status> { + crate::handlers::credential::get_switch_nvos_credentials(self, request).await + } + /// Network status of each managed host, as reported by forge-dpu-agent. /// For use by forge-admin-cli /// diff --git a/crates/api/src/auth/internal_rbac_rules.rs b/crates/api/src/auth/internal_rbac_rules.rs index 867c4f14d3..7108e47a16 100644 --- a/crates/api/src/auth/internal_rbac_rules.rs +++ b/crates/api/src/auth/internal_rbac_rules.rs @@ -250,6 +250,7 @@ impl InternalRBACRules { x.perm("DeleteTenantKeyset", vec![SiteAgent]); x.perm("ValidateTenantPublicKey", vec![SiteAgent, Ssh, SshRs]); x.perm("GetBmcCredentials", vec![Health, BmcProxy]); + x.perm("GetSwitchNvosCredentials", vec![Health]); x.perm("GetAllManagedHostNetworkStatus", vec![ForgeAdminCLI]); x.perm( "GetSiteExplorationReport", diff --git a/crates/api/src/handlers/credential.rs b/crates/api/src/handlers/credential.rs index f93a0ef3eb..79c7085686 100644 --- a/crates/api/src/handlers/credential.rs +++ b/crates/api/src/handlers/credential.rs @@ -409,7 +409,10 @@ pub(crate) async fn get_bmc_credentals( }) .await .map_err(|e| CarbideError::internal(e.to_string()))? - .ok_or_else(|| CarbideError::internal("missing credentials".to_string()))?; + .ok_or_else(|| CarbideError::NotFoundError { + kind: "bmc_root_credentials", + id: req.mac_addr.clone(), + })?; let (username, password) = match credentials { Credentials::UsernamePassword { username, password } => (username, password), @@ -424,6 +427,62 @@ pub(crate) async fn get_bmc_credentals( })) } +pub(crate) async fn get_switch_nvos_credentials( + api: &Api, + request: tonic::Request, +) -> Result, tonic::Status> { + crate::api::log_request_data(&request); + + let req = request.into_inner(); + let switch_id = req + .switch_id + .ok_or_else(|| CarbideError::InvalidArgument("switch_id is required".to_string()))?; + + let bmc_mac_address = { + let mut txn = api.txn_begin().await?; + let switches = db::switch::find_by( + &mut txn, + db::ObjectColumnFilter::One(db::switch::IdColumn, &switch_id), + ) + .await?; + let _ = txn.rollback().await; + + let switch = switches + .first() + .ok_or_else(|| CarbideError::NotFoundError { + kind: "switch", + id: switch_id.to_string(), + })?; + + switch + .bmc_mac_address + .ok_or_else(|| CarbideError::NotFoundError { + kind: "switch_bmc_mac_address", + id: switch_id.to_string(), + })? + }; + + let credentials = api + .credential_manager + .get_credentials(&CredentialKey::SwitchNvosAdmin { bmc_mac_address }) + .await + .map_err(|e| CarbideError::internal(e.to_string()))? + .ok_or_else(|| CarbideError::NotFoundError { + kind: "switch_nvos_credentials", + id: switch_id.to_string(), + })?; + + let Credentials::UsernamePassword { username, password } = credentials; + + Ok(Response::new(rpc::GetBmcCredentialsResponse { + credentials: Some(rpc::BmcCredentials { + r#type: Some(rpc::bmc_credentials::Type::UsernamePassword( + rpc::UsernamePassword { username, password }, + )), + }), + })) +} + async fn set_sitewide_bmc_root_credentials( api: &Api, password: String, diff --git a/crates/api/src/handlers/switch.rs b/crates/api/src/handlers/switch.rs index 599b50b085..c15a3ef3a8 100644 --- a/crates/api/src/handlers/switch.rs +++ b/crates/api/src/handlers/switch.rs @@ -68,27 +68,17 @@ pub async fn find_switch( })? }; - let bmc_info_map: std::collections::HashMap = { - let rows = db_switch::list_switch_bmc_info(&mut txn) + let switch_ids: Vec<_> = switch_list.iter().map(|switch| switch.id).collect(); + let endpoint_info_map: std::collections::HashMap<_, _> = if switch_ids.is_empty() { + std::collections::HashMap::new() + } else { + db_switch::find_switch_endpoints_by_ids(&mut *txn, &switch_ids) .await .map_err(|e| CarbideError::Internal { - message: format!("Failed to get switch BMC info: {}", e), - })?; - - rows.into_iter() - .map(|row| { - ( - row.bmc_mac_address.to_string(), - rpc::BmcInfo { - ip: Some(row.ip_address.to_string()), - mac: Some(row.bmc_mac_address.to_string()), - version: None, - firmware_version: None, - port: None, - machine_interface_id: None, - }, - ) - }) + message: format!("Failed to get switch endpoint info: {}", e), + })? + .into_iter() + .map(|row| (row.switch_id, row)) .collect() }; @@ -99,13 +89,33 @@ pub async fn find_switch( let switches: Vec = switch_list .into_iter() .map(|s| { - let bmc_info = s - .bmc_mac_address - .as_ref() - .and_then(|mac| bmc_info_map.get(&mac.to_string()).cloned()); + let endpoint_info = endpoint_info_map.get(&s.id); rpc::Switch::try_from(s).map(|mut rpc_switch| { - rpc_switch.bmc_info = bmc_info; + rpc_switch.bmc_info = endpoint_info.map(|row| rpc::BmcInfo { + ip: Some(row.bmc_ip.to_string()), + mac: Some(row.bmc_mac.to_string()), + version: None, + firmware_version: None, + port: None, + machine_interface_id: None, + }); + rpc_switch.nvos_info = endpoint_info.and_then(|row| { + let (Some(nvos_mac), Some(nvos_ip)) = + (row.nvos_mac.as_ref(), row.nvos_ip.as_ref()) + else { + return None; + }; + + Some(rpc::BmcInfo { + ip: Some(nvos_ip.to_string()), + mac: Some(nvos_mac.to_string()), + version: None, + firmware_version: None, + port: None, + machine_interface_id: None, + }) + }); rpc_switch }) }) @@ -158,40 +168,48 @@ pub async fn find_by_ids( ) .await?; - let bmc_info_map: std::collections::HashMap<_, _> = { - let rows = db_switch::find_bmc_info_by_switch_ids(&mut txn, &switch_ids) + let endpoint_info_map: std::collections::HashMap<_, _> = + db_switch::find_switch_endpoints_by_ids(&mut txn, &switch_ids) .await .map_err(|e| CarbideError::Internal { - message: format!("Failed to get switch BMC info: {}", e), - })?; - - rows.into_iter() - .map(|row| { - ( - row.switch_id, - rpc::BmcInfo { - ip: Some(row.bmc_ip.to_string()), - mac: Some(row.bmc_mac.to_string()), - version: None, - firmware_version: None, - port: None, - machine_interface_id: None, - }, - ) - }) - .collect() - }; + message: format!("Failed to get switch endpoint info: {}", e), + })? + .into_iter() + .map(|row| (row.switch_id, row)) + .collect(); let _ = txn.rollback().await; let switches: Vec = switch_list .into_iter() .map(|s| { - let id = s.id; - let bmc_info = bmc_info_map.get(&id).cloned(); + let endpoint_info = endpoint_info_map.get(&s.id); rpc::Switch::try_from(s).map(|mut rpc_switch| { - rpc_switch.bmc_info = bmc_info; + rpc_switch.bmc_info = endpoint_info.map(|row| rpc::BmcInfo { + ip: Some(row.bmc_ip.to_string()), + mac: Some(row.bmc_mac.to_string()), + version: None, + firmware_version: None, + port: None, + machine_interface_id: None, + }); + rpc_switch.nvos_info = endpoint_info.and_then(|row| { + let (Some(nvos_mac), Some(nvos_ip)) = + (row.nvos_mac.as_ref(), row.nvos_ip.as_ref()) + else { + return None; + }; + + Some(rpc::BmcInfo { + ip: Some(nvos_ip.to_string()), + mac: Some(nvos_mac.to_string()), + version: None, + firmware_version: None, + port: None, + machine_interface_id: None, + }) + }); rpc_switch }) }) diff --git a/crates/api/src/tests/credential.rs b/crates/api/src/tests/credential.rs index 2fa72f01ba..ad45c83168 100644 --- a/crates/api/src/tests/credential.rs +++ b/crates/api/src/tests/credential.rs @@ -16,7 +16,8 @@ */ use forge_secrets::credentials::{ - BgpCredentialType, CredentialKey, CredentialReader, CredentialType, Credentials, + BgpCredentialType, CredentialKey, CredentialReader, CredentialType, CredentialWriter, + Credentials, }; use rpc::forge::forge_server::Forge; use rpc::forge::{ @@ -26,6 +27,7 @@ use tonic::Code; use crate::handlers::credential::MAX_BGP_PASSWORD_LENGTH; use crate::tests::common::api_fixtures::create_test_env; +use crate::tests::common::api_fixtures::site_explorer::new_switch; #[crate::sqlx_test] async fn test_create_host_uefi_credential_when_missing(pool: sqlx::PgPool) { @@ -243,3 +245,46 @@ async fn test_create_bgp_credential_validates_max_password_length(pool: sqlx::Pg }) ); } + +#[crate::sqlx_test] +async fn test_get_switch_nvos_credentials(pool: sqlx::PgPool) -> eyre::Result<()> { + let env = create_test_env(pool).await; + let switch_id = new_switch(&env, Some("Switch1".to_string()), None).await?; + let bmc_mac_address = db::switch::find_switch_endpoints_by_ids(&env.pool, &[switch_id]) + .await? + .first() + .expect("switch endpoint row") + .bmc_mac; + + env.test_credential_manager + .set_credentials( + &CredentialKey::SwitchNvosAdmin { bmc_mac_address }, + &Credentials::UsernamePassword { + username: "nvos-admin".to_string(), + password: "nvos-secret".to_string(), + }, + ) + .await?; + + let response = env + .api + .get_switch_nvos_credentials(tonic::Request::new( + rpc::forge::GetSwitchNvosCredentialsRequest { + switch_id: Some(switch_id), + }, + )) + .await? + .into_inner(); + + let credentials = response.credentials.expect("credentials"); + let Some(rpc::forge::bmc_credentials::Type::UsernamePassword(username_password)) = + credentials.r#type + else { + panic!("expected username/password credentials"); + }; + + assert_eq!(username_password.username, "nvos-admin"); + assert_eq!(username_password.password, "nvos-secret"); + + Ok(()) +} diff --git a/crates/api/src/tests/switch_find.rs b/crates/api/src/tests/switch_find.rs index bbbe39904a..34b2021d4d 100644 --- a/crates/api/src/tests/switch_find.rs +++ b/crates/api/src/tests/switch_find.rs @@ -254,11 +254,108 @@ async fn test_find_switches_by_ids_response_fields( // state_version should be populated assert!(!switch.state_version.is_empty()); - // bmc_info is None when no machine_interface discovery data exists + // bmc_info should be populated from the seeded machine_interface discovery data assert!( - switch.bmc_info.is_none(), - "bmc_info should be None when no discovery data exists" + switch.bmc_info.is_some(), + "bmc_info should be present when discovery data exists" ); Ok(()) } + +#[crate::sqlx_test] +async fn test_find_switches_by_ids_includes_resolved_nvos_info( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env(pool).await; + let switch_id = new_switch(&env, Some("Switch1".to_string()), None).await?; + + let mut rows = db::switch::find_switch_endpoints_by_ids(&env.pool, &[switch_id]).await?; + let expected = rows.pop().expect("switch endpoint row"); + let host_mac = expected.nvos_mac.expect("nvos mac"); + let host_ip = expected.nvos_ip.expect("nvos ip"); + + let response = env + .api + .find_switches_by_ids(tonic::Request::new(rpc::forge::SwitchesByIdsRequest { + switch_ids: vec![switch_id], + })) + .await? + .into_inner(); + + assert_eq!(response.switches.len(), 1); + let switch = &response.switches[0]; + assert_eq!(switch.id, Some(switch_id)); + assert_eq!( + switch.bmc_info.as_ref().and_then(|info| info.mac.clone()), + Some(expected.bmc_mac.to_string()) + ); + assert_eq!( + switch.bmc_info.as_ref().and_then(|info| info.ip.clone()), + Some(expected.bmc_ip.to_string()) + ); + + let nvos_info = switch.nvos_info.as_ref().expect("nvos info"); + assert_eq!(nvos_info.mac, Some(host_mac.to_string())); + assert_eq!(nvos_info.ip, Some(host_ip.to_string())); + + Ok(()) +} + +#[crate::sqlx_test] +async fn test_find_switches_includes_resolved_nvos_info( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env(pool).await; + let switch_id = new_switch(&env, Some("Switch1".to_string()), None).await?; + + let mut rows = db::switch::find_switch_endpoints_by_ids(&env.pool, &[switch_id]).await?; + let expected = rows.pop().expect("switch endpoint row"); + let host_mac = expected.nvos_mac.expect("nvos mac"); + let host_ip = expected.nvos_ip.expect("nvos ip"); + + let response = env + .api + .find_switches(tonic::Request::new(rpc::forge::SwitchQuery { + name: None, + switch_id: Some(switch_id), + })) + .await? + .into_inner(); + + assert_eq!(response.switches.len(), 1); + let nvos_info = response.switches[0].nvos_info.as_ref().expect("nvos info"); + assert_eq!(nvos_info.mac, Some(host_mac.to_string())); + assert_eq!(nvos_info.ip, Some(host_ip.to_string())); + + Ok(()) +} + +#[crate::sqlx_test] +async fn test_find_switches_by_ids_returns_no_nvos_info_when_unresolved( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env(pool).await; + let switch_id = new_switch(&env, Some("Switch1".to_string()), None).await?; + let rows = db::switch::find_switch_endpoints_by_ids(&env.pool, &[switch_id]).await?; + let bmc_mac = rows.first().expect("switch endpoint row").bmc_mac; + + { + let mut txn = env.pool.begin().await?; + db::expected_switch::update_nvos_mac_addresses(txn.as_mut(), bmc_mac, &[]).await?; + txn.commit().await?; + } + + let response = env + .api + .find_switches_by_ids(tonic::Request::new(rpc::forge::SwitchesByIdsRequest { + switch_ids: vec![switch_id], + })) + .await? + .into_inner(); + + assert_eq!(response.switches.len(), 1); + assert!(response.switches[0].nvos_info.is_none()); + + Ok(()) +} diff --git a/crates/health/example/config.example.toml b/crates/health/example/config.example.toml index 87972a6778..19e215fcc3 100644 --- a/crates/health/example/config.example.toml +++ b/crates/health/example/config.example.toml @@ -46,7 +46,15 @@ port = 443 mac = "11:22:33:44:55:66" username = "admin" password = "secret" -switch = { id = "fsw100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1l0", serial = "SN-SWITCH-001", slot_number = 7, tray_index = 3 } +switch = { id = "fsw100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1l0", serial = "SN-SWITCH-BMC-001", endpoint_role = "bmc", slot_number = 7, tray_index = 3 } + +[[endpoint_sources.static_bmc_endpoints]] +ip = "10.0.1.2" +port = 443 +mac = "11:22:33:44:55:77" +username = "admin" +password = "secret" +switch = { id = "fsw100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1l0", serial = "SN-SWITCH-HOST-001", endpoint_role = "host", is_primary = true, slot_number = 7, tray_index = 3 } [[endpoint_sources.static_bmc_endpoints]] ip = "10.0.2.1" diff --git a/crates/health/src/api_client.rs b/crates/health/src/api_client.rs index b49c78310b..50e1f098b3 100644 --- a/crates/health/src/api_client.rs +++ b/crates/health/src/api_client.rs @@ -21,6 +21,7 @@ use std::str::FromStr; use std::sync::{Arc, RwLock}; use carbide_uuid::rack::RackId; +use carbide_uuid::switch::SwitchId; use forge_tls::client_config::ClientCert; use mac_address::MacAddress; use rpc::forge::MachineSearchConfig; @@ -31,7 +32,7 @@ use url::Url; use crate::HealthError; use crate::endpoint::{ BmcAddr, BmcCredentials, BmcEndpoint, BoxFuture, CredentialProvider, EndpointMetadata, - EndpointSource, MachineData, PowerShelfData, SwitchData, + EndpointSource, MachineData, PowerShelfData, SwitchData, SwitchEndpointRole, }; #[derive(Clone)] @@ -42,6 +43,13 @@ pub struct ApiClientWrapper { #[derive(Clone)] struct ApiCredentialProvider { client: ForgeApiClient, + kind: ApiCredentialKind, +} + +#[derive(Clone)] +enum ApiCredentialKind { + Bmc, + SwitchNvosAdmin { switch_id: SwitchId }, } impl CredentialProvider for ApiCredentialProvider { @@ -50,24 +58,68 @@ impl CredentialProvider for ApiCredentialProvider { endpoint: &'a BmcAddr, ) -> BoxFuture<'a, Result> { Box::pin(async move { - let request = rpc::forge::GetBmcCredentialsRequest { - mac_addr: endpoint.mac.to_string(), + let response = match &self.kind { + ApiCredentialKind::Bmc => { + let request = rpc::forge::GetBmcCredentialsRequest { + mac_addr: endpoint.mac.to_string(), + }; + + self.client + .get_bmc_credentials(request) + .await + .map_err(HealthError::ApiInvocationError)? + } + ApiCredentialKind::SwitchNvosAdmin { switch_id } => { + let request = rpc::forge::GetSwitchNvosCredentialsRequest { + switch_id: Some(*switch_id), + }; + + self.client + .get_switch_nvos_credentials(request) + .await + .map_err(HealthError::ApiInvocationError)? + } }; - self.client - .get_bmc_credentials(request) - .await - .map_err(HealthError::ApiInvocationError)? + response .credentials .and_then(|credentials| credentials.r#type) .map(Into::into) .ok_or_else(|| { - HealthError::GenericError("missing BMC credentials in API response".to_string()) + HealthError::GenericError("missing credentials in API response".to_string()) }) }) } } +fn switch_endpoint_metadata( + switch: &rpc::forge::Switch, + endpoint_role: SwitchEndpointRole, + nmxt_enabled: bool, +) -> Result { + let serial = switch + .config + .as_ref() + .map(|config| config.name.clone()) + .ok_or_else(|| HealthError::GenericError("switch endpoint does not have serial".into()))?; + + Ok(EndpointMetadata::Switch(SwitchData { + id: switch.id, + serial, + slot_number: switch + .placement_in_rack + .as_ref() + .and_then(|placement| placement.slot_number), + tray_index: switch + .placement_in_rack + .as_ref() + .and_then(|placement| placement.tray_index), + endpoint_role, + is_primary: switch.is_primary, + nmxt_enabled, + })) +} + impl ApiClientWrapper { pub fn new(root_ca: String, client_cert: String, client_key: String, api_url: &Url) -> Self { let client_config = ForgeClientConfig::new( @@ -157,6 +209,18 @@ impl ApiClientWrapper { "Could not add switch endpoint due to error" ), } + + match self.extract_switch_host_endpoint(&switch).await { + Ok(Some(endpoint)) => endpoints.push(Arc::new(endpoint)), + Ok(None) => {} + Err(error) => { + tracing::warn!( + ?switch, + ?error, + "Could not add switch host endpoint due to error" + ); + } + } } tracing::debug!(count = endpoints.len(), "Fetched switch endpoints"); @@ -233,8 +297,13 @@ impl ApiClientWrapper { }) }); - self.endpoint_with_auth(addr, metadata, machine.rack_id.clone()) - .await + self.endpoint_with_auth( + addr, + metadata, + machine.rack_id.clone(), + ApiCredentialKind::Bmc, + ) + .await } async fn extract_switch_endpoint( @@ -247,31 +316,44 @@ impl ApiClientWrapper { )); }; let addr = BmcAddr::try_from(bmc_info)?; - let serial = switch - .config - .as_ref() - .map(|config| config.name.clone()) - .ok_or(HealthError::GenericError( - "Switch endpont does not have serial".to_string(), - ))?; self.endpoint_with_auth( addr, - Some(EndpointMetadata::Switch(SwitchData { - id: switch.id, - serial, - slot_number: switch - .placement_in_rack - .as_ref() - .and_then(|placement| placement.slot_number), - tray_index: switch - .placement_in_rack - .as_ref() - .and_then(|placement| placement.tray_index), - })), - None, + Some(switch_endpoint_metadata( + switch, + SwitchEndpointRole::Bmc, + false, + )?), + switch.rack_id.clone(), + ApiCredentialKind::Bmc, + ) + .await + } + + async fn extract_switch_host_endpoint( + &self, + switch: &rpc::forge::Switch, + ) -> Result, HealthError> { + let Some(nvos_info) = switch.nvos_info.as_ref() else { + return Ok(None); + }; + let switch_id = switch.id.ok_or_else(|| { + HealthError::GenericError("switch host endpoint missing switch ID".to_string()) + })?; + let addr = BmcAddr::try_from(nvos_info)?; + + self.endpoint_with_auth( + addr, + Some(switch_endpoint_metadata( + switch, + SwitchEndpointRole::Host, + switch.is_primary, + )?), + switch.rack_id.clone(), + ApiCredentialKind::SwitchNvosAdmin { switch_id }, ) .await + .map(Some) } async fn extract_power_shelf_endpoint( @@ -299,6 +381,7 @@ impl ApiClientWrapper { serial, })), None, + ApiCredentialKind::Bmc, ) .await } @@ -308,9 +391,11 @@ impl ApiClientWrapper { addr: BmcAddr, metadata: Option, rack_id: Option, + credential_kind: ApiCredentialKind, ) -> Result { let provider = ApiCredentialProvider { client: self.client.clone(), + kind: credential_kind, }; let credentials = provider.fetch_credentials(&addr).await?; diff --git a/crates/health/src/config.rs b/crates/health/src/config.rs index 45395a2b34..894998531e 100644 --- a/crates/health/src/config.rs +++ b/crates/health/src/config.rs @@ -123,6 +123,17 @@ pub struct StaticPowerShelfEndpoint { pub serial: Option, } +#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub enum StaticSwitchEndpointRole { + Bmc, + Host, +} + +fn default_static_switch_endpoint_role() -> StaticSwitchEndpointRole { + StaticSwitchEndpointRole::Host +} + #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] #[serde(deny_unknown_fields)] pub struct StaticSwitchEndpoint { @@ -130,6 +141,12 @@ pub struct StaticSwitchEndpoint { pub serial: Option, pub slot_number: Option, pub tray_index: Option, + #[serde(default = "default_static_switch_endpoint_role")] + pub endpoint_role: StaticSwitchEndpointRole, + #[serde(default)] + pub is_primary: bool, + #[serde(default)] + pub nmxt_enabled: Option, } impl Debug for StaticBmcEndpoint { @@ -1449,6 +1466,66 @@ power_shelf = { id = "fps100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1 ); } + #[test] + fn test_static_switch_host_accepts_primary_without_nmxt_override() { + let toml_content = r#" +[endpoint_sources.carbide_api] +enabled = false + +[[endpoint_sources.static_bmc_endpoints]] +ip = "10.0.1.1" +mac = "11:22:33:44:55:66" +username = "admin" +password = "pass" +switch = { id = "fsw100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1l0", serial = "SN-SW-001", endpoint_role = "host", is_primary = true } +"#; + + let config: Config = Figment::new() + .merge(Serialized::defaults(Config::default())) + .merge(Toml::string(toml_content)) + .extract() + .expect("static switch host config should parse"); + + let switch = config.endpoint_sources.static_bmc_endpoints[0] + .switch + .as_ref() + .expect("switch metadata"); + + assert_eq!(switch.endpoint_role, StaticSwitchEndpointRole::Host); + assert!(switch.is_primary); + assert_eq!(switch.nmxt_enabled, None); + } + + #[test] + fn test_static_switch_host_accepts_nmxt_override() { + let toml_content = r#" +[endpoint_sources.carbide_api] +enabled = false + +[[endpoint_sources.static_bmc_endpoints]] +ip = "10.0.1.2" +mac = "11:22:33:44:55:77" +username = "admin" +password = "pass" +switch = { id = "fsw100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1l0", serial = "SN-SW-002", endpoint_role = "host", is_primary = false, nmxt_enabled = true } +"#; + + let config: Config = Figment::new() + .merge(Serialized::defaults(Config::default())) + .merge(Toml::string(toml_content)) + .extract() + .expect("static switch host config should parse"); + + let switch = config.endpoint_sources.static_bmc_endpoints[0] + .switch + .as_ref() + .expect("switch metadata"); + + assert_eq!(switch.endpoint_role, StaticSwitchEndpointRole::Host); + assert!(!switch.is_primary); + assert_eq!(switch.nmxt_enabled, Some(true)); + } + #[test] fn test_static_machine_endpoint_accepts_placement_and_nvlink_metadata() { let toml_content = r#" @@ -1517,7 +1594,7 @@ switch = { serial = "SN-SW-001" } .extract() .expect("could not parse config toml file"); - assert_eq!(config.endpoint_sources.static_bmc_endpoints.len(), 3); + assert_eq!(config.endpoint_sources.static_bmc_endpoints.len(), 4); assert!( config.endpoint_sources.static_bmc_endpoints[0] .switch @@ -1546,17 +1623,38 @@ switch = { serial = "SN-SW-001" } .switch .as_ref() .and_then(|switch| switch.serial.as_deref()), - Some("SN-SWITCH-001") + Some("SN-SWITCH-BMC-001") + ); + assert_eq!( + config.endpoint_sources.static_bmc_endpoints[1] + .switch + .as_ref() + .map(|switch| switch.endpoint_role), + Some(StaticSwitchEndpointRole::Bmc) + ); + assert_eq!( + config.endpoint_sources.static_bmc_endpoints[2] + .switch + .as_ref() + .and_then(|switch| switch.serial.as_deref()), + Some("SN-SWITCH-HOST-001") ); assert_eq!( config.endpoint_sources.static_bmc_endpoints[2] + .switch + .as_ref() + .map(|switch| switch.endpoint_role), + Some(StaticSwitchEndpointRole::Host) + ); + assert_eq!( + config.endpoint_sources.static_bmc_endpoints[3] .power_shelf .as_ref() .and_then(|power_shelf| power_shelf.id.as_deref()), Some("fps100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1l0") ); assert_eq!( - config.endpoint_sources.static_bmc_endpoints[2] + config.endpoint_sources.static_bmc_endpoints[3] .power_shelf .as_ref() .and_then(|power_shelf| power_shelf.serial.as_deref()), diff --git a/crates/health/src/discovery/iteration.rs b/crates/health/src/discovery/iteration.rs index 5eecd13878..4bb1407f5c 100644 --- a/crates/health/src/discovery/iteration.rs +++ b/crates/health/src/discovery/iteration.rs @@ -100,7 +100,9 @@ mod tests { use mac_address::MacAddress; use super::*; - use crate::endpoint::{BmcAddr, BmcCredentials, EndpointMetadata, SwitchData}; + use crate::endpoint::{ + BmcAddr, BmcCredentials, EndpointMetadata, SwitchData, SwitchEndpointRole, + }; fn endpoint(mac: MacAddress, switch: bool) -> Arc { Arc::new(BmcEndpoint::with_fixed_credentials( @@ -119,6 +121,9 @@ mod tests { serial: format!("serial-{mac}"), slot_number: None, tray_index: None, + endpoint_role: SwitchEndpointRole::Host, + is_primary: false, + nmxt_enabled: false, })) } else { None diff --git a/crates/health/src/discovery/spawn.rs b/crates/health/src/discovery/spawn.rs index 2bfbc901e8..e86401c8d6 100644 --- a/crates/health/src/discovery/spawn.rs +++ b/crates/health/src/discovery/spawn.rs @@ -28,7 +28,7 @@ use crate::collectors::{ StreamingCollectorStartContext, }; use crate::config::{Configurable, LogCollectionMode}; -use crate::endpoint::{BmcEndpoint, EndpointMetadata}; +use crate::endpoint::{BmcEndpoint, SwitchEndpointRole}; use crate::sink::DataSink; fn logs_state_file_path(template: &str, endpoint_id: &str) -> PathBuf { @@ -40,9 +40,26 @@ pub(super) async fn spawn_collectors_for_endpoint( endpoint: &Arc, data_sink: Option>, metrics_prefix: &str, +) -> Result<(), HealthError> { + match endpoint.switch_data().map(|switch| switch.endpoint_role) { + Some(SwitchEndpointRole::Host) => { + spawn_switch_host_collectors(ctx, endpoint, data_sink, metrics_prefix) + } + Some(SwitchEndpointRole::Bmc) | None => { + spawn_generic_redfish_collectors(ctx, endpoint, data_sink, metrics_prefix) + } + } +} + +fn spawn_generic_redfish_collectors( + ctx: &mut DiscoveryLoopContext, + endpoint: &Arc, + data_sink: Option>, + metrics_prefix: &str, ) -> Result<(), HealthError> { let key = endpoint.key(); let endpoint_arc = endpoint.clone(); + if let Configurable::Enabled(sensor_cfg) = &ctx.sensors_config && !ctx.collectors.contains(CollectorKind::Sensor, &key) { @@ -216,7 +233,7 @@ pub(super) async fn spawn_collectors_for_endpoint( metrics_prefix, )?); match Collector::start::>( - endpoint_arc.clone(), + endpoint_arc, LeakDetectorCollectorConfig { data_sink: data_sink.clone(), state_refresh_interval: leak_detector_cfg.state_refresh_interval, @@ -250,9 +267,23 @@ pub(super) async fn spawn_collectors_for_endpoint( } } - if let Configurable::Enabled(nmxt_cfg) = &ctx.nmxt_config + Ok(()) +} + +fn spawn_switch_host_collectors( + ctx: &mut DiscoveryLoopContext, + endpoint: &Arc, + data_sink: Option>, + metrics_prefix: &str, +) -> Result<(), HealthError> { + let key = endpoint.key(); + let endpoint_arc = endpoint.clone(); + + if endpoint + .switch_data() + .is_some_and(|switch| switch.nmxt_enabled) + && let Configurable::Enabled(nmxt_cfg) = &ctx.nmxt_config && !ctx.collectors.contains(CollectorKind::Nmxt, &key) - && matches!(endpoint.metadata, Some(EndpointMetadata::Switch(_))) { let collector_registry = Arc::new( ctx.metrics_manager @@ -279,13 +310,13 @@ pub(super) async fn spawn_collectors_for_endpoint( tracing::info!( endpoint_key = %key, total_nmxt_collectors = ctx.collectors.len(CollectorKind::Nmxt), - "Started NMX-T collection for BMC endpoint" + "Started NMX-T collection for switch host endpoint" ); } Err(error) => { tracing::error!( ?error, - "Could not start NMX-T collector for: {:?}", + "Could not start NMX-T collector for switch host: {:?}", endpoint.addr ) } @@ -295,7 +326,6 @@ pub(super) async fn spawn_collectors_for_endpoint( if let Configurable::Enabled(nvue_cfg) = &ctx.nvue_config && let Configurable::Enabled(rest_cfg) = &nvue_cfg.rest && !ctx.collectors.contains(CollectorKind::NvueRest, &key) - && matches!(endpoint.metadata, Some(EndpointMetadata::Switch(_))) { let collector_registry = Arc::new( ctx.metrics_manager @@ -322,13 +352,13 @@ pub(super) async fn spawn_collectors_for_endpoint( tracing::info!( endpoint_key = %key, total_nvue_rest_collectors = ctx.collectors.len(CollectorKind::NvueRest), - "Started NVUE REST collection for BMC endpoint" + "Started NVUE REST collection for switch host endpoint" ); } Err(error) => { tracing::error!( ?error, - "Could not start NVUE REST collector for: {:?}", + "Could not start NVUE REST collector for switch host: {:?}", endpoint.addr ) } @@ -347,87 +377,303 @@ mod tests { use super::*; use crate::config::{Config, Configurable}; - use crate::endpoint::{BmcAddr, BmcCredentials, EndpointMetadata, SwitchData}; + use crate::endpoint::{ + BmcAddr, BmcCredentials, EndpointMetadata, MachineData, SwitchData, SwitchEndpointRole, + }; use crate::limiter::{NoopLimiter, RateLimiter}; use crate::metrics::MetricsManager; + use crate::sink::{CollectorEvent, EventContext}; - #[test] - fn test_logs_state_file_path_replaces_endpoint_id() { - let path = logs_state_file_path("/tmp/logs_{machine_id}.json", "endpoint-42"); - assert_eq!(path, PathBuf::from("/tmp/logs_endpoint-42.json")); + struct NoopSink; + + impl DataSink for NoopSink { + fn sink_type(&self) -> &'static str { + "noop" + } + + fn handle_event(&self, _context: &EventContext, _event: &CollectorEvent) {} } - #[test] - fn test_endpoint_log_identity_falls_back_to_mac_without_metadata() { - let endpoint = BmcEndpoint::with_fixed_credentials( + fn context_with_config(config: Config, metrics_name: &str) -> DiscoveryLoopContext { + let limiter: Arc = Arc::new(NoopLimiter); + let metrics_manager = + Arc::new(MetricsManager::new(metrics_name).expect("metrics manager should initialize")); + DiscoveryLoopContext::new(limiter, metrics_manager, Arc::new(config)) + .expect("context should initialize") + } + + fn test_endpoint( + ip: Ipv4Addr, + mac: &str, + metadata: Option, + ) -> Arc { + Arc::new(BmcEndpoint::with_fixed_credentials( BmcAddr { - ip: IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)), + ip: IpAddr::V4(ip), port: Some(443), - mac: MacAddress::from_str("aa:bb:cc:dd:ee:ff").unwrap(), + mac: MacAddress::from_str(mac).expect("valid mac address"), }, BmcCredentials::UsernamePassword { username: "user".to_string(), password: Some("pass".to_string()), }, + metadata, None, - None, - ); + )) + } + + fn switch_metadata_with_role( + endpoint_role: SwitchEndpointRole, + is_primary: bool, + nmxt_enabled: bool, + serial: &str, + ) -> EndpointMetadata { + EndpointMetadata::Switch(SwitchData { + id: None, + serial: serial.to_string(), + slot_number: None, + tray_index: None, + endpoint_role, + is_primary, + nmxt_enabled, + }) + } + + fn switch_metadata() -> EndpointMetadata { + switch_metadata_with_role(SwitchEndpointRole::Host, false, false, "switch-serial-1") + } + + fn machine_metadata() -> EndpointMetadata { + EndpointMetadata::Machine(MachineData { + machine_id: "fm100htjtiaehv1n5vh67tbmqq4eabcjdng40f7jupsadbedhruh6rag1l0" + .parse() + .expect("valid machine id"), + machine_serial: None, + slot_number: None, + tray_index: None, + nvlink_domain_uuid: None, + }) + } + + #[test] + fn test_logs_state_file_path_replaces_endpoint_id() { + let path = logs_state_file_path("/tmp/logs_{machine_id}.json", "endpoint-42"); + assert_eq!(path, PathBuf::from("/tmp/logs_endpoint-42.json")); + } + + #[test] + fn test_endpoint_log_identity_falls_back_to_mac_without_metadata() { + let endpoint = test_endpoint(Ipv4Addr::new(10, 0, 0, 1), "aa:bb:cc:dd:ee:ff", None); assert_eq!(endpoint.log_identity().as_ref(), "AA:BB:CC:DD:EE:FF"); } #[test] fn test_endpoint_log_identity_uses_switch_serial_when_available() { - let endpoint = BmcEndpoint::with_fixed_credentials( - BmcAddr { - ip: IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2)), - port: Some(443), - mac: MacAddress::from_str("11:22:33:44:55:66").unwrap(), - }, - BmcCredentials::UsernamePassword { - username: "user".to_string(), - password: Some("pass".to_string()), - }, - Some(EndpointMetadata::Switch(SwitchData { - id: None, - serial: "switch-serial-1".to_string(), - slot_number: None, - tray_index: None, - })), - None, + let endpoint = test_endpoint( + Ipv4Addr::new(10, 0, 0, 2), + "11:22:33:44:55:66", + Some(switch_metadata()), ); assert_eq!(endpoint.log_identity().as_ref(), "switch-serial-1"); } #[tokio::test] - async fn test_spawn_is_idempotent_when_collectors_are_disabled() { + async fn test_switch_endpoint_does_not_start_generic_redfish_collectors() { + let mut config = Config::default(); + config.collectors.sensors = Configurable::Enabled(Default::default()); + config.collectors.logs = Configurable::Enabled(Default::default()); + config.collectors.firmware = Configurable::Enabled(Default::default()); + config.collectors.leak_detector = Configurable::Enabled(Default::default()); + config.collectors.nmxt = Configurable::Disabled; + config.collectors.nvue = Configurable::Disabled; + + let mut ctx = context_with_config(config, "test_switch_generic_redfish_gate"); + let endpoint = test_endpoint( + Ipv4Addr::new(10, 0, 0, 6), + "55:66:77:88:99:aa", + Some(switch_metadata()), + ); + + spawn_collectors_for_endpoint( + &mut ctx, + &endpoint, + Some(Arc::new(NoopSink)), + "test_switch_generic_redfish_gate", + ) + .await + .expect("spawn should succeed"); + + assert_eq!(ctx.collectors.len(CollectorKind::Sensor), 0); + assert_eq!(ctx.collectors.len(CollectorKind::Logs), 0); + assert_eq!(ctx.collectors.len(CollectorKind::Firmware), 0); + assert_eq!(ctx.collectors.len(CollectorKind::LeakDetector), 0); + } + + #[tokio::test] + async fn test_switch_bmc_endpoint_starts_redfish_but_not_switch_host_collectors() { + let mut config = Config::default(); + config.collectors.sensors = Configurable::Enabled(Default::default()); + config.collectors.logs = Configurable::Disabled; + config.collectors.firmware = Configurable::Disabled; + config.collectors.leak_detector = Configurable::Disabled; + config.collectors.nmxt = Configurable::Enabled(Default::default()); + config.collectors.nvue = Configurable::Enabled(Default::default()); + + let mut ctx = context_with_config(config, "test_switch_bmc_redfish_only"); + let endpoint = test_endpoint( + Ipv4Addr::new(10, 0, 0, 8), + "55:66:77:88:99:bb", + Some(switch_metadata_with_role( + SwitchEndpointRole::Bmc, + true, + false, + "switch-bmc", + )), + ); + + spawn_collectors_for_endpoint(&mut ctx, &endpoint, None, "test_switch_bmc_redfish_only") + .await + .expect("spawn should succeed"); + + assert_eq!(ctx.collectors.len(CollectorKind::Sensor), 1); + assert_eq!(ctx.collectors.len(CollectorKind::Nmxt), 0); + assert_eq!(ctx.collectors.len(CollectorKind::NvueRest), 0); + } + + #[tokio::test] + async fn test_switch_host_primary_starts_nmxt_and_nvue_rest_when_globally_enabled() { + let mut config = Config::default(); + config.collectors.sensors = Configurable::Disabled; + config.collectors.logs = Configurable::Disabled; + config.collectors.firmware = Configurable::Disabled; + config.collectors.leak_detector = Configurable::Disabled; + config.collectors.nmxt = Configurable::Enabled(Default::default()); + config.collectors.nvue = Configurable::Enabled(Default::default()); + + let mut ctx = context_with_config(config, "test_switch_host_nmxt_nvue_enabled"); + let endpoint = test_endpoint( + Ipv4Addr::new(10, 0, 0, 9), + "55:66:77:88:99:cc", + Some(switch_metadata_with_role( + SwitchEndpointRole::Host, + true, + true, + "switch-host", + )), + ); + + spawn_collectors_for_endpoint(&mut ctx, &endpoint, None, "test") + .await + .expect("spawn should succeed"); + + assert_eq!(ctx.collectors.len(CollectorKind::Sensor), 0); + assert_eq!(ctx.collectors.len(CollectorKind::Nmxt), 1); + assert_eq!(ctx.collectors.len(CollectorKind::NvueRest), 1); + } + + #[tokio::test] + async fn test_switch_host_policy_gates_nmxt_but_not_nvue_rest() { + let mut config = Config::default(); + config.collectors.sensors = Configurable::Disabled; + config.collectors.logs = Configurable::Disabled; + config.collectors.firmware = Configurable::Disabled; + config.collectors.leak_detector = Configurable::Disabled; + config.collectors.nmxt = Configurable::Enabled(Default::default()); + config.collectors.nvue = Configurable::Enabled(Default::default()); + + let mut ctx = context_with_config(config, "test_switch_host_nmxt_endpoint_disabled"); + let endpoint = test_endpoint( + Ipv4Addr::new(10, 0, 0, 10), + "55:66:77:88:99:dd", + Some(switch_metadata_with_role( + SwitchEndpointRole::Host, + false, + false, + "switch-host", + )), + ); + + spawn_collectors_for_endpoint(&mut ctx, &endpoint, None, "test") + .await + .expect("spawn should succeed"); + + assert_eq!(ctx.collectors.len(CollectorKind::Nmxt), 0); + assert_eq!(ctx.collectors.len(CollectorKind::NvueRest), 1); + } + + #[tokio::test] + async fn test_switch_host_does_not_start_host_collectors_when_globally_disabled() { let mut config = Config::default(); config.collectors.sensors = Configurable::Disabled; config.collectors.logs = Configurable::Disabled; config.collectors.firmware = Configurable::Disabled; config.collectors.leak_detector = Configurable::Disabled; config.collectors.nmxt = Configurable::Disabled; + config.collectors.nvue = Configurable::Disabled; - let limiter: Arc = Arc::new(NoopLimiter); - let metrics_manager = - Arc::new(MetricsManager::new("test").expect("metrics manager should initialize")); - let mut ctx = DiscoveryLoopContext::new(limiter, metrics_manager, Arc::new(config)) - .expect("context should initialize"); + let mut ctx = context_with_config(config, "test_switch_host_collectors_global_disabled"); + let endpoint = test_endpoint( + Ipv4Addr::new(10, 0, 0, 11), + "55:66:77:88:99:ee", + Some(switch_metadata_with_role( + SwitchEndpointRole::Host, + true, + true, + "switch-host", + )), + ); - let endpoint = Arc::new(BmcEndpoint::with_fixed_credentials( - BmcAddr { - ip: IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)), - port: Some(443), - mac: MacAddress::from_str("aa:bb:cc:dd:ee:ff").unwrap(), - }, - BmcCredentials::UsernamePassword { - username: "user".to_string(), - password: Some("pass".to_string()), - }, - None, - None, - )); + spawn_collectors_for_endpoint(&mut ctx, &endpoint, None, "test") + .await + .expect("spawn should succeed"); + + assert_eq!(ctx.collectors.len(CollectorKind::Nmxt), 0); + assert_eq!(ctx.collectors.len(CollectorKind::NvueRest), 0); + } + + #[tokio::test] + async fn test_machine_endpoint_still_starts_sse_logs_collector() { + let mut config = Config::default(); + config.collectors.sensors = Configurable::Disabled; + config.collectors.logs = Configurable::Enabled(Default::default()); + config.collectors.firmware = Configurable::Disabled; + config.collectors.leak_detector = Configurable::Disabled; + config.collectors.nmxt = Configurable::Disabled; + config.collectors.nvue = Configurable::Disabled; + + let mut ctx = context_with_config(config, "test_machine_sse_logs_collector"); + let endpoint = test_endpoint( + Ipv4Addr::new(10, 0, 0, 7), + "66:77:88:99:aa:bb", + Some(machine_metadata()), + ); + + spawn_collectors_for_endpoint( + &mut ctx, + &endpoint, + Some(Arc::new(NoopSink)), + "test_machine_sse_logs_collector", + ) + .await + .expect("spawn should succeed"); + + assert_eq!(ctx.collectors.len(CollectorKind::Logs), 1); + } + + #[tokio::test] + async fn test_spawn_is_idempotent_when_collectors_are_disabled() { + let mut config = Config::default(); + config.collectors.sensors = Configurable::Disabled; + config.collectors.logs = Configurable::Disabled; + config.collectors.firmware = Configurable::Disabled; + config.collectors.leak_detector = Configurable::Disabled; + config.collectors.nmxt = Configurable::Disabled; + config.collectors.nvue = Configurable::Disabled; + + let mut ctx = context_with_config(config, "test_disabled_collectors"); + let endpoint = test_endpoint(Ipv4Addr::new(10, 0, 0, 1), "aa:bb:cc:dd:ee:ff", None); spawn_collectors_for_endpoint(&mut ctx, &endpoint, None, "test") .await diff --git a/crates/health/src/endpoint/mod.rs b/crates/health/src/endpoint/mod.rs index 191679ad79..081caef8d8 100644 --- a/crates/health/src/endpoint/mod.rs +++ b/crates/health/src/endpoint/mod.rs @@ -20,7 +20,7 @@ mod sources; pub use model::{ BmcAddr, BmcCredentials, BmcEndpoint, BoxFuture, CredentialProvider, EndpointMetadata, - EndpointSource, MachineData, PowerShelfData, SwitchData, + EndpointSource, MachineData, PowerShelfData, SwitchData, SwitchEndpointRole, }; pub use sources::{CompositeEndpointSource, StaticEndpointSource}; @@ -186,6 +186,9 @@ mod tests { serial: Some("SN-001".to_string()), slot_number: Some(7), tray_index: Some(3), + endpoint_role: crate::config::StaticSwitchEndpointRole::Host, + is_primary: true, + nmxt_enabled: None, }), rack_id: None, }]; @@ -200,6 +203,9 @@ mod tests { assert_eq!(s.serial, "SN-001"); assert_eq!(s.slot_number, Some(7)); assert_eq!(s.tray_index, Some(3)); + assert_eq!(s.endpoint_role, SwitchEndpointRole::Host); + assert!(s.is_primary); + assert!(s.nmxt_enabled); } other => panic!("expected Switch metadata, got {other:?}"), } diff --git a/crates/health/src/endpoint/model.rs b/crates/health/src/endpoint/model.rs index 06b2cf2a3a..bebfba1d10 100644 --- a/crates/health/src/endpoint/model.rs +++ b/crates/health/src/endpoint/model.rs @@ -106,6 +106,10 @@ impl BmcEndpoint { } } + pub fn switch_data(&self) -> Option<&SwitchData> { + self.metadata.as_ref().and_then(EndpointMetadata::as_switch) + } + pub fn credentials(&self) -> BmcCredentials { self.credentials.read().expect("lock poisoned").to_owned() } @@ -128,6 +132,13 @@ pub enum EndpointMetadata { } impl EndpointMetadata { + pub fn as_switch(&self) -> Option<&SwitchData> { + match self { + EndpointMetadata::Switch(switch) => Some(switch), + _ => None, + } + } + pub fn serial_number(&self) -> Option<&str> { match self { EndpointMetadata::Machine(machine) => machine.machine_serial.as_deref(), @@ -152,12 +163,21 @@ pub struct PowerShelfData { pub serial: String, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum SwitchEndpointRole { + Bmc, + Host, +} + #[derive(Clone, Debug)] pub struct SwitchData { pub id: Option, pub serial: String, pub slot_number: Option, pub tray_index: Option, + pub endpoint_role: SwitchEndpointRole, + pub is_primary: bool, + pub nmxt_enabled: bool, } #[derive(Clone)] diff --git a/crates/health/src/endpoint/sources.rs b/crates/health/src/endpoint/sources.rs index e2bc6bf8a0..6023f20c2b 100644 --- a/crates/health/src/endpoint/sources.rs +++ b/crates/health/src/endpoint/sources.rs @@ -23,10 +23,10 @@ use carbide_uuid::rack::RackId; use mac_address::MacAddress; use crate::HealthError; -use crate::config::StaticBmcEndpoint; +use crate::config::{StaticBmcEndpoint, StaticSwitchEndpointRole}; use crate::endpoint::{ BmcAddr, BmcCredentials, BmcEndpoint, BoxFuture, EndpointMetadata, EndpointSource, MachineData, - PowerShelfData, SwitchData, + PowerShelfData, SwitchData, SwitchEndpointRole, }; pub struct StaticEndpointSource { @@ -93,12 +93,20 @@ impl StaticEndpointSource { .clone() .or_else(|| switch.id.clone()) .unwrap_or_else(|| cfg.mac.clone()); + let endpoint_role = match switch.endpoint_role { + StaticSwitchEndpointRole::Bmc => SwitchEndpointRole::Bmc, + StaticSwitchEndpointRole::Host => SwitchEndpointRole::Host, + }; + let nmxt_enabled = switch.nmxt_enabled.unwrap_or(switch.is_primary); Some(EndpointMetadata::Switch(SwitchData { id, serial, slot_number: switch.slot_number, tray_index: switch.tray_index, + endpoint_role, + is_primary: switch.is_primary, + nmxt_enabled, })) } else if let Some(machine) = &cfg.machine { let machine_id = &machine.id; diff --git a/crates/health/src/otlp/convert.rs b/crates/health/src/otlp/convert.rs index fab1b521a6..168c453862 100644 --- a/crates/health/src/otlp/convert.rs +++ b/crates/health/src/otlp/convert.rs @@ -204,7 +204,7 @@ mod tests { use mac_address::MacAddress; use super::*; - use crate::endpoint::{BmcAddr, EndpointMetadata, MachineData, SwitchData}; + use crate::endpoint::{BmcAddr, EndpointMetadata, MachineData, SwitchData, SwitchEndpointRole}; use crate::sink::{ Classification, HealthReport, HealthReportAlert, LogRecord, Probe, ReportSource, }; @@ -302,6 +302,9 @@ mod tests { serial: "SN-SWITCH-001".to_string(), slot_number: Some(7), tray_index: Some(3), + endpoint_role: SwitchEndpointRole::Host, + is_primary: false, + nmxt_enabled: false, })), rack_id: None, }; diff --git a/crates/health/src/sink/prometheus.rs b/crates/health/src/sink/prometheus.rs index e1c70aae6f..e36c1e472f 100644 --- a/crates/health/src/sink/prometheus.rs +++ b/crates/health/src/sink/prometheus.rs @@ -241,7 +241,7 @@ mod tests { use mac_address::MacAddress; use super::*; - use crate::endpoint::{BmcAddr, EndpointMetadata, MachineData, SwitchData}; + use crate::endpoint::{BmcAddr, EndpointMetadata, MachineData, SwitchData, SwitchEndpointRole}; fn test_switch_id(label: &str) -> SwitchId { let mut hash = [0u8; 32]; @@ -309,6 +309,9 @@ mod tests { serial: "SN-SWITCH-001".to_string(), slot_number: Some(7), tray_index: Some(3), + endpoint_role: SwitchEndpointRole::Host, + is_primary: false, + nmxt_enabled: false, })), rack_id: None, }; diff --git a/crates/rpc/build.rs b/crates/rpc/build.rs index ec6fa45475..a81131b51b 100644 --- a/crates/rpc/build.rs +++ b/crates/rpc/build.rs @@ -846,7 +846,12 @@ fn main() -> Result<(), Box> { .type_attribute( "forge.GetBmcCredentialsRequest", "#[derive(serde::Serialize)]", - ).type_attribute( + ) + .type_attribute( + "forge.GetSwitchNvosCredentialsRequest", + "#[derive(serde::Serialize)]", + ) + .type_attribute( "forge.PlacementInRack", "#[derive(serde::Serialize)]", ) diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index acc106a9d7..575f1bdfbc 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -264,6 +264,7 @@ service Forge { // Query for BMC Credentials rpc GetBmcCredentials(GetBmcCredentialsRequest) returns (GetBmcCredentialsResponse); + rpc GetSwitchNvosCredentials(GetSwitchNvosCredentialsRequest) returns (GetBmcCredentialsResponse); // Admin CLI actions @@ -2137,6 +2138,9 @@ message Switch { optional PlacementInRack placement_in_rack = 11; reserved 12, 13; bool is_primary = 14; + + // NVOS host endpoint + BmcInfo nvos_info = 15; } message SwitchList { @@ -3751,6 +3755,10 @@ message GetBmcCredentialsRequest { string mac_addr = 1; } +message GetSwitchNvosCredentialsRequest { + common.SwitchId switch_id = 1; +} + message GetBmcCredentialsResponse { BmcCredentials credentials = 1; } diff --git a/crates/rpc/src/model/switch.rs b/crates/rpc/src/model/switch.rs index 74b0105093..11549fe925 100644 --- a/crates/rpc/src/model/switch.rs +++ b/crates/rpc/src/model/switch.rs @@ -187,6 +187,7 @@ impl TryFrom for rpc::Switch { deleted, controller_state, bmc_info: None, + nvos_info: None, state_version, metadata: Some(src.metadata.into()), version: src.version.version_string(),