From 79a857cb2bf30029b5882d8b1f019428271e7a13 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: Wed, 25 Feb 2026 14:21:39 +0100 Subject: [PATCH 01/66] feat(orb-jobs-agent): allow local jobs execution --- orb-jobs-agent/src/args.rs | 10 ++ orb-jobs-agent/src/job_system/client.rs | 145 +++++++++++++++++++---- orb-jobs-agent/src/job_system/handler.rs | 83 ++++++++----- orb-jobs-agent/src/main.rs | 17 ++- orb-jobs-agent/src/program.rs | 16 ++- orb-jobs-agent/src/settings.rs | 95 +++++++++------ orb-jobs-agent/tests/common/fixture.rs | 9 +- orb-jobs-agent/tests/job_handler.rs | 30 ++++- 8 files changed, 306 insertions(+), 99 deletions(-) diff --git a/orb-jobs-agent/src/args.rs b/orb-jobs-agent/src/args.rs index 2e544ea8e..78570bd7e 100644 --- a/orb-jobs-agent/src/args.rs +++ b/orb-jobs-agent/src/args.rs @@ -36,6 +36,16 @@ pub struct Args { /// The target job-server service id to send messages to. #[clap(long, env = "TARGET_SERVICE_ID", default_value = "job-server")] pub target_service_id: Option, + /// D-Bus address (defaults to DBUS_SESSION_BUS_ADDRESS or unix:path=/tmp/worldcoin_bus_socket). + #[clap( + long, + env = "DBUS_SESSION_BUS_ADDRESS", + default_value = "unix:path=/tmp/worldcoin_bus_socket" + )] + pub dbus_addr: String, + /// Run a single job document locally instead of connecting to relay. 
+ #[clap(long)] + pub run_job: Option, } fn clap_v3_styles() -> Styles { diff --git a/orb-jobs-agent/src/job_system/client.rs b/orb-jobs-agent/src/job_system/client.rs index 67f40ade6..068c86791 100644 --- a/orb-jobs-agent/src/job_system/client.rs +++ b/orb-jobs-agent/src/job_system/client.rs @@ -12,37 +12,97 @@ use orb_relay_messages::{ prost_types::Any, relay::entity::EntityType, }; +use std::sync::Arc; +use tokio::sync::Mutex; +use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; +#[derive(Debug, Clone)] +pub(crate) enum JobTransport { + Relay { + relay_client: Client, + target_service_id: String, + relay_namespace: String, + }, + Local(LocalTransport), +} + +#[derive(Debug, Clone)] +pub(crate) struct LocalTransport { + pending_job: Arc>>, + shutdown: CancellationToken, +} + #[derive(Debug, Clone)] pub struct JobClient { - relay_client: Client, - target_service_id: String, - relay_namespace: String, + transport: JobTransport, job_registry: JobRegistry, job_config: JobConfig, } impl JobClient { - pub fn new( - relay_client: Client, - target_service_id: &str, - relay_namespace: &str, + pub(crate) fn new( + transport: JobTransport, job_registry: JobRegistry, job_config: JobConfig, ) -> Self { Self { + transport, + job_registry, + job_config, + } + } +} + +impl JobTransport { + pub(crate) fn service( + relay_client: Client, + target_service_id: &str, + relay_namespace: &str, + ) -> Self { + Self::Relay { relay_client, target_service_id: target_service_id.to_string(), relay_namespace: relay_namespace.to_string(), - job_registry, - job_config, } } + pub(crate) fn local(job: JobExecution, shutdown: CancellationToken) -> Self { + Self::Local(LocalTransport { + pending_job: Arc::new(Mutex::new(Some(job))), + shutdown, + }) + } +} + +impl JobClient { pub async fn listen_for_job(&self) -> Result { + if let JobTransport::Local(local) = &self.transport { + loop { + let next_job = local.pending_job.lock().await.take(); + + if let Some(job) = next_job { + 
info!( + job_id = %job.job_id, + job_execution_id = %job.job_execution_id, + job_document = %redact_job_document(&job.job_document), + should_cancel = job.should_cancel, + "received local JobExecution" + ); + return Ok(job); + } + + std::future::pending::<()>().await; + } + } + + let relay_client = match &self.transport { + JobTransport::Relay { relay_client, .. } => relay_client, + JobTransport::Local(_) => unreachable!(), + }; + loop { - match self.relay_client.recv().await { + match relay_client.recv().await { Ok(msg) => { let any = match Any::decode(msg.payload.as_slice()) { Ok(any) => any, @@ -119,6 +179,15 @@ impl JobClient { /// Requests for a next job to be run, excluding the ones that are /// currently running (determined by `running_job_execution_ids` arg) pub async fn request_next_job(&self) -> Result<(), orb_relay_client::Err> { + let (relay_client, target_service_id, relay_namespace) = match &self.transport { + JobTransport::Relay { + relay_client, + target_service_id, + relay_namespace, + } => (relay_client, target_service_id, relay_namespace), + JobTransport::Local(_) => return Ok(()), + }; + let mut running_ids = self.job_registry.get_active_job_ids().await; let mut completed_ids = self.job_registry.get_completed_job_ids().await; @@ -130,11 +199,11 @@ impl JobClient { }; let any = Any::from_msg(&job_request).unwrap(); - self.relay_client + relay_client .send( SendMessage::to(EntityType::Service) - .id(self.target_service_id.clone()) - .namespace(self.relay_namespace.clone()) + .id(target_service_id.clone()) + .namespace(relay_namespace.clone()) .qos(QoS::AtLeastOnce) .payload(any.encode_to_vec()), ) @@ -176,6 +245,39 @@ impl JobClient { &self, job_update: &JobExecutionUpdate, ) -> Result<(), orb_relay_client::Err> { + if let JobTransport::Local(local) = &self.transport { + let escaped_stdout = + serde_json::to_string(&job_update.std_out).unwrap_or_default(); + let escaped_stderr = + serde_json::to_string(&job_update.std_err).unwrap_or_default(); + 
let escaped_job_id = + serde_json::to_string(&job_update.job_id).unwrap_or_default(); + let escaped_execution_id = + serde_json::to_string(&job_update.job_execution_id).unwrap_or_default(); + let serialized = format!( + "{{\"job_id\":{escaped_job_id},\"job_execution_id\":{escaped_execution_id},\"status\":{},\"std_out\":{escaped_stdout},\"std_err\":{escaped_stderr}}}", + job_update.status + ); + println!("{serialized}"); + + if job_update.status + != orb_relay_messages::jobs::v1::JobExecutionStatus::InProgress as i32 + { + local.shutdown.cancel(); + } + + return Ok(()); + } + + let (relay_client, target_service_id, relay_namespace) = match &self.transport { + JobTransport::Relay { + relay_client, + target_service_id, + relay_namespace, + } => (relay_client, target_service_id, relay_namespace), + JobTransport::Local(_) => unreachable!(), + }; + info!( job_execution_id = %job_update.job_execution_id, job_id = %job_update.job_id, @@ -183,11 +285,11 @@ impl JobClient { job_update ); let any = Any::from_msg(job_update).unwrap(); - self.relay_client + relay_client .send( SendMessage::to(EntityType::Service) - .id(self.target_service_id.clone()) - .namespace(self.relay_namespace.clone()) + .id(target_service_id.clone()) + .namespace(relay_namespace.clone()) .qos(QoS::AtLeastOnce) .payload(any.encode_to_vec()), ) @@ -211,10 +313,13 @@ impl JobClient { } pub async fn force_relay_reconnect(&self) -> Result<()> { - self.relay_client - .reconnect() - .await - .map_err(|_| eyre!("failed to force reconnect orb relay")) + match &self.transport { + JobTransport::Relay { relay_client, .. 
} => relay_client + .reconnect() + .await + .map_err(|_| eyre!("failed to force reconnect orb relay")), + JobTransport::Local(_) => Ok(()), + } } } diff --git a/orb-jobs-agent/src/job_system/handler.rs b/orb-jobs-agent/src/job_system/handler.rs index 81e86b268..10393b553 100644 --- a/orb-jobs-agent/src/job_system/handler.rs +++ b/orb-jobs-agent/src/job_system/handler.rs @@ -1,12 +1,12 @@ use super::ctx::Ctx; use crate::{ job_system::{ - client::JobClient, + client::{JobClient, JobTransport}, ctx::JobExecutionUpdateExt, orchestrator::{JobCompletion, JobConfig, JobRegistry, JobStartStatus}, sanitize::{redact_args, redact_job_document, should_sanitize}, }, - program::Deps, + program::{Deps, JobMode}, settings::Settings, }; use color_eyre::Result; @@ -119,38 +119,57 @@ impl JobHandler { } fn new(builder: JobHandlerBuilder, deps: Deps) -> Self { - let Settings { - orb_id, - relay_host, - relay_namespace, - target_service_id, - auth, - .. - } = &deps.settings; - - let opts = ClientOpts::entity(EntityType::Orb) - .id(orb_id.as_str().to_string()) - .endpoint(relay_host) - .namespace(relay_namespace) - .auth(auth.clone()) - .connection_timeout(Duration::from_secs(3)) - .connection_backoff(Duration::from_secs(2)) - .keep_alive_interval(Duration::from_secs(30)) - .keep_alive_timeout(Duration::from_secs(10)) - .ack_timeout(Duration::from_secs(5)) - .build(); - - info!("Connecting to relay: {:?}", relay_host); - let (relay_client, relay_handle) = Client::connect(opts); let job_registry = JobRegistry::new(); let job_config = builder.job_config; - let job_client = JobClient::new( - relay_client.clone(), - target_service_id.as_str(), - relay_namespace, - job_registry.clone(), - job_config.clone(), - ); + let (job_client, relay_handle) = match &deps.job_mode { + JobMode::Service => { + let Settings { + orb_id, + relay_host, + relay_namespace, + target_service_id, + auth, + .. 
+ } = &deps.settings; + + let opts = ClientOpts::entity(EntityType::Orb) + .id(orb_id.as_str().to_string()) + .endpoint(relay_host) + .namespace(relay_namespace) + .auth(auth.clone()) + .connection_timeout(Duration::from_secs(3)) + .connection_backoff(Duration::from_secs(2)) + .keep_alive_interval(Duration::from_secs(30)) + .keep_alive_timeout(Duration::from_secs(10)) + .ack_timeout(Duration::from_secs(5)) + .build(); + + info!("Connecting to relay: {:?}", relay_host); + let (relay_client, relay_handle) = Client::connect(opts); + let transport = JobTransport::service( + relay_client.clone(), + target_service_id.as_str(), + relay_namespace, + ); + let job_client = + JobClient::new(transport, job_registry.clone(), job_config.clone()); + + (job_client, relay_handle) + } + JobMode::LocalSingleJob(job) => { + let shutdown = CancellationToken::new(); + let shutdown_task = shutdown.clone(); + let relay_handle = tokio::spawn(async move { + shutdown_task.cancelled().await; + Ok(()) + }); + let transport = JobTransport::local(job.clone(), shutdown); + let job_client = + JobClient::new(transport, job_registry.clone(), job_config.clone()); + + (job_client, relay_handle) + } + }; Self { state: Arc::new(deps), diff --git a/orb-jobs-agent/src/main.rs b/orb-jobs-agent/src/main.rs index 9dd6fd4b0..91ea47fb8 100644 --- a/orb-jobs-agent/src/main.rs +++ b/orb-jobs-agent/src/main.rs @@ -1,9 +1,10 @@ use clap::Parser; use color_eyre::eyre::Result; use orb_jobs_agent::args::Args; -use orb_jobs_agent::program::{self, Deps}; +use orb_jobs_agent::program::{self, Deps, JobMode}; use orb_jobs_agent::settings::Settings; use orb_jobs_agent::shell::Host; +use orb_relay_messages::jobs::v1::JobExecution; use tracing::info; const SYSLOG_IDENTIFIER: &str = "worldcoin-jobs-agent"; @@ -24,12 +25,24 @@ async fn main() -> Result<()> { async fn run(args: &Args) -> Result<()> { info!("Starting jobs agent: {:?}", args); - let connection = zbus::Connection::session().await?; + let connection = 
zbus::ConnectionBuilder::address(args.dbus_addr.as_str())? + .build() + .await?; + let job_mode = match &args.run_job { + Some(job_document) => JobMode::LocalSingleJob(JobExecution { + job_id: "local-job".to_string(), + job_execution_id: "local-job-execution".to_string(), + job_document: job_document.clone(), + should_cancel: false, + }), + None => JobMode::Service, + }; let deps = Deps::new( Host, connection, Settings::from_args(args, "/mnt/scratch").await?, + job_mode, ); program::run(deps).await?; diff --git a/orb-jobs-agent/src/program.rs b/orb-jobs-agent/src/program.rs index c3b6dfc3c..7d9eb86a4 100644 --- a/orb-jobs-agent/src/program.rs +++ b/orb-jobs-agent/src/program.rs @@ -12,17 +12,30 @@ use crate::{ shell::Shell, }; use color_eyre::Result; +use orb_relay_messages::jobs::v1::JobExecution; use tokio::fs; +#[derive(Debug, Clone)] +pub enum JobMode { + Service, + LocalSingleJob(JobExecution), +} + /// Dependencies used by the jobs-agent. pub struct Deps { pub shell: Box, pub session_dbus: zbus::Connection, pub settings: Settings, + pub job_mode: JobMode, } impl Deps { - pub fn new(shell: S, session_dbus: zbus::Connection, settings: Settings) -> Self + pub fn new( + shell: S, + session_dbus: zbus::Connection, + settings: Settings, + job_mode: JobMode, + ) -> Self where S: Shell + 'static, { @@ -30,6 +43,7 @@ impl Deps { shell: Box::new(shell), session_dbus, settings, + job_mode, } } } diff --git a/orb-jobs-agent/src/settings.rs b/orb-jobs-agent/src/settings.rs index 4e0f82e65..51fdd13dd 100644 --- a/orb-jobs-agent/src/settings.rs +++ b/orb-jobs-agent/src/settings.rs @@ -17,7 +17,6 @@ use std::{ use tokio::time; use tokio_util::sync::CancellationToken; use tracing::warn; -use zbus::Connection; #[derive(Debug, Clone)] pub struct Settings { @@ -44,6 +43,8 @@ pub struct Settings { impl Settings { pub async fn from_args(args: &Args, store_path: impl AsRef) -> Result { + let is_local_run = args.run_job.is_some(); + let orb_id = if let Some(id) = &args.orb_id { 
OrbId::from_str(id)? } else { @@ -63,50 +64,70 @@ impl Settings { os_release.orb_os_platform_type }; - let relay_host = args - .relay_host - .clone() - .or_else(|| { - Backend::from_env() - .ok() - .map(|backend| Endpoints::new(backend, &orb_id).relay.to_string()) - }) - .wrap_err("could not get Backend Endpoint from env")?; + let relay_host = if is_local_run { + args.relay_host + .clone() + .unwrap_or_else(|| "http://127.0.0.1:1".to_string()) + } else { + args.relay_host + .clone() + .or_else(|| { + Backend::from_env().ok().map(|backend| { + Endpoints::new(backend, &orb_id).relay.to_string() + }) + }) + .wrap_err("could not get Backend Endpoint from env")? + }; // Get token from DBus - let auth = match &args.orb_token { - Some(t) => Auth::Token(t.as_str().into()), - None => { - let shutdown_token = CancellationToken::new(); - let get_token = async || { - let connection = Connection::session() - .await - .map_err(|e| eyre!("failed to establish zbus conn: {e}"))?; + let auth = if is_local_run { + args.orb_token + .as_ref() + .map(|token| Auth::Token(token.as_str().into())) + .unwrap_or_else(|| Auth::Token(Default::default())) + } else { + match &args.orb_token { + Some(t) => Auth::Token(t.as_str().into()), + None => { + let shutdown_token = CancellationToken::new(); + let get_token = async || { + let connection = + zbus::ConnectionBuilder::address(args.dbus_addr.as_str())? + .build() + .await + .map_err(|e| { + eyre!( + "failed to establish zbus conn at {}: {e}", + args.dbus_addr + ) + })?; - TokenTaskHandle::spawn(&connection, &shutdown_token) - .await - .wrap_err("failed to get auth token!") - }; + TokenTaskHandle::spawn(&connection, &shutdown_token) + .await + .wrap_err("failed to get auth token!") + }; - let token_rec_fut = async { - loop { - match get_token().await { - Err(e) => { - warn!("{e}! 
trying again in 5s"); - time::sleep(Duration::from_secs(5)).await; - continue; - } + let token_rec_fut = async { + loop { + match get_token().await { + Err(e) => { + warn!("{e}! trying again in 5s"); + time::sleep(Duration::from_secs(5)).await; + continue; + } - Ok(t) => break t.token_recv, + Ok(t) => break t.token_recv, + } } - } - }; + }; - let token_rec = time::timeout(Duration::from_secs(60), token_rec_fut) - .await - .wrap_err("could not get auth token after 60s")?; + let token_rec = + time::timeout(Duration::from_secs(60), token_rec_fut) + .await + .wrap_err("could not get auth token after 60s")?; - Auth::TokenReceiver(token_rec) + Auth::TokenReceiver(token_rec) + } } }; diff --git a/orb-jobs-agent/tests/common/fixture.rs b/orb-jobs-agent/tests/common/fixture.rs index 89d759822..3bfb0f8a7 100644 --- a/orb-jobs-agent/tests/common/fixture.rs +++ b/orb-jobs-agent/tests/common/fixture.rs @@ -8,7 +8,7 @@ use dbus_launch::BusType; use orb_connd_dbus::Connd; use orb_info::OrbId; use orb_jobs_agent::{ - program::{self, Deps}, + program::{self, Deps, JobMode}, settings::Settings, shell::Shell, }; @@ -251,7 +251,12 @@ impl JobAgentFixture { .await .unwrap(); - let deps = Deps::new(shell, self.dbus_conn.clone(), settings.clone()); + let deps = Deps::new( + shell, + self.dbus_conn.clone(), + settings.clone(), + JobMode::Service, + ); let join_handle = task::spawn(async move { tokio::select! 
{ diff --git a/orb-jobs-agent/tests/job_handler.rs b/orb-jobs-agent/tests/job_handler.rs index c99f4a26f..c53aa7fef 100644 --- a/orb-jobs-agent/tests/job_handler.rs +++ b/orb-jobs-agent/tests/job_handler.rs @@ -2,7 +2,7 @@ use color_eyre::eyre::bail; use common::fixture::JobAgentFixture; use orb_jobs_agent::{ job_system::{ctx::JobExecutionUpdateExt, handler::JobHandler}, - program::Deps, + program::{Deps, JobMode}, shell::Host, }; use orb_relay_messages::jobs::v1::JobExecutionStatus; @@ -18,7 +18,12 @@ mod common; async fn sequential_jobs_block_other_jobs_execution() { // Arrange let fx = JobAgentFixture::new().await; - let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let deps = Deps::new( + Host, + fx.dbus_conn.clone(), + fx.settings.clone(), + JobMode::Service, + ); let wait_time = Duration::from_millis(100); @@ -46,7 +51,12 @@ async fn sequential_jobs_block_other_jobs_execution() { async fn can_start_parallel_jobs_in_parallel() { // Arrange let fx = JobAgentFixture::new().await; - let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let deps = Deps::new( + Host, + fx.dbus_conn.clone(), + fx.settings.clone(), + JobMode::Service, + ); let wait_time = Duration::from_millis(500); @@ -80,7 +90,12 @@ async fn parallel_jobs_dont_exceed_max() { async fn gracefully_handles_unsupported_cmds() { // Arrange let fx = JobAgentFixture::new().await; - let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let deps = Deps::new( + Host, + fx.dbus_conn.clone(), + fx.settings.clone(), + JobMode::Service, + ); task::spawn(JobHandler::builder().build(deps).run()); @@ -96,7 +111,12 @@ async fn gracefully_handles_unsupported_cmds() { async fn it_cancels_a_long_running_job() { // Arrange let fx = JobAgentFixture::with_namespace("cancel_long_running_job").await; - let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let deps = Deps::new( + Host, + fx.dbus_conn.clone(), + fx.settings.clone(), + 
JobMode::Service, + ); let wait_time = Duration::from_millis(50); From 7aa4458f567c45f9de0d1825df82a775be1342f6 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: Wed, 25 Feb 2026 14:55:45 +0100 Subject: [PATCH 02/66] feat(orb-jobs-agent): error out on local job failure --- orb-jobs-agent/src/args.rs | 1 - orb-jobs-agent/src/job_system/client.rs | 10 ++++++++++ orb-jobs-agent/src/job_system/handler.rs | 20 +++++++++++++++++++- orb-jobs-agent/src/program.rs | 2 +- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/orb-jobs-agent/src/args.rs b/orb-jobs-agent/src/args.rs index 78570bd7e..57aeef4d3 100644 --- a/orb-jobs-agent/src/args.rs +++ b/orb-jobs-agent/src/args.rs @@ -44,7 +44,6 @@ pub struct Args { )] pub dbus_addr: String, /// Run a single job document locally instead of connecting to relay. - #[clap(long)] pub run_job: Option, } diff --git a/orb-jobs-agent/src/job_system/client.rs b/orb-jobs-agent/src/job_system/client.rs index 068c86791..40ee36ca1 100644 --- a/orb-jobs-agent/src/job_system/client.rs +++ b/orb-jobs-agent/src/job_system/client.rs @@ -30,6 +30,7 @@ pub(crate) enum JobTransport { #[derive(Debug, Clone)] pub(crate) struct LocalTransport { pending_job: Arc>>, + final_status: Arc>>, shutdown: CancellationToken, } @@ -70,6 +71,7 @@ impl JobTransport { pub(crate) fn local(job: JobExecution, shutdown: CancellationToken) -> Self { Self::Local(LocalTransport { pending_job: Arc::new(Mutex::new(Some(job))), + final_status: Arc::new(Mutex::new(None)), shutdown, }) } @@ -263,6 +265,7 @@ impl JobClient { if job_update.status != orb_relay_messages::jobs::v1::JobExecutionStatus::InProgress as i32 { + *local.final_status.lock().await = Some(job_update.status); local.shutdown.cancel(); } @@ -321,6 +324,13 @@ impl JobClient { JobTransport::Local(_) => Ok(()), } } + + pub async fn local_final_status(&self) -> Option { + match &self.transport { + JobTransport::Local(local) => *local.final_status.lock().await, + JobTransport::Relay { .. 
} => None, + } + } } #[cfg(test)] diff --git a/orb-jobs-agent/src/job_system/handler.rs b/orb-jobs-agent/src/job_system/handler.rs index 10393b553..3fe25e040 100644 --- a/orb-jobs-agent/src/job_system/handler.rs +++ b/orb-jobs-agent/src/job_system/handler.rs @@ -9,6 +9,7 @@ use crate::{ program::{Deps, JobMode}, settings::Settings, }; +use color_eyre::eyre::eyre; use color_eyre::Result; use orb_relay_client::{Client, ClientOpts}; use orb_relay_messages::{ @@ -181,7 +182,7 @@ impl JobHandler { } } - pub async fn run(mut self) { + pub async fn run(mut self) -> Result<()> { // Kickstart job requests. match self.job_client.try_request_more_jobs().await { Ok(true) => { @@ -215,6 +216,23 @@ impl JobHandler { } } } + + if matches!(self.state.job_mode, JobMode::LocalSingleJob(_)) { + let status = + self.job_client.local_final_status().await.ok_or_else(|| { + eyre!("local run ended without terminal job status") + })?; + + if status != JobExecutionStatus::Succeeded as i32 { + let status_name = JobExecutionStatus::try_from(status) + .map(|s| format!("{s:?}")) + .unwrap_or_else(|_| format!("Unknown({status})")); + + return Err(eyre!("local job failed with status {status_name}")); + } + } + + Ok(()) } async fn handle_job(mut self, job: JobExecution) -> Self { diff --git a/orb-jobs-agent/src/program.rs b/orb-jobs-agent/src/program.rs index 7d9eb86a4..736509bc7 100644 --- a/orb-jobs-agent/src/program.rs +++ b/orb-jobs-agent/src/program.rs @@ -90,7 +90,7 @@ pub async fn run(deps: Deps) -> Result<()> { conn_change::spawn_watcher(orb_id, job_handler.job_client.clone(), zenoh_port) .await?; - job_handler.run().await; + job_handler.run().await?; Ok(()) } From 06748724fe673bbfdcd6f9f82bcded0657ebbc25 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: Wed, 25 Feb 2026 15:41:02 +0100 Subject: [PATCH 03/66] feat(orb-jobs-agent): long --- orb-jobs-agent/src/args.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/orb-jobs-agent/src/args.rs b/orb-jobs-agent/src/args.rs index 
57aeef4d3..14966613c 100644 --- a/orb-jobs-agent/src/args.rs +++ b/orb-jobs-agent/src/args.rs @@ -44,6 +44,8 @@ pub struct Args { )] pub dbus_addr: String, /// Run a single job document locally instead of connecting to relay. + + #[clap(long)] pub run_job: Option, } From 651765a523bef4f9bfa1e2ac27ba3f32438c0104 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: Wed, 25 Feb 2026 20:44:00 +0100 Subject: [PATCH 04/66] feat(orb-jobs-agent): make Vslopus happy --- orb-jobs-agent/src/job_system/client.rs | 593 +++++++++++++---------- orb-jobs-agent/src/job_system/handler.rs | 96 +--- orb-jobs-agent/src/main.rs | 156 +++++- orb-jobs-agent/src/program.rs | 29 +- orb-jobs-agent/src/settings.rs | 100 +--- orb-jobs-agent/tests/common/fixture.rs | 81 +++- orb-jobs-agent/tests/job_handler.rs | 46 +- 7 files changed, 592 insertions(+), 509 deletions(-) diff --git a/orb-jobs-agent/src/job_system/client.rs b/orb-jobs-agent/src/job_system/client.rs index 40ee36ca1..53e1740a4 100644 --- a/orb-jobs-agent/src/job_system/client.rs +++ b/orb-jobs-agent/src/job_system/client.rs @@ -6,225 +6,386 @@ use color_eyre::eyre::{eyre, Result}; use orb_relay_client::{Client, QoS, SendMessage}; use orb_relay_messages::{ jobs::v1::{ - JobCancel, JobExecution, JobExecutionUpdate, JobNotify, JobRequestNext, + JobCancel, JobExecution, JobExecutionStatus, JobExecutionUpdate, JobNotify, + JobRequestNext, }, prost::{Message, Name}, prost_types::Any, relay::entity::EntityType, }; -use std::sync::Arc; -use tokio::sync::Mutex; +use std::{future::Future, pin::Pin, sync::Arc}; +use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; -#[derive(Debug, Clone)] -pub(crate) enum JobTransport { - Relay { - relay_client: Client, - target_service_id: String, - relay_namespace: String, - }, - Local(LocalTransport), +pub trait JobTransport: Send + Sync + std::fmt::Debug { + fn listen_for_job<'a>( + &'a self, + job_registry: &'a JobRegistry, + ) -> Pin< + Box< + dyn Future> 
+ + Send + + 'a, + >, + >; + + fn request_next_job<'a>( + &'a self, + job_registry: &'a JobRegistry, + ) -> Pin> + Send + 'a>>; + + fn send_job_update<'a>( + &'a self, + update: &'a JobExecutionUpdate, + ) -> Pin> + Send + 'a>>; + + fn reconnect(&self) -> Pin> + Send + '_>>; } #[derive(Debug, Clone)] -pub(crate) struct LocalTransport { - pending_job: Arc>>, - final_status: Arc>>, - shutdown: CancellationToken, +pub struct RelayTransport { + pub relay_client: Client, + pub target_service_id: String, + pub relay_namespace: String, } -#[derive(Debug, Clone)] -pub struct JobClient { - transport: JobTransport, - job_registry: JobRegistry, - job_config: JobConfig, -} - -impl JobClient { - pub(crate) fn new( - transport: JobTransport, - job_registry: JobRegistry, - job_config: JobConfig, - ) -> Self { - Self { - transport, - job_registry, - job_config, - } - } -} - -impl JobTransport { - pub(crate) fn service( +impl RelayTransport { + pub fn new( relay_client: Client, - target_service_id: &str, - relay_namespace: &str, + target_service_id: impl Into, + relay_namespace: impl Into, ) -> Self { - Self::Relay { + Self { relay_client, - target_service_id: target_service_id.to_string(), - relay_namespace: relay_namespace.to_string(), + target_service_id: target_service_id.into(), + relay_namespace: relay_namespace.into(), } } - - pub(crate) fn local(job: JobExecution, shutdown: CancellationToken) -> Self { - Self::Local(LocalTransport { - pending_job: Arc::new(Mutex::new(Some(job))), - final_status: Arc::new(Mutex::new(None)), - shutdown, - }) - } } -impl JobClient { - pub async fn listen_for_job(&self) -> Result { - if let JobTransport::Local(local) = &self.transport { +impl JobTransport for RelayTransport { + fn listen_for_job<'a>( + &'a self, + job_registry: &'a JobRegistry, + ) -> Pin< + Box< + dyn Future> + + Send + + 'a, + >, + > { + Box::pin(async move { loop { - let next_job = local.pending_job.lock().await.take(); - - if let Some(job) = next_job { - info!( - job_id = 
%job.job_id, - job_execution_id = %job.job_execution_id, - job_document = %redact_job_document(&job.job_document), - should_cancel = job.should_cancel, - "received local JobExecution" - ); - return Ok(job); - } - - std::future::pending::<()>().await; - } - } - - let relay_client = match &self.transport { - JobTransport::Relay { relay_client, .. } => relay_client, - JobTransport::Local(_) => unreachable!(), - }; - - loop { - match relay_client.recv().await { - Ok(msg) => { - let any = match Any::decode(msg.payload.as_slice()) { - Ok(any) => any, - Err(e) => { - error!("error decoding message: {:?}", e); - continue; - } - }; - if any.type_url == JobNotify::type_url() { - match JobNotify::decode(any.value.as_slice()) { - Ok(job_notify) => { - info!("received JobNotify: {:?}", job_notify); - let _ = self.request_next_job().await; - } + match self.relay_client.recv().await { + Ok(msg) => { + let any = match Any::decode(msg.payload.as_slice()) { + Ok(any) => any, Err(e) => { - error!("error decoding JobNotify: {:?}", e); + error!("error decoding message: {:?}", e); + continue; } - } - } else if any.type_url == JobExecution::type_url() { - match JobExecution::decode(any.value.as_slice()) { - Ok(job) => { - info!( - job_id = %job.job_id, - job_execution_id = %job.job_execution_id, - job_document = %redact_job_document(&job.job_document), - should_cancel = job.should_cancel, - "received JobExecution" - ); - return Ok(job); - } - Err(e) => { - error!("error decoding JobExecution: {:?}", e); + }; + if any.type_url == JobNotify::type_url() { + match JobNotify::decode(any.value.as_slice()) { + Ok(job_notify) => { + info!("received JobNotify: {:?}", job_notify); + let request = build_job_request(job_registry).await; + if let Err(e) = self.send_request(&request).await { + error!("error sending JobRequestNext: {:?}", e); + } + } + Err(e) => { + error!("error decoding JobNotify: {:?}", e); + } } - } - } else if any.type_url == JobCancel::type_url() { - match 
JobCancel::decode(any.value.as_slice()) { - Ok(job_cancel) => { - info!( - job_execution_id = %job_cancel.job_execution_id, - "received JobCancel" - ); - let cancelled = self - .job_registry - .cancel_job(&job_cancel.job_execution_id) - .await; - if cancelled { + } else if any.type_url == JobExecution::type_url() { + match JobExecution::decode(any.value.as_slice()) { + Ok(job) => { info!( - job_execution_id = %job_cancel.job_execution_id, - "Successfully cancelled job" + job_id = %job.job_id, + job_execution_id = %job.job_execution_id, + job_document = %redact_job_document(&job.job_document), + should_cancel = job.should_cancel, + "received JobExecution" ); - } else { - warn!( + + return Ok(job); + } + Err(e) => { + error!("error decoding JobExecution: {:?}", e); + } + } + } else if any.type_url == JobCancel::type_url() { + match JobCancel::decode(any.value.as_slice()) { + Ok(job_cancel) => { + info!( job_execution_id = %job_cancel.job_execution_id, - "Attempted to cancel non-existent or already completed job" + "received JobCancel" ); + let cancelled = job_registry + .cancel_job(&job_cancel.job_execution_id) + .await; + if cancelled { + info!( + job_execution_id = %job_cancel.job_execution_id, + "Successfully cancelled job" + ); + } else { + warn!( + job_execution_id = %job_cancel.job_execution_id, + "Attempted to cancel non-existent or already completed job" + ); + } + } + Err(e) => { + error!("error decoding JobCancel: {:?}", e); } } - Err(e) => { - error!("error decoding JobCancel: {:?}", e); - } + } else { + error!( + "received unexpected message type: {:?}", + any.type_url + ); } - } else { - error!("received unexpected message type: {:?}", any.type_url); } - } - Err(e) => { - error!("error receiving from relay: {:?}", e); - return Err(e); + Err(e) => { + error!("error receiving from relay: {:?}", e); + + return Err(e); + } } } - } + }) } - /// Requests for a next job to be run, excluding the ones that are - /// currently running (determined by 
`running_job_execution_ids` arg) - pub async fn request_next_job(&self) -> Result<(), orb_relay_client::Err> { - let (relay_client, target_service_id, relay_namespace) = match &self.transport { - JobTransport::Relay { - relay_client, - target_service_id, - relay_namespace, - } => (relay_client, target_service_id, relay_namespace), - JobTransport::Local(_) => return Ok(()), - }; + fn request_next_job<'a>( + &'a self, + job_registry: &'a JobRegistry, + ) -> Pin> + Send + 'a>> + { + Box::pin(async move { + let request = build_job_request(job_registry).await; + self.send_request(&request).await?; + info!( + "sent JobRequestNext ignoring {} job execution IDs: {:?}", + request.ignore_job_execution_ids.len(), + request.ignore_job_execution_ids + ); - let mut running_ids = self.job_registry.get_active_job_ids().await; - let mut completed_ids = self.job_registry.get_completed_job_ids().await; + Ok(()) + }) + } - running_ids.append(&mut completed_ids); - let job_ids_to_ignore = running_ids; + fn send_job_update<'a>( + &'a self, + job_update: &'a JobExecutionUpdate, + ) -> Pin> + Send + 'a>> + { + Box::pin(async move { + info!( + job_execution_id = %job_update.job_execution_id, + job_id = %job_update.job_id, + "sending job update: {:?}", + job_update + ); + let any = Any::from_msg(job_update).unwrap(); + self.relay_client + .send( + SendMessage::to(EntityType::Service) + .id(self.target_service_id.clone()) + .namespace(self.relay_namespace.clone()) + .qos(QoS::AtLeastOnce) + .payload(any.encode_to_vec()), + ) + .await + .inspect_err(|e| { + error!( + job_execution_id = %job_update.job_execution_id, + job_id = %job_update.job_id, + "error sending JobExecutionUpdate: {:?}", + e + ) + })?; + + info!( + job_execution_id = %job_update.job_execution_id, + job_id = %job_update.job_id, + "sent JobExecutionUpdate" + ); - let job_request = JobRequestNext { - ignore_job_execution_ids: job_ids_to_ignore.clone(), - }; + Ok(()) + }) + } - let any = Any::from_msg(&job_request).unwrap(); - 
relay_client + fn reconnect(&self) -> Pin> + Send + '_>> { + Box::pin(async move { + self.relay_client + .reconnect() + .await + .map_err(|_| eyre!("failed to force reconnect orb relay")) + }) + } +} + +impl RelayTransport { + async fn send_request( + &self, + request: &JobRequestNext, + ) -> Result<(), orb_relay_client::Err> { + let any = Any::from_msg(request).unwrap(); + self.relay_client .send( SendMessage::to(EntityType::Service) - .id(target_service_id.clone()) - .namespace(relay_namespace.clone()) + .id(self.target_service_id.clone()) + .namespace(self.relay_namespace.clone()) .qos(QoS::AtLeastOnce) .payload(any.encode_to_vec()), ) - .await?; + .await + } +} - info!( - "sent JobRequestNext ignoring {} job execution IDs: {:?}", - job_ids_to_ignore.len(), - job_ids_to_ignore - ); +#[derive(Debug)] +pub struct LocalTransport { + pending_job: std::sync::Mutex>, + final_status: std::sync::Mutex>, + shutdown: CancellationToken, +} + +impl LocalTransport { + pub fn new(job: JobExecution) -> (Self, CancellationToken) { + let shutdown = CancellationToken::new(); + let token = shutdown.clone(); + let transport = Self { + pending_job: std::sync::Mutex::new(Some(job)), + final_status: std::sync::Mutex::new(None), + shutdown, + }; + + (transport, token) + } + + pub fn final_status(&self) -> Option { + *self.final_status.lock().unwrap() + } + + pub fn shutdown_handle(&self) -> JoinHandle> { + let token = self.shutdown.clone(); + tokio::spawn(async move { + token.cancelled().await; + + Ok(()) + }) + } +} + +impl JobTransport for LocalTransport { + fn listen_for_job<'a>( + &'a self, + _job_registry: &'a JobRegistry, + ) -> Pin< + Box< + dyn Future> + + Send + + 'a, + >, + > { + Box::pin(async move { + let next_job = self.pending_job.lock().unwrap().take(); + + if let Some(job) = next_job { + info!( + job_id = %job.job_id, + job_execution_id = %job.job_execution_id, + job_document = %redact_job_document(&job.job_document), + should_cancel = job.should_cancel, + "received 
local JobExecution" + ); + + return Ok(job); + } + + std::future::pending::<()>().await; + unreachable!() + }) + } + + fn request_next_job<'a>( + &'a self, + _job_registry: &'a JobRegistry, + ) -> Pin> + Send + 'a>> + { + Box::pin(async { Ok(()) }) + } + + fn send_job_update<'a>( + &'a self, + job_update: &'a JobExecutionUpdate, + ) -> Pin> + Send + 'a>> + { + Box::pin(async move { + let status_name = JobExecutionStatus::try_from(job_update.status) + .map(|s| format!("{s:?}")) + .unwrap_or_else(|_| format!("Unknown({})", job_update.status)); + + println!("--- Job Update ---"); + println!("job_id: {}", job_update.job_id); + println!("job_execution_id: {}", job_update.job_execution_id); + println!("status: {status_name}"); + if !job_update.std_out.is_empty() { + println!("stdout:\n{}", job_update.std_out); + } + if !job_update.std_err.is_empty() { + eprintln!("stderr:\n{}", job_update.std_err); + } + + if job_update.status != JobExecutionStatus::InProgress as i32 { + *self.final_status.lock().unwrap() = Some(job_update.status); + self.shutdown.cancel(); + } + + Ok(()) + }) + } + + fn reconnect(&self) -> Pin> + Send + '_>> { + Box::pin(async { Ok(()) }) + } +} + +#[derive(Debug, Clone)] +pub struct JobClient { + transport: Arc, + job_registry: JobRegistry, + job_config: JobConfig, +} - Ok(()) +impl JobClient { + pub fn new( + transport: Arc, + job_registry: JobRegistry, + job_config: JobConfig, + ) -> Self { + Self { + transport, + job_registry, + job_config, + } + } + + pub async fn listen_for_job(&self) -> Result { + self.transport.listen_for_job(&self.job_registry).await + } + + pub async fn request_next_job(&self) -> Result<(), orb_relay_client::Err> { + self.transport.request_next_job(&self.job_registry).await } - /// Check if we should request more jobs and do so if appropriate - /// This method is used to implement parallel job execution + /// Check if we should request more jobs and do so if appropriate. 
+ /// This method is used to implement parallel job execution. /// Returns `false` if no jobs were requested. pub async fn try_request_more_jobs(&self) -> Result { - // Check if we should request more jobs based on current configuration if !self .job_config .should_request_more_jobs(&self.job_registry) @@ -233,7 +394,6 @@ impl JobClient { return Ok(false); } - // Request next job with current running job IDs self.request_next_job() .await .inspect_err(|e| error!("Failed to request additional job: {:?}", e))?; @@ -247,89 +407,21 @@ impl JobClient { &self, job_update: &JobExecutionUpdate, ) -> Result<(), orb_relay_client::Err> { - if let JobTransport::Local(local) = &self.transport { - let escaped_stdout = - serde_json::to_string(&job_update.std_out).unwrap_or_default(); - let escaped_stderr = - serde_json::to_string(&job_update.std_err).unwrap_or_default(); - let escaped_job_id = - serde_json::to_string(&job_update.job_id).unwrap_or_default(); - let escaped_execution_id = - serde_json::to_string(&job_update.job_execution_id).unwrap_or_default(); - let serialized = format!( - "{{\"job_id\":{escaped_job_id},\"job_execution_id\":{escaped_execution_id},\"status\":{},\"std_out\":{escaped_stdout},\"std_err\":{escaped_stderr}}}", - job_update.status - ); - println!("{serialized}"); - - if job_update.status - != orb_relay_messages::jobs::v1::JobExecutionStatus::InProgress as i32 - { - *local.final_status.lock().await = Some(job_update.status); - local.shutdown.cancel(); - } - - return Ok(()); - } - - let (relay_client, target_service_id, relay_namespace) = match &self.transport { - JobTransport::Relay { - relay_client, - target_service_id, - relay_namespace, - } => (relay_client, target_service_id, relay_namespace), - JobTransport::Local(_) => unreachable!(), - }; - - info!( - job_execution_id = %job_update.job_execution_id, - job_id = %job_update.job_id, - "sending job update: {:?}", - job_update - ); - let any = Any::from_msg(job_update).unwrap(); - relay_client - .send( 
- SendMessage::to(EntityType::Service) - .id(target_service_id.clone()) - .namespace(relay_namespace.clone()) - .qos(QoS::AtLeastOnce) - .payload(any.encode_to_vec()), - ) - .await - .inspect_err(|e| { - error!( - job_execution_id = %job_update.job_execution_id, - job_id = %job_update.job_id, - "error sending JobExecutionUpdate: {:?}", - e - ) - })?; - - info!( - job_execution_id = %job_update.job_execution_id, - job_id = %job_update.job_id, - "sent JobExecutionUpdate" - ); - - Ok(()) + self.transport.send_job_update(job_update).await } pub async fn force_relay_reconnect(&self) -> Result<()> { - match &self.transport { - JobTransport::Relay { relay_client, .. } => relay_client - .reconnect() - .await - .map_err(|_| eyre!("failed to force reconnect orb relay")), - JobTransport::Local(_) => Ok(()), - } + self.transport.reconnect().await } +} - pub async fn local_final_status(&self) -> Option { - match &self.transport { - JobTransport::Local(local) => *local.final_status.lock().await, - JobTransport::Relay { .. 
} => None, - } +async fn build_job_request(job_registry: &JobRegistry) -> JobRequestNext { + let mut running_ids = job_registry.get_active_job_ids().await; + let mut completed_ids = job_registry.get_completed_job_ids().await; + running_ids.append(&mut completed_ids); + + JobRequestNext { + ignore_job_execution_ids: running_ids, } } @@ -342,7 +434,6 @@ mod tests { #[test] fn test_job_execution_update_creation_for_cancellation() { - // Test that we can create the correct JobExecutionUpdate for cancellation let job_execution = JobExecution { job_id: "test_job_123".to_string(), job_execution_id: "test_execution_456".to_string(), @@ -350,7 +441,6 @@ mod tests { should_cancel: true, }; - // Create the update that main.rs would create for should_cancel = true let cancel_update = JobExecutionUpdate { job_id: job_execution.job_id.clone(), job_execution_id: job_execution.job_execution_id.clone(), @@ -359,7 +449,6 @@ mod tests { std_err: "Job was cancelled".to_string(), }; - // Verify the update has the correct fields assert_eq!(cancel_update.job_id, "test_job_123"); assert_eq!(cancel_update.job_execution_id, "test_execution_456"); assert_eq!(cancel_update.status, JobExecutionStatus::Failed as i32); @@ -369,7 +458,6 @@ mod tests { #[test] fn test_should_cancel_field_detection() { - // Test that we can properly detect should_cancel field let normal_job = JobExecution { job_id: "job1".to_string(), job_execution_id: "exec1".to_string(), @@ -396,7 +484,6 @@ mod tests { #[test] fn test_job_request_with_ignore_ids() { - // Test creating JobRequestNext with ignore IDs directly let ignore_ids = vec![ "job_exec_1".to_string(), "job_exec_2".to_string(), @@ -410,7 +497,6 @@ mod tests { assert_eq!(job_request.ignore_job_execution_ids, ignore_ids); assert_eq!(job_request.ignore_job_execution_ids.len(), 3); - // Test with empty IDs let empty_request = JobRequestNext { ignore_job_execution_ids: vec![], }; @@ -420,7 +506,6 @@ mod tests { #[test] fn test_default_job_request() { - // Test that 
default JobRequestNext has empty ignore_job_execution_ids let default_request = JobRequestNext::default(); assert!(default_request.ignore_job_execution_ids.is_empty()); } diff --git a/orb-jobs-agent/src/job_system/handler.rs b/orb-jobs-agent/src/job_system/handler.rs index 3fe25e040..fa790e2b7 100644 --- a/orb-jobs-agent/src/job_system/handler.rs +++ b/orb-jobs-agent/src/job_system/handler.rs @@ -6,17 +6,13 @@ use crate::{ orchestrator::{JobCompletion, JobConfig, JobRegistry, JobStartStatus}, sanitize::{redact_args, redact_job_document, should_sanitize}, }, - program::{Deps, JobMode}, - settings::Settings, + program::Deps, }; -use color_eyre::eyre::eyre; use color_eyre::Result; -use orb_relay_client::{Client, ClientOpts}; -use orb_relay_messages::{ - jobs::v1::{JobExecution, JobExecutionStatus, JobExecutionUpdate}, - relay::entity::EntityType, +use orb_relay_messages::jobs::v1::{ + JobExecution, JobExecutionStatus, JobExecutionUpdate, }; -use std::{collections::HashMap, pin::Pin, sync::Arc, time::Duration}; +use std::{collections::HashMap, pin::Pin, sync::Arc}; use tokio::{sync::oneshot, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; @@ -75,8 +71,13 @@ impl JobHandlerBuilder { self } - pub fn build(self, deps: Deps) -> JobHandler { - JobHandler::new(self, deps) + pub fn build( + self, + deps: Deps, + transport: Arc, + relay_handle: JoinHandle>, + ) -> JobHandler { + JobHandler::new(self, deps, transport, relay_handle) } } @@ -91,7 +92,7 @@ impl JobHandlerBuilder { /// .parallel("read_file", read_file::handler) /// .parallel("mcu", mcu::handler) /// .parallel_max("logs", 3, logs::handler) -/// .build(deps) +/// .build(deps, transport, relay_handle) /// .run() /// .await; /// ``` @@ -119,58 +120,16 @@ impl JobHandler { } } - fn new(builder: JobHandlerBuilder, deps: Deps) -> Self { + fn new( + builder: JobHandlerBuilder, + deps: Deps, + transport: Arc, + relay_handle: JoinHandle>, + ) -> Self { let job_registry = 
JobRegistry::new(); let job_config = builder.job_config; - let (job_client, relay_handle) = match &deps.job_mode { - JobMode::Service => { - let Settings { - orb_id, - relay_host, - relay_namespace, - target_service_id, - auth, - .. - } = &deps.settings; - - let opts = ClientOpts::entity(EntityType::Orb) - .id(orb_id.as_str().to_string()) - .endpoint(relay_host) - .namespace(relay_namespace) - .auth(auth.clone()) - .connection_timeout(Duration::from_secs(3)) - .connection_backoff(Duration::from_secs(2)) - .keep_alive_interval(Duration::from_secs(30)) - .keep_alive_timeout(Duration::from_secs(10)) - .ack_timeout(Duration::from_secs(5)) - .build(); - - info!("Connecting to relay: {:?}", relay_host); - let (relay_client, relay_handle) = Client::connect(opts); - let transport = JobTransport::service( - relay_client.clone(), - target_service_id.as_str(), - relay_namespace, - ); - let job_client = - JobClient::new(transport, job_registry.clone(), job_config.clone()); - - (job_client, relay_handle) - } - JobMode::LocalSingleJob(job) => { - let shutdown = CancellationToken::new(); - let shutdown_task = shutdown.clone(); - let relay_handle = tokio::spawn(async move { - shutdown_task.cancelled().await; - Ok(()) - }); - let transport = JobTransport::local(job.clone(), shutdown); - let job_client = - JobClient::new(transport, job_registry.clone(), job_config.clone()); - - (job_client, relay_handle) - } - }; + let job_client = + JobClient::new(transport, job_registry.clone(), job_config.clone()); Self { state: Arc::new(deps), @@ -217,21 +176,6 @@ impl JobHandler { } } - if matches!(self.state.job_mode, JobMode::LocalSingleJob(_)) { - let status = - self.job_client.local_final_status().await.ok_or_else(|| { - eyre!("local run ended without terminal job status") - })?; - - if status != JobExecutionStatus::Succeeded as i32 { - let status_name = JobExecutionStatus::try_from(status) - .map(|s| format!("{s:?}")) - .unwrap_or_else(|_| format!("Unknown({status})")); - - return 
Err(eyre!("local job failed with status {status_name}")); - } - } - Ok(()) } diff --git a/orb-jobs-agent/src/main.rs b/orb-jobs-agent/src/main.rs index 91ea47fb8..8e002a4a0 100644 --- a/orb-jobs-agent/src/main.rs +++ b/orb-jobs-agent/src/main.rs @@ -1,11 +1,19 @@ use clap::Parser; -use color_eyre::eyre::Result; +use color_eyre::eyre::{eyre, Context, ContextCompat, Result}; +use orb_endpoints::{v1::Endpoints, Backend}; +use orb_info::TokenTaskHandle; use orb_jobs_agent::args::Args; -use orb_jobs_agent::program::{self, Deps, JobMode}; +use orb_jobs_agent::job_system::client::{LocalTransport, RelayTransport}; +use orb_jobs_agent::program::{self, Deps}; use orb_jobs_agent::settings::Settings; use orb_jobs_agent::shell::Host; -use orb_relay_messages::jobs::v1::JobExecution; -use tracing::info; +use orb_relay_client::{Auth, Client, ClientOpts}; +use orb_relay_messages::jobs::v1::{JobExecution, JobExecutionStatus}; +use orb_relay_messages::relay::entity::EntityType; +use std::sync::Arc; +use std::time::Duration; +use tokio_util::sync::CancellationToken; +use tracing::{info, warn}; const SYSLOG_IDENTIFIER: &str = "worldcoin-jobs-agent"; @@ -28,25 +36,135 @@ async fn run(args: &Args) -> Result<()> { let connection = zbus::ConnectionBuilder::address(args.dbus_addr.as_str())? 
.build() .await?; - let job_mode = match &args.run_job { - Some(job_document) => JobMode::LocalSingleJob(JobExecution { - job_id: "local-job".to_string(), - job_execution_id: "local-job-execution".to_string(), - job_document: job_document.clone(), - should_cancel: false, - }), - None => JobMode::Service, + + let settings = Settings::from_args(args, "/mnt/scratch").await?; + + let deps = Deps::new(Host, connection, settings.clone()); + + match &args.run_job { + Some(job_document) => run_local(deps, job_document).await, + None => run_service(deps, args, &settings).await, + } +} + +async fn run_local(deps: Deps, job_document: &str) -> Result<()> { + let job = JobExecution { + job_id: "local-job".to_string(), + job_execution_id: "local-job-execution".to_string(), + job_document: job_document.to_string(), + should_cancel: false, + }; + + let (local_transport, _shutdown_token) = LocalTransport::new(job); + let transport = Arc::new(local_transport); + let relay_handle = { + let t = Arc::clone(&transport); + t.shutdown_handle() }; - let deps = Deps::new( - Host, - connection, - Settings::from_args(args, "/mnt/scratch").await?, - job_mode, - ); + program::run(deps, Arc::clone(&transport) as _, relay_handle).await?; - program::run(deps).await?; + let status = transport + .final_status() + .ok_or_else(|| eyre!("local run ended without terminal job status"))?; + + if status != JobExecutionStatus::Succeeded as i32 { + let status_name = JobExecutionStatus::try_from(status) + .map(|s| format!("{s:?}")) + .unwrap_or_else(|_| format!("Unknown({status})")); + + return Err(eyre!("local job failed with status {status_name}")); + } info!("Shutting down jobs agent completed"); + + Ok(()) +} + +async fn run_service(deps: Deps, args: &Args, settings: &Settings) -> Result<()> { + let relay_host = args + .relay_host + .clone() + .or_else(|| { + Backend::from_env().ok().map(|backend| { + Endpoints::new(backend, &settings.orb_id).relay.to_string() + }) + }) + .wrap_err("could not get Backend 
Endpoint from env")?; + + let auth = match &args.orb_token { + Some(t) => Auth::Token(t.as_str().into()), + None => { + let shutdown_token = CancellationToken::new(); + let dbus_addr = args.dbus_addr.clone(); + let get_token = async || { + let connection = zbus::ConnectionBuilder::address(dbus_addr.as_str())? + .build() + .await + .map_err(|e| { + eyre!("failed to establish zbus conn at {}: {e}", dbus_addr) + })?; + + TokenTaskHandle::spawn(&connection, &shutdown_token) + .await + .wrap_err("failed to get auth token!") + }; + + let token_rec_fut = async { + loop { + match get_token().await { + Err(e) => { + warn!("{e}! trying again in 5s"); + tokio::time::sleep(Duration::from_secs(5)).await; + continue; + } + Ok(t) => break t.token_recv, + } + } + }; + + let token_rec = + tokio::time::timeout(Duration::from_secs(60), token_rec_fut) + .await + .wrap_err("could not get auth token after 60s")?; + + Auth::TokenReceiver(token_rec) + } + }; + + let relay_namespace = args + .relay_namespace + .clone() + .wrap_err("relay namespace MUST be provided")?; + + let target_service_id = args + .target_service_id + .clone() + .wrap_err("target service id MUST be provided")?; + + let opts = ClientOpts::entity(EntityType::Orb) + .id(settings.orb_id.as_str().to_string()) + .endpoint(&relay_host) + .namespace(&relay_namespace) + .auth(auth) + .connection_timeout(Duration::from_secs(3)) + .connection_backoff(Duration::from_secs(2)) + .keep_alive_interval(Duration::from_secs(30)) + .keep_alive_timeout(Duration::from_secs(10)) + .ack_timeout(Duration::from_secs(5)) + .build(); + + info!("Connecting to relay: {:?}", relay_host); + let (relay_client, relay_handle) = Client::connect(opts); + let transport = Arc::new(RelayTransport::new( + relay_client, + target_service_id, + relay_namespace, + )); + + program::run(deps, transport, relay_handle).await?; + + info!("Shutting down jobs agent completed"); + Ok(()) } diff --git a/orb-jobs-agent/src/program.rs b/orb-jobs-agent/src/program.rs index 
736509bc7..996d40c62 100644 --- a/orb-jobs-agent/src/program.rs +++ b/orb-jobs-agent/src/program.rs @@ -7,35 +7,23 @@ use crate::{ update_versions, wifi_add, wifi_connect, wifi_ip, wifi_list, wifi_remove, wifi_scan, wipe_downloads, }, - job_system::handler::JobHandler, + job_system::{client::JobTransport, handler::JobHandler}, settings::Settings, shell::Shell, }; use color_eyre::Result; -use orb_relay_messages::jobs::v1::JobExecution; -use tokio::fs; - -#[derive(Debug, Clone)] -pub enum JobMode { - Service, - LocalSingleJob(JobExecution), -} +use std::sync::Arc; +use tokio::{fs, task::JoinHandle}; /// Dependencies used by the jobs-agent. pub struct Deps { pub shell: Box, pub session_dbus: zbus::Connection, pub settings: Settings, - pub job_mode: JobMode, } impl Deps { - pub fn new( - shell: S, - session_dbus: zbus::Connection, - settings: Settings, - job_mode: JobMode, - ) -> Self + pub fn new(shell: S, session_dbus: zbus::Connection, settings: Settings) -> Self where S: Shell + 'static, { @@ -43,12 +31,15 @@ impl Deps { shell: Box::new(shell), session_dbus, settings, - job_mode, } } } -pub async fn run(deps: Deps) -> Result<()> { +pub async fn run( + deps: Deps, + transport: Arc, + relay_handle: JoinHandle>, +) -> Result<()> { fs::create_dir_all(&deps.settings.store_path).await?; let orb_id = deps.settings.orb_id.clone(); let zenoh_port = deps.settings.zenoh_port; @@ -84,7 +75,7 @@ pub async fn run(deps: Deps) -> Result<()> { .parallel_max("logs", 3, logs::handler) .sequential("reboot", reboot::handler) .sequential("slot_switch", slot_switch::handler) - .build(deps); + .build(deps, transport, relay_handle); let _zenoh_session = conn_change::spawn_watcher(orb_id, job_handler.job_client.clone(), zenoh_port) diff --git a/orb-jobs-agent/src/settings.rs b/orb-jobs-agent/src/settings.rs index 51fdd13dd..05c25b903 100644 --- a/orb-jobs-agent/src/settings.rs +++ b/orb-jobs-agent/src/settings.rs @@ -1,31 +1,18 @@ use crate::args::Args; -use color_eyre::{ - eyre::{eyre, 
Context, ContextCompat}, - Result, -}; -use orb_endpoints::{v1::Endpoints, Backend}; +use color_eyre::{eyre::Context, Result}; use orb_info::{ orb_os_release::{OrbOsPlatform, OrbOsRelease}, - OrbId, TokenTaskHandle, + OrbId, }; -use orb_relay_client::Auth; use std::{ path::{Path, PathBuf}, str::FromStr, - time::Duration, }; -use tokio::time; -use tokio_util::sync::CancellationToken; -use tracing::warn; #[derive(Debug, Clone)] pub struct Settings { pub orb_id: OrbId, pub orb_platform: OrbOsPlatform, - pub auth: Auth, - pub relay_host: String, - pub relay_namespace: String, - pub target_service_id: String, /// Filesystem path used to persist data pub store_path: PathBuf, /// Path to the calibration file (configurable for testing) @@ -43,8 +30,6 @@ pub struct Settings { impl Settings { pub async fn from_args(args: &Args, store_path: impl AsRef) -> Result { - let is_local_run = args.run_job.is_some(); - let orb_id = if let Some(id) = &args.orb_id { OrbId::from_str(id)? } else { @@ -64,83 +49,6 @@ impl Settings { os_release.orb_os_platform_type }; - let relay_host = if is_local_run { - args.relay_host - .clone() - .unwrap_or_else(|| "http://127.0.0.1:1".to_string()) - } else { - args.relay_host - .clone() - .or_else(|| { - Backend::from_env().ok().map(|backend| { - Endpoints::new(backend, &orb_id).relay.to_string() - }) - }) - .wrap_err("could not get Backend Endpoint from env")? - }; - - // Get token from DBus - let auth = if is_local_run { - args.orb_token - .as_ref() - .map(|token| Auth::Token(token.as_str().into())) - .unwrap_or_else(|| Auth::Token(Default::default())) - } else { - match &args.orb_token { - Some(t) => Auth::Token(t.as_str().into()), - None => { - let shutdown_token = CancellationToken::new(); - let get_token = async || { - let connection = - zbus::ConnectionBuilder::address(args.dbus_addr.as_str())? 
- .build() - .await - .map_err(|e| { - eyre!( - "failed to establish zbus conn at {}: {e}", - args.dbus_addr - ) - })?; - - TokenTaskHandle::spawn(&connection, &shutdown_token) - .await - .wrap_err("failed to get auth token!") - }; - - let token_rec_fut = async { - loop { - match get_token().await { - Err(e) => { - warn!("{e}! trying again in 5s"); - time::sleep(Duration::from_secs(5)).await; - continue; - } - - Ok(t) => break t.token_recv, - } - } - }; - - let token_rec = - time::timeout(Duration::from_secs(60), token_rec_fut) - .await - .wrap_err("could not get auth token after 60s")?; - - Auth::TokenReceiver(token_rec) - } - } - }; - - let relay_namespace = args - .relay_namespace - .clone() - .wrap_err("relay namespace MUST be provided")?; - - let target_service_id = args - .target_service_id - .clone() - .wrap_err("target service id MUST be provided")?; - let downloads_path = match orb_platform { OrbOsPlatform::Diamond => PathBuf::from("/mnt/scratch"), OrbOsPlatform::Pearl => PathBuf::from("/mnt/updates"), @@ -149,10 +57,6 @@ impl Settings { Ok(Self { orb_id, orb_platform, - auth, - relay_host, - relay_namespace, - target_service_id, store_path: store_path.as_ref().to_path_buf(), calibration_file_path: PathBuf::from("/usr/persistent/calibration.json"), os_release_path: PathBuf::from("/etc/os-release"), diff --git a/orb-jobs-agent/tests/common/fixture.rs b/orb-jobs-agent/tests/common/fixture.rs index 3bfb0f8a7..db954d687 100644 --- a/orb-jobs-agent/tests/common/fixture.rs +++ b/orb-jobs-agent/tests/common/fixture.rs @@ -8,7 +8,8 @@ use dbus_launch::BusType; use orb_connd_dbus::Connd; use orb_info::OrbId; use orb_jobs_agent::{ - program::{self, Deps, JobMode}, + job_system::client::RelayTransport, + program::{self, Deps}, settings::Settings, shell::Shell, }; @@ -27,6 +28,7 @@ use orb_relay_messages::{ }; use orb_relay_test_utils::{IntoRes, TestServer}; use orb_telemetry::TelemetryFlusher; +use std::sync::Arc; use std::time::Duration; use 
test_utils::async_bag::AsyncBag; use tokio::task::{self, JoinHandle}; @@ -42,6 +44,10 @@ pub struct JobAgentFixture { _server: TestServer<()>, client: Client, pub settings: Settings, + pub relay_host: String, + pub relay_namespace: String, + pub target_service_id: String, + pub auth: Auth, pub execution_updates: AsyncBag>, pub job_queue: JobQueue, _tempdir: TempDir, @@ -51,6 +57,56 @@ pub struct JobAgentFixture { zenoh_port: u16, } +impl JobAgentFixture { + pub fn relay_transport(&self) -> RelayTransport { + let opts = ClientOpts::entity(EntityType::Orb) + .id(self.settings.orb_id.to_string()) + .namespace(self.relay_namespace.clone()) + .endpoint(self.relay_host.clone()) + .auth(self.auth.clone()) + .max_connection_attempts(Amount::Val(3)) + .connection_timeout(Duration::from_secs(1)) + .heartbeat(Duration::from_secs(u64::MAX)) + .ack_timeout(Duration::from_secs(1)) + .build(); + + let (relay_client, _handle) = Client::connect(opts); + + RelayTransport::new( + relay_client, + self.target_service_id.clone(), + self.relay_namespace.clone(), + ) + } + + pub fn connect_relay( + &self, + ) -> ( + Arc, + JoinHandle>, + ) { + let opts = ClientOpts::entity(EntityType::Orb) + .id(self.settings.orb_id.to_string()) + .namespace(self.relay_namespace.clone()) + .endpoint(self.relay_host.clone()) + .auth(self.auth.clone()) + .max_connection_attempts(Amount::Val(3)) + .connection_timeout(Duration::from_secs(1)) + .heartbeat(Duration::from_secs(u64::MAX)) + .ack_timeout(Duration::from_secs(1)) + .build(); + + let (relay_client, relay_handle) = Client::connect(opts); + let transport = Arc::new(RelayTransport::new( + relay_client, + self.target_service_id.clone(), + self.relay_namespace.clone(), + )); + + (transport, relay_handle) + } +} + #[bon] impl JobAgentFixture { pub fn init_tracing(&self) -> TelemetryFlusher { @@ -187,10 +243,6 @@ impl JobAgentFixture { let settings = Settings { orb_id: OrbId::Short(orb_id.parse().unwrap()), orb_platform: 
orb_info::orb_os_release::OrbOsPlatform::Diamond, - auth, - relay_host, - relay_namespace: namespace, - target_service_id: target_service_id.to_string(), store_path: tempdir.to_path_buf(), // Use non-existent paths by default for tests (can be overridden) calibration_file_path: "/nonexistent/calibration.json".into(), @@ -220,6 +272,10 @@ impl JobAgentFixture { _server: server, client, settings, + relay_host, + relay_namespace: namespace, + target_service_id, + auth, execution_updates, job_queue, _tempdir: tempdir, @@ -251,16 +307,13 @@ impl JobAgentFixture { .await .unwrap(); - let deps = Deps::new( - shell, - self.dbus_conn.clone(), - settings.clone(), - JobMode::Service, - ); + let (transport, relay_handle) = self.connect_relay(); + + let deps = Deps::new(shell, self.dbus_conn.clone(), settings.clone()); let join_handle = task::spawn(async move { tokio::select! { - r = program::run(deps) => { + r = program::run(deps, transport, relay_handle) => { if let Err(e) = r { println!("program::run failed with {e}"); } @@ -307,7 +360,7 @@ impl JobAgentFixture { .send( SendMessage::to(EntityType::Orb) .id(self.settings.orb_id.to_string()) - .namespace(&self.settings.relay_namespace) + .namespace(&self.relay_namespace) .qos(QoS::AtLeastOnce) .payload(payload), ) @@ -328,7 +381,7 @@ impl JobAgentFixture { .send( SendMessage::to(EntityType::Orb) .id(self.settings.orb_id.to_string()) - .namespace(&self.settings.relay_namespace) + .namespace(&self.relay_namespace) .qos(QoS::AtLeastOnce) .payload(payload), ) diff --git a/orb-jobs-agent/tests/job_handler.rs b/orb-jobs-agent/tests/job_handler.rs index c53aa7fef..c064877ab 100644 --- a/orb-jobs-agent/tests/job_handler.rs +++ b/orb-jobs-agent/tests/job_handler.rs @@ -2,7 +2,7 @@ use color_eyre::eyre::bail; use common::fixture::JobAgentFixture; use orb_jobs_agent::{ job_system::{ctx::JobExecutionUpdateExt, handler::JobHandler}, - program::{Deps, JobMode}, + program::Deps, shell::Host, }; use 
orb_relay_messages::jobs::v1::JobExecutionStatus; @@ -18,12 +18,8 @@ mod common; async fn sequential_jobs_block_other_jobs_execution() { // Arrange let fx = JobAgentFixture::new().await; - let deps = Deps::new( - Host, - fx.dbus_conn.clone(), - fx.settings.clone(), - JobMode::Service, - ); + let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let (transport, relay_handle) = fx.connect_relay(); let wait_time = Duration::from_millis(100); @@ -34,7 +30,7 @@ async fn sequential_jobs_block_other_jobs_execution() { Ok(ctx.success().stdout("one")) }) .parallel("second", async |ctx| Ok(ctx.success().stdout("two"))) - .build(deps) + .build(deps, transport, relay_handle) .run(), ); @@ -51,12 +47,8 @@ async fn sequential_jobs_block_other_jobs_execution() { async fn can_start_parallel_jobs_in_parallel() { // Arrange let fx = JobAgentFixture::new().await; - let deps = Deps::new( - Host, - fx.dbus_conn.clone(), - fx.settings.clone(), - JobMode::Service, - ); + let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let (transport, relay_handle) = fx.connect_relay(); let wait_time = Duration::from_millis(500); @@ -67,7 +59,7 @@ async fn can_start_parallel_jobs_in_parallel() { Ok(ctx.success().stdout("one")) }) .parallel("second", async |ctx| Ok(ctx.success().stdout("two"))) - .build(deps) + .build(deps, transport, relay_handle) .run(), ); @@ -90,14 +82,14 @@ async fn parallel_jobs_dont_exceed_max() { async fn gracefully_handles_unsupported_cmds() { // Arrange let fx = JobAgentFixture::new().await; - let deps = Deps::new( - Host, - fx.dbus_conn.clone(), - fx.settings.clone(), - JobMode::Service, - ); + let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let (transport, relay_handle) = fx.connect_relay(); - task::spawn(JobHandler::builder().build(deps).run()); + task::spawn( + JobHandler::builder() + .build(deps, transport, relay_handle) + .run(), + ); // Act fx.enqueue_job("joberoni").await.wait_for_completion().await; @@ 
-111,12 +103,8 @@ async fn gracefully_handles_unsupported_cmds() { async fn it_cancels_a_long_running_job() { // Arrange let fx = JobAgentFixture::with_namespace("cancel_long_running_job").await; - let deps = Deps::new( - Host, - fx.dbus_conn.clone(), - fx.settings.clone(), - JobMode::Service, - ); + let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); + let (transport, relay_handle) = fx.connect_relay(); let wait_time = Duration::from_millis(50); @@ -140,7 +128,7 @@ async fn it_cancels_a_long_running_job() { Ok(ctx.success().stdout("cancelled succesfully!")) }) - .build(deps) + .build(deps, transport, relay_handle) .run(), ); From f2d3069e7f2143d39a02db0189f3e91379f1adf1 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: Thu, 26 Feb 2026 13:41:23 +0100 Subject: [PATCH 05/66] feat(orb-jobs-agent): async_trait --- orb-jobs-agent/src/job_system/client.rs | 429 +++++++++++------------- 1 file changed, 197 insertions(+), 232 deletions(-) diff --git a/orb-jobs-agent/src/job_system/client.rs b/orb-jobs-agent/src/job_system/client.rs index 53e1740a4..0cf363268 100644 --- a/orb-jobs-agent/src/job_system/client.rs +++ b/orb-jobs-agent/src/job_system/client.rs @@ -2,6 +2,7 @@ use crate::job_system::{ orchestrator::{JobConfig, JobRegistry}, sanitize::redact_job_document, }; +use async_trait::async_trait; use color_eyre::eyre::{eyre, Result}; use orb_relay_client::{Client, QoS, SendMessage}; use orb_relay_messages::{ @@ -13,34 +14,29 @@ use orb_relay_messages::{ prost_types::Any, relay::entity::EntityType, }; -use std::{future::Future, pin::Pin, sync::Arc}; +use std::sync::Arc; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; +#[async_trait] pub trait JobTransport: Send + Sync + std::fmt::Debug { - fn listen_for_job<'a>( - &'a self, - job_registry: &'a JobRegistry, - ) -> Pin< - Box< - dyn Future> - + Send - + 'a, - >, - >; - - fn request_next_job<'a>( - &'a self, - job_registry: &'a JobRegistry, - ) -> 
Pin> + Send + 'a>>; - - fn send_job_update<'a>( - &'a self, - update: &'a JobExecutionUpdate, - ) -> Pin> + Send + 'a>>; - - fn reconnect(&self) -> Pin> + Send + '_>>; + async fn listen_for_job( + &self, + job_registry: &JobRegistry, + ) -> Result; + + async fn request_next_job( + &self, + job_registry: &JobRegistry, + ) -> Result<(), orb_relay_client::Err>; + + async fn send_job_update( + &self, + update: &JobExecutionUpdate, + ) -> Result<(), orb_relay_client::Err>; + + async fn reconnect(&self) -> Result<()>; } #[derive(Debug, Clone)] @@ -62,178 +58,135 @@ impl RelayTransport { relay_namespace: relay_namespace.into(), } } + + async fn send_request( + &self, + request: &JobRequestNext, + ) -> Result<(), orb_relay_client::Err> { + let any = Any::from_msg(request).unwrap(); + self.relay_client + .send( + SendMessage::to(EntityType::Service) + .id(self.target_service_id.clone()) + .namespace(self.relay_namespace.clone()) + .qos(QoS::AtLeastOnce) + .payload(any.encode_to_vec()), + ) + .await + } } +#[async_trait] impl JobTransport for RelayTransport { - fn listen_for_job<'a>( - &'a self, - job_registry: &'a JobRegistry, - ) -> Pin< - Box< - dyn Future> - + Send - + 'a, - >, - > { - Box::pin(async move { - loop { - match self.relay_client.recv().await { - Ok(msg) => { - let any = match Any::decode(msg.payload.as_slice()) { - Ok(any) => any, + async fn listen_for_job( + &self, + job_registry: &JobRegistry, + ) -> Result { + loop { + match self.relay_client.recv().await { + Ok(msg) => { + let any = match Any::decode(msg.payload.as_slice()) { + Ok(any) => any, + Err(e) => { + error!("error decoding message: {:?}", e); + continue; + } + }; + if any.type_url == JobNotify::type_url() { + match JobNotify::decode(any.value.as_slice()) { + Ok(job_notify) => { + info!("received JobNotify: {:?}", job_notify); + let request = build_job_request(job_registry).await; + if let Err(e) = self.send_request(&request).await { + error!("error sending JobRequestNext: {:?}", e); + } + } 
Err(e) => { - error!("error decoding message: {:?}", e); - continue; + error!("error decoding JobNotify: {:?}", e); } - }; - if any.type_url == JobNotify::type_url() { - match JobNotify::decode(any.value.as_slice()) { - Ok(job_notify) => { - info!("received JobNotify: {:?}", job_notify); - let request = build_job_request(job_registry).await; - if let Err(e) = self.send_request(&request).await { - error!("error sending JobRequestNext: {:?}", e); - } - } - Err(e) => { - error!("error decoding JobNotify: {:?}", e); - } + } + } else if any.type_url == JobExecution::type_url() { + match JobExecution::decode(any.value.as_slice()) { + Ok(job) => { + info!( + job_id = %job.job_id, + job_execution_id = %job.job_execution_id, + job_document = %redact_job_document(&job.job_document), + should_cancel = job.should_cancel, + "received JobExecution" + ); + + return Ok(job); } - } else if any.type_url == JobExecution::type_url() { - match JobExecution::decode(any.value.as_slice()) { - Ok(job) => { - info!( - job_id = %job.job_id, - job_execution_id = %job.job_execution_id, - job_document = %redact_job_document(&job.job_document), - should_cancel = job.should_cancel, - "received JobExecution" - ); - - return Ok(job); - } - Err(e) => { - error!("error decoding JobExecution: {:?}", e); - } + Err(e) => { + error!("error decoding JobExecution: {:?}", e); } - } else if any.type_url == JobCancel::type_url() { - match JobCancel::decode(any.value.as_slice()) { - Ok(job_cancel) => { + } + } else if any.type_url == JobCancel::type_url() { + match JobCancel::decode(any.value.as_slice()) { + Ok(job_cancel) => { + info!( + job_execution_id = %job_cancel.job_execution_id, + "received JobCancel" + ); + let cancelled = job_registry + .cancel_job(&job_cancel.job_execution_id) + .await; + if cancelled { info!( job_execution_id = %job_cancel.job_execution_id, - "received JobCancel" + "Successfully cancelled job" + ); + } else { + warn!( + job_execution_id = %job_cancel.job_execution_id, + "Attempted 
to cancel non-existent or already completed job" ); - let cancelled = job_registry - .cancel_job(&job_cancel.job_execution_id) - .await; - if cancelled { - info!( - job_execution_id = %job_cancel.job_execution_id, - "Successfully cancelled job" - ); - } else { - warn!( - job_execution_id = %job_cancel.job_execution_id, - "Attempted to cancel non-existent or already completed job" - ); - } - } - Err(e) => { - error!("error decoding JobCancel: {:?}", e); } } - } else { - error!( - "received unexpected message type: {:?}", - any.type_url - ); + Err(e) => { + error!("error decoding JobCancel: {:?}", e); + } } + } else { + error!("received unexpected message type: {:?}", any.type_url); } - Err(e) => { - error!("error receiving from relay: {:?}", e); + } + Err(e) => { + error!("error receiving from relay: {:?}", e); - return Err(e); - } + return Err(e); } } - }) - } - - fn request_next_job<'a>( - &'a self, - job_registry: &'a JobRegistry, - ) -> Pin> + Send + 'a>> - { - Box::pin(async move { - let request = build_job_request(job_registry).await; - self.send_request(&request).await?; - info!( - "sent JobRequestNext ignoring {} job execution IDs: {:?}", - request.ignore_job_execution_ids.len(), - request.ignore_job_execution_ids - ); - - Ok(()) - }) + } } - fn send_job_update<'a>( - &'a self, - job_update: &'a JobExecutionUpdate, - ) -> Pin> + Send + 'a>> - { - Box::pin(async move { - info!( - job_execution_id = %job_update.job_execution_id, - job_id = %job_update.job_id, - "sending job update: {:?}", - job_update - ); - let any = Any::from_msg(job_update).unwrap(); - self.relay_client - .send( - SendMessage::to(EntityType::Service) - .id(self.target_service_id.clone()) - .namespace(self.relay_namespace.clone()) - .qos(QoS::AtLeastOnce) - .payload(any.encode_to_vec()), - ) - .await - .inspect_err(|e| { - error!( - job_execution_id = %job_update.job_execution_id, - job_id = %job_update.job_id, - "error sending JobExecutionUpdate: {:?}", - e - ) - })?; - - info!( - 
job_execution_id = %job_update.job_execution_id, - job_id = %job_update.job_id, - "sent JobExecutionUpdate" - ); - - Ok(()) - }) - } + async fn request_next_job( + &self, + job_registry: &JobRegistry, + ) -> Result<(), orb_relay_client::Err> { + let request = build_job_request(job_registry).await; + self.send_request(&request).await?; + info!( + "sent JobRequestNext ignoring {} job execution IDs: {:?}", + request.ignore_job_execution_ids.len(), + request.ignore_job_execution_ids + ); - fn reconnect(&self) -> Pin> + Send + '_>> { - Box::pin(async move { - self.relay_client - .reconnect() - .await - .map_err(|_| eyre!("failed to force reconnect orb relay")) - }) + Ok(()) } -} -impl RelayTransport { - async fn send_request( + async fn send_job_update( &self, - request: &JobRequestNext, + job_update: &JobExecutionUpdate, ) -> Result<(), orb_relay_client::Err> { - let any = Any::from_msg(request).unwrap(); + info!( + job_execution_id = %job_update.job_execution_id, + job_id = %job_update.job_id, + "sending job update: {:?}", + job_update + ); + let any = Any::from_msg(job_update).unwrap(); self.relay_client .send( SendMessage::to(EntityType::Service) @@ -243,6 +196,29 @@ impl RelayTransport { .payload(any.encode_to_vec()), ) .await + .inspect_err(|e| { + error!( + job_execution_id = %job_update.job_execution_id, + job_id = %job_update.job_id, + "error sending JobExecutionUpdate: {:?}", + e + ) + })?; + + info!( + job_execution_id = %job_update.job_execution_id, + job_id = %job_update.job_id, + "sent JobExecutionUpdate" + ); + + Ok(()) + } + + async fn reconnect(&self) -> Result<()> { + self.relay_client + .reconnect() + .await + .map_err(|_| eyre!("failed to force reconnect orb relay")) } } @@ -280,77 +256,66 @@ impl LocalTransport { } } +#[async_trait] impl JobTransport for LocalTransport { - fn listen_for_job<'a>( - &'a self, - _job_registry: &'a JobRegistry, - ) -> Pin< - Box< - dyn Future> - + Send - + 'a, - >, - > { - Box::pin(async move { - let next_job = 
self.pending_job.lock().unwrap().take(); - - if let Some(job) = next_job { - info!( - job_id = %job.job_id, - job_execution_id = %job.job_execution_id, - job_document = %redact_job_document(&job.job_document), - should_cancel = job.should_cancel, - "received local JobExecution" - ); - - return Ok(job); - } + async fn listen_for_job( + &self, + _job_registry: &JobRegistry, + ) -> Result { + let next_job = self.pending_job.lock().unwrap().take(); - std::future::pending::<()>().await; - unreachable!() - }) + if let Some(job) = next_job { + info!( + job_id = %job.job_id, + job_execution_id = %job.job_execution_id, + job_document = %redact_job_document(&job.job_document), + should_cancel = job.should_cancel, + "received local JobExecution" + ); + + return Ok(job); + } + + std::future::pending::<()>().await; + unreachable!() } - fn request_next_job<'a>( - &'a self, - _job_registry: &'a JobRegistry, - ) -> Pin> + Send + 'a>> - { - Box::pin(async { Ok(()) }) + async fn request_next_job( + &self, + _job_registry: &JobRegistry, + ) -> Result<(), orb_relay_client::Err> { + Ok(()) } - fn send_job_update<'a>( - &'a self, - job_update: &'a JobExecutionUpdate, - ) -> Pin> + Send + 'a>> - { - Box::pin(async move { - let status_name = JobExecutionStatus::try_from(job_update.status) - .map(|s| format!("{s:?}")) - .unwrap_or_else(|_| format!("Unknown({})", job_update.status)); - - println!("--- Job Update ---"); - println!("job_id: {}", job_update.job_id); - println!("job_execution_id: {}", job_update.job_execution_id); - println!("status: {status_name}"); - if !job_update.std_out.is_empty() { - println!("stdout:\n{}", job_update.std_out); - } - if !job_update.std_err.is_empty() { - eprintln!("stderr:\n{}", job_update.std_err); - } + async fn send_job_update( + &self, + job_update: &JobExecutionUpdate, + ) -> Result<(), orb_relay_client::Err> { + let status_name = JobExecutionStatus::try_from(job_update.status) + .map(|s| format!("{s:?}")) + .unwrap_or_else(|_| format!("Unknown({})", 
job_update.status)); + + println!("--- Job Update ---"); + println!("job_id: {}", job_update.job_id); + println!("job_execution_id: {}", job_update.job_execution_id); + println!("status: {status_name}"); + if !job_update.std_out.is_empty() { + println!("stdout:\n{}", job_update.std_out); + } + if !job_update.std_err.is_empty() { + eprintln!("stderr:\n{}", job_update.std_err); + } - if job_update.status != JobExecutionStatus::InProgress as i32 { - *self.final_status.lock().unwrap() = Some(job_update.status); - self.shutdown.cancel(); - } + if job_update.status != JobExecutionStatus::InProgress as i32 { + *self.final_status.lock().unwrap() = Some(job_update.status); + self.shutdown.cancel(); + } - Ok(()) - }) + Ok(()) } - fn reconnect(&self) -> Pin> + Send + '_>> { - Box::pin(async { Ok(()) }) + async fn reconnect(&self) -> Result<()> { + Ok(()) } } From efcd115271d48ea78fbcf86f77d8e299112d2d40 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 11 Mar 2026 17:05:33 +0100 Subject: [PATCH 06/66] chore(hil): update awscli to 2.32.12 on hils (#1067) The option --progress-frequency is required --- nix/packages/hil.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/packages/hil.nix b/nix/packages/hil.nix index c9b811d42..94804da92 100644 --- a/nix/packages/hil.nix +++ b/nix/packages/hil.nix @@ -3,7 +3,7 @@ with pkgs; [ # HIL Specific - awscli2 + unstable.awscli2 cloudflared git gnutar From 472abe8309241d7ee91bf9a311e0ef0139b671dd Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 11 Mar 2026 18:35:13 +0100 Subject: [PATCH 07/66] chore: update awscli system-wide (#1068) 1 attempt didn't work out --- nix/machines/nixos-common.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/machines/nixos-common.nix b/nix/machines/nixos-common.nix index 8405deed9..d3a9b3217 100644 --- a/nix/machines/nixos-common.nix +++ b/nix/machines/nixos-common.nix @@ -79,7 +79,7 @@ in programs.nix-ld.enable = true; environment.systemPackages = 
with pkgs; [ - awscli2 # todo: remove this when hil can be consumed via flake + unstable.awscli2 # todo: remove this when hil can be consumed via flake bun gh git From 15ad58c765602ddbcf1c45404a6adfd96a9d11bc Mon Sep 17 00:00:00 2001 From: vmenge Date: Wed, 11 Mar 2026 18:51:16 +0100 Subject: [PATCH 08/66] chore(connd): use StoredWifiProfile for serde of Wifi profiles (#1059) ## context i previously implemented serde of Wifi profiles using the same types as the ones used for the `NetworkManager` abstraction. this smelled bad and after weeks of feeling the stink of my own code i could take it no longer ## changes - uses `StoredWifiProfile` and `StoredWifiSec` for serialization so we don't accidentally break deserialization due to changes in `NetworkManager` abstraction - adds a backcompat to make sure ciborium can deser `StoredWifiProfile` from serialized `WifiProfile`s ## todo - [x] test on an orb --- orb-connd/src/service/mod.rs | 147 ++++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 3 deletions(-) diff --git a/orb-connd/src/service/mod.rs b/orb-connd/src/service/mod.rs index 7ba01a86d..203e55138 100644 --- a/orb-connd/src/service/mod.rs +++ b/orb-connd/src/service/mod.rs @@ -9,6 +9,7 @@ use color_eyre::{ }; use orb_connd_dbus::{Connd, OBJ_PATH, SERVICE}; use orb_info::orb_os_release::OrbRelease; +use serde::{Deserialize, Serialize}; use std::cmp; use std::collections::HashSet; use std::path::Path; @@ -263,7 +264,7 @@ impl ConndService { }; info!("importing {} bytes from secure storage", ss_profiles.len()); - let ss_profiles: Vec = ciborium::de::from_reader( + let ss_profiles: Vec = ciborium::de::from_reader( ss_profiles.as_slice(), ) .wrap_err("failed to deserialize secure storage bytes into wifi profiles")?; @@ -278,7 +279,7 @@ impl ConndService { for profile in to_import { self.wifi_profile_add( &profile.ssid, - profile.sec, + profile.sec.into(), &profile.psk, profile.hidden, ) @@ -294,7 +295,13 @@ impl ConndService { return Ok(()); }; - 
let profiles = self.nm.list_wifi_profiles().await?; + let profiles: Vec<_> = self + .nm + .list_wifi_profiles() + .await? + .into_iter() + .map(StoredWifiProfile::from) + .collect(); let mut bytes = Vec::new(); ciborium::ser::into_writer(&profiles, &mut bytes)?; @@ -491,3 +498,137 @@ impl TryFrom for Auth { Ok(auth) } } + +/// Wifiprofile type used when serializing / deserializing to store on secure storage. +/// Do NOT change the types here unless you want wifi profiles to not be deserialized properly and +/// all orbs to ask for Wifi QR D: -vmenge +#[derive(Debug, Serialize, Deserialize)] +pub struct StoredWifiProfile { + pub id: String, + pub uuid: String, + pub ssid: String, + pub sec: StoredWifiSec, + pub psk: String, + pub autoconnect: bool, + pub priority: i32, + pub hidden: bool, + pub path: String, +} + +/// WifiSecurity type used when serializing / deserializing to store on secure storage. +/// Do NOT change the types here unless you want wifi profiles to not be deserialized properly and +/// all orbs to ask for Wifi QR D: -vmenge +#[derive(Debug, Serialize, Deserialize)] +pub enum StoredWifiSec { + /// No protection (or RSN IE present but no auth/key-mgmt required). + Open, + /// Enhanced Open (OWE): opportunistic encryption without authentication. + Owe, + /// OWE transition mode: AP advertises open + OWE BSSID pair. + OweTransition, + /// Legacy WEP (avoid). + Wep, + /// WPA1 with PSK (legacy). + Wpa1Psk, + /// WPA1 with 802.1X/EAP (legacy enterprise). + Wpa1Eap, + /// WPA2-Personal (PSK). + Wpa2Psk, + /// WPA3-Personal (SAE). + Wpa3Sae, + /// WPA2/WPA3 mixed (PSK + SAE). + Wpa2Wpa3Transitional, + /// WPA2/3-Enterprise (802.1X/EAP). + Enterprise, + /// Couldn’t classify from flags. 
+ Unknown, +} + +impl From for StoredWifiProfile { + fn from(value: WifiProfile) -> Self { + Self { + id: value.id, + uuid: value.uuid, + ssid: value.ssid, + sec: value.sec.into(), + psk: value.psk, + autoconnect: value.autoconnect, + priority: value.priority, + hidden: value.hidden, + path: value.path, + } + } +} + +impl From for StoredWifiSec { + fn from(value: WifiSec) -> Self { + match value { + WifiSec::Open => Self::Open, + WifiSec::Owe => Self::Owe, + WifiSec::OweTransition => Self::OweTransition, + WifiSec::Wep => Self::Wep, + WifiSec::Wpa1Psk => Self::Wpa1Psk, + WifiSec::Wpa1Eap => Self::Wpa1Eap, + WifiSec::Wpa2Psk => Self::Wpa2Psk, + WifiSec::Wpa3Sae => Self::Wpa3Sae, + WifiSec::Wpa2Wpa3Transitional => Self::Wpa2Wpa3Transitional, + WifiSec::Enterprise => Self::Enterprise, + WifiSec::Unknown => Self::Unknown, + } + } +} + +impl From for WifiSec { + fn from(value: StoredWifiSec) -> Self { + match value { + StoredWifiSec::Open => Self::Open, + StoredWifiSec::Owe => Self::Owe, + StoredWifiSec::OweTransition => Self::OweTransition, + StoredWifiSec::Wep => Self::Wep, + StoredWifiSec::Wpa1Psk => Self::Wpa1Psk, + StoredWifiSec::Wpa1Eap => Self::Wpa1Eap, + StoredWifiSec::Wpa2Psk => Self::Wpa2Psk, + StoredWifiSec::Wpa3Sae => Self::Wpa3Sae, + StoredWifiSec::Wpa2Wpa3Transitional => Self::Wpa2Wpa3Transitional, + StoredWifiSec::Enterprise => Self::Enterprise, + StoredWifiSec::Unknown => Self::Unknown, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// backcompat test: WifiProfile was previously serialized directly to secure + /// storage instead of using a separate StoredWifiProfile type. 
+ #[test] + fn wifi_profile_deserializes_as_stored_wifi_profile() { + let profile = WifiProfile { + id: "my-network".into(), + uuid: "550e8400-e29b-41d4-a716-446655440000".into(), + ssid: "my-network".into(), + sec: WifiSec::Wpa2Psk, + psk: "hunter2".into(), + autoconnect: true, + priority: 10, + hidden: false, + path: "/org/freedesktop/NetworkManager/Settings/1".into(), + }; + + let mut bytes = Vec::new(); + ciborium::ser::into_writer(&profile, &mut bytes).unwrap(); + + let stored: StoredWifiProfile = + ciborium::de::from_reader(bytes.as_slice()).unwrap(); + + assert_eq!(stored.id, profile.id); + assert_eq!(stored.uuid, profile.uuid); + assert_eq!(stored.ssid, profile.ssid); + assert_eq!(stored.psk, profile.psk); + assert_eq!(stored.autoconnect, profile.autoconnect); + assert_eq!(stored.priority, profile.priority); + assert_eq!(stored.hidden, profile.hidden); + assert_eq!(stored.path, profile.path); + } +} From 0aa4b5f44cd3551b64a5f5fd5502f699b1414e39 Mon Sep 17 00:00:00 2001 From: AlexKaravaev <30314738+AlexKaravaev@users.noreply.github.com> Date: Wed, 11 Mar 2026 23:08:38 +0100 Subject: [PATCH 09/66] fix: update the throttle on config to 90seconds (#1069) Update the config throttle to publish every 90 seconds --- orb-backend-status/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orb-backend-status/src/lib.rs b/orb-backend-status/src/lib.rs index 61f4a779a..353545a65 100644 --- a/orb-backend-status/src/lib.rs +++ b/orb-backend-status/src/lib.rs @@ -135,7 +135,7 @@ pub async fn program( .oes_reroute( "core/config", Duration::from_millis(100), - Duration::from_secs(1), + Duration::from_secs(90), ) .run() .await?; From 6cc316d3780c3fa9b83c973363c46b69e4afe3c4 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Fri, 13 Mar 2026 15:52:54 +0100 Subject: [PATCH 10/66] fix: remote commands use orb_config correctly (#1070) before the orb_config didn't use the file values --- hil/src/commands/cmd.rs | 20 +++++++++++++------- 1 file changed, 13 
insertions(+), 7 deletions(-) diff --git a/hil/src/commands/cmd.rs b/hil/src/commands/cmd.rs index e6bde4550..e79184bbb 100644 --- a/hil/src/commands/cmd.rs +++ b/hil/src/commands/cmd.rs @@ -78,15 +78,17 @@ pub struct Cmd { impl Cmd { pub async fn run(self) -> Result<()> { + let orb_config = self.orb.use_file_if_exists()?; + if let Some(remote_transport) = self.transport.remote_transport() { - return self.run_remote(remote_transport).await; + return self.run_remote(remote_transport, &orb_config).await; } - self.run_serial().await + self.run_serial(&orb_config).await } - async fn run_serial(self) -> Result<()> { - let serial_path = if let Some(custom_path) = self.orb.serial_path.as_ref() { + async fn run_serial(self, orb_config: &OrbConfig) -> Result<()> { + let serial_path = if let Some(custom_path) = orb_config.serial_path.as_ref() { custom_path.as_path() } else { std::path::Path::new(crate::serial::DEFAULT_SERIAL_PATH) @@ -105,7 +107,11 @@ impl Cmd { run_inner(serial_reader, serial_writer, self.cmd, self.timeout).await } - async fn run_remote(self, transport: RemoteTransport) -> Result<()> { + async fn run_remote( + self, + transport: RemoteTransport, + orb_config: &OrbConfig, + ) -> Result<()> { let auth = self.resolve_remote_auth(transport)?; let connect_args = RemoteConnectArgs { @@ -113,9 +119,9 @@ impl Cmd { hostname: match transport { // teleport needs to resolve the hostname, so we ignore it RemoteTransport::Teleport => None, - RemoteTransport::Ssh => self.orb.get_hostname(), + RemoteTransport::Ssh => orb_config.get_hostname(), }, - orb_id: self.orb.orb_id, + orb_id: orb_config.orb_id.clone(), username: self.username, port: self.port, auth, From 90d02c72a97c0fb4f26dd3d80ce3945ff81f511d Mon Sep 17 00:00:00 2001 From: Sfikas Date: Sun, 15 Mar 2026 14:38:52 +0100 Subject: [PATCH 11/66] feat(scripts): a couple options to ssh (#1073) adds `-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null` so that scripts do not fail if the host is unknown --- 
scripts/{fetch-certs.sh => fetch-keys.sh} | 6 +++++- scripts/remote-provision.sh | 12 ++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) rename scripts/{fetch-certs.sh => fetch-keys.sh} (91%) diff --git a/scripts/fetch-certs.sh b/scripts/fetch-keys.sh similarity index 91% rename from scripts/fetch-certs.sh rename to scripts/fetch-keys.sh index f36ceff7a..163d094a9 100755 --- a/scripts/fetch-certs.sh +++ b/scripts/fetch-keys.sh @@ -30,6 +30,7 @@ main() { local short=false local passphrase="" local scp_prefix="tsh" + local scp_options="" local positional_args=() while [[ $# -gt 0 ]]; do arg="${1}"; shift @@ -61,6 +62,7 @@ main() { usage; exit 1 fi scp_prefix="sshpass -p "${passphrase}"" + scp_options="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" fi local destination_folder="${2}" @@ -92,7 +94,9 @@ main() { continue fi echo "Copying ${file} from ${remote}..." - if ! ${scp_prefix} scp "worldcoin@${remote}:/usr/persistent/se/keystore/${file}" "${destination_folder}/"; then + if ! 
${scp_prefix} scp ${scp_options} \ + worldcoin@"${remote}":/usr/persistent/se/keystore/"${file}" \ + "${destination_folder}/"; then echo "Error: Failed to copy ${file}" fi done diff --git a/scripts/remote-provision.sh b/scripts/remote-provision.sh index b63119421..f5428fcfd 100755 --- a/scripts/remote-provision.sh +++ b/scripts/remote-provision.sh @@ -32,6 +32,7 @@ provision_device() { local user="worldcoin" local se_dir="/usr/persistent/se" + local ssh_options="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" local key_dir="${se_dir}/keystore" if [[ "${ssh_prefix}" == "tsh" ]]; then user="root" @@ -48,7 +49,7 @@ provision_device() { exit 0 fi fi - ${ssh_prefix} ssh "${user}@${remote}" bash --noprofile --norc < /dev/null 2>&1; then if [[ -n "${plug_trust}" ]]; then - ${ssh_prefix} scp "${plug_trust}" worldcoin@"${remote}:/tmp/plug_and_trust.tar.gz" - ${ssh_prefix} ssh worldcoin@"${remote}" bash --noprofile --norc < Date: Sun, 15 Mar 2026 15:03:43 +0100 Subject: [PATCH 12/66] fix: Generate Json & Upload Keys (#1074) This pull request introduces a new Bash script, `scripts/upload-keys.sh`, which automates the upload of Orb key material and certificates to the backend. The script provides robust error handling, flexible options for environment and authentication, and supports both normal and dry-run modes. The most important changes are grouped below: **Script functionality and robustness:** * Added a comprehensive Bash script (`scripts/upload-keys.sh`) for uploading Orb keys and certificates, including error handling, input validation, and usage instructions. * Implemented support for both production and staging environments, with environment selection via command-line options or environment variables. * Provided dry-run mode to generate and write JSON payloads without making network requests, aiding testing and debugging. 
**Payload generation and upload:** * Added functions for generating payloads for attestation, signup, chipid keys, and certificates, including base64 encoding and JSON formatting for backend compatibility. * Integrated Cloudflared authentication for secure backend access and automated curl POST requests to relevant API endpoints. --- scripts/upload-keys.sh | 381 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100755 scripts/upload-keys.sh diff --git a/scripts/upload-keys.sh b/scripts/upload-keys.sh new file mode 100755 index 000000000..dd936c9cc --- /dev/null +++ b/scripts/upload-keys.sh @@ -0,0 +1,381 @@ +#!/usr/bin/env bash + +set -o errexit +set -o errtrace +set -o nounset +set -o pipefail + +usage() { + cat <<'EOF' +Usage: script.sh [OPTIONS] + +Options: + -h, --help Display this help message + -t, --bearer-token Bearer token for authentication + -b, --backend (stage|prod) Target the stage or prod backend + -s, --short Short upload (skip attestation cert) + -n, --dry-run Print/write payloads without making curl requests + +Environment variables (overridden by options): + FM_CLI_ENV: Must be either 'stage' or 'prod' + FM_CLI_ORB_AUTH_INTERNAL_TOKEN: Bearer token for authentication + +Example: + script.sh -t -b stage 349df8b0 /path/to/provisioning_material +EOF +} + +get_cloudflared_token() { + local -r domain="${1}" + cloudflared access login --quiet "${domain}" + cloudflared access token -app="${domain}" +} + +require_file() { + local -r path="${1}" + if [[ ! 
-f "${path}" ]]; then + echo "Error: Required file not found: ${path}" >&2 + exit 1 + fi +} + +make_key_payload() { + local -r orb_id="${1}" + local -r key_type="${2}" + local -r key_file="${3}" + local -r sig_file="${4}" + local -r extra_file="${5}" + + require_file "${key_file}" + require_file "${sig_file}" + require_file "${extra_file}" + + local key_value + local signature_b64 + local extra_b64 + + key_value="$(cat "${key_file}")" + signature_b64="$(base64 -w 0 "${sig_file}")" + extra_b64="$(base64 -w 0 "${extra_file}")" + + jq -n \ + --arg orbId "${orb_id}" \ + --arg type "${key_type}" \ + --arg key "${key_value}" \ + --arg signature "${signature_b64}" \ + --arg extraData "${extra_b64}" \ + '{ + orbId: $orbId, + type: $type, + key: $key, + signature: $signature, + extraData: $extraData, + active: true + }' +} + +make_chipid_payload() { + local -r orb_id="${1}" + local -r chipid_file="${2}" + local -r sig_file="${3}" + local -r extra_file="${4}" + + require_file "${chipid_file}" + require_file "${sig_file}" + require_file "${extra_file}" + + local chipid_b64 + local signature_b64 + local extra_b64 + + chipid_b64="$(base64 -w 0 "${chipid_file}")" + signature_b64="$(base64 -w 0 "${sig_file}")" + extra_b64="$(base64 -w 0 "${extra_file}")" + + jq -n \ + --arg orbId "${orb_id}" \ + --arg key "${chipid_b64}" \ + --arg signature "${signature_b64}" \ + --arg extraData "${extra_b64}" \ + '{ + orbId: $orbId, + type: "chipid", + key: $key, + signature: $signature, + extraData: $extraData, + active: true + }' +} + +make_certificate_payload() { + local -r orb_id="${1}" + local -r cert_file="${2}" + + require_file "${cert_file}" + + local certificate_value + certificate_value="$(cat "${cert_file}")" + + jq -n \ + --arg orbId "${orb_id}" \ + --arg certificate "${certificate_value}" \ + '{ + orbId: $orbId, + certificate: $certificate + }' +} + +make_dry_run_json() { + local -r orb_id="${1}" + local -r keypath="${2}" + + local 
attestation_key_file="${keypath}/sss_70000001_0002_0040.bin" + local signup_key_file="${keypath}/sss_70000002_0002_0040.bin" + local chipid_key_file="${keypath}/7fff0206.chip_id.raw" + + local attestation_sig_file="${keypath}/70000001.signature.raw" + local attestation_extra_file="${keypath}/70000001.extra.raw" + local signup_sig_file="${keypath}/70000002.signature.raw" + local signup_extra_file="${keypath}/70000002.extra.raw" + local chipid_sig_file="${keypath}/7fff0206.signature.raw" + local chipid_extra_file="${keypath}/7fff0206.extra.raw" + + require_file "${attestation_key_file}" + require_file "${signup_key_file}" + require_file "${chipid_key_file}" + require_file "${attestation_sig_file}" + require_file "${attestation_extra_file}" + require_file "${signup_sig_file}" + require_file "${signup_extra_file}" + require_file "${chipid_sig_file}" + require_file "${chipid_extra_file}" + + local attestation_key signup_key chipid_key + local attestation_sig attestation_extra signup_sig signup_extra chipid_sig chipid_extra + + attestation_key="$(cat "${attestation_key_file}")" + signup_key="$(cat "${signup_key_file}")" + chipid_key="$(base64 -w 0 "${chipid_key_file}")" + + attestation_sig="$(base64 -w 0 "${attestation_sig_file}")" + attestation_extra="$(base64 -w 0 "${attestation_extra_file}")" + signup_sig="$(base64 -w 0 "${signup_sig_file}")" + signup_extra="$(base64 -w 0 "${signup_extra_file}")" + chipid_sig="$(base64 -w 0 "${chipid_sig_file}")" + chipid_extra="$(base64 -w 0 "${chipid_extra_file}")" + + jq -n \ + --arg orbId "${orb_id}" \ + --arg attestationKey "${attestation_key}" \ + --arg attestationSig "${attestation_sig}" \ + --arg attestationExtra "${attestation_extra}" \ + --arg chipidKey "${chipid_key}" \ + --arg chipidSig "${chipid_sig}" \ + --arg chipidExtra "${chipid_extra}" \ + --arg signupKey "${signup_key}" \ + --arg signupSig "${signup_sig}" \ + --arg signupExtra "${signup_extra}" \ + '[ + { + orbId: $orbId, + key: $attestationKey, + type: 
"attestation", + active: true, + extraData: { "$binary": { "base64": $attestationExtra, "subType": "00" } }, + signature: { "$binary": { "base64": $attestationSig, "subType": "00" } } + }, + { + orbId: $orbId, + type: "chipid", + active: true, + key: $chipidKey, + extraData: { "$binary": { "base64": $chipidExtra, "subType": "00" } }, + signature: { "$binary": { "base64": $chipidSig, "subType": "00" } } + }, + { + orbId: $orbId, + key: $signupKey, + type: "signup", + active: true, + extraData: { "$binary": { "base64": $signupExtra, "subType": "00" } }, + signature: { "$binary": { "base64": $signupSig, "subType": "00" } } + } + ]' +} + +post_json() { + local -r url="${1}" + local -r bearer="${2}" + local -r cf_token="${3}" + local -r payload="${4}" + + curl --fail --location \ + -H "Authorization: Bearer ${bearer}" \ + -H "cf-access-token: ${cf_token}" \ + -H "Content-Type: application/json" \ + -X POST "${url}" \ + -d "${payload}" +} + +main() { + local bearer="${FM_CLI_ORB_AUTH_INTERNAL_TOKEN:-}" + local backend="${FM_CLI_ENV:-}" + local positional_args=() + local short=0 + local dry_run=0 + local arg + + while [[ "$#" -gt 0 ]]; do + arg="${1}" + shift + case "${arg}" in + -h|--help) + usage + exit 0 + ;; + -t|--bearer-token|--token) + bearer="${1}" + shift + ;; + -b|--backend) + backend="${1}" + shift + ;; + -s|--short) + short=1 + ;; + -n|--dry-run) + dry_run=1 + ;; + -*) + echo "Unknown option: ${arg}" >&2 + usage + exit 1 + ;; + *) + positional_args+=("${arg}") + ;; + esac + done + + set -- "${positional_args[@]}" + + if [[ $# -ne 2 ]]; then + echo "Error: must pass " >&2 + usage + exit 1 + fi + + local -r orb_id="${1}" + local -r keypath="${2}" + + if [[ ! -d "${keypath}" ]]; then + echo "Error: Keypath directory '${keypath}' does not exist." >&2 + exit 1 + fi + + if [[ ${dry_run} -eq 0 ]]; then + if [[ -z "${bearer}" ]]; then + echo "Bearer token not found. 
Please export FM_CLI_ORB_AUTH_INTERNAL_TOKEN, or pass it as an argument: -t " >&2 + exit 1 + fi + + if [[ -z "${backend}" ]]; then + echo "Environment not found. Please export FM_CLI_ENV, or pass it as an argument: -b (stage|prod)" >&2 + exit 1 + fi + + if [[ "${backend}" != "prod" && "${backend}" != "stage" ]]; then + echo "Invalid environment: ${backend}. Must be either 'prod' or 'stage'." >&2 + exit 1 + fi + fi + + local domain + if [[ "${backend}" == "prod" ]]; then + domain="auth.internal.orb.worldcoin.dev" + else + domain="auth.internal.stage.orb.worldcoin.dev" + fi + + if [[ ${dry_run} -eq 1 ]]; then + echo "=== DRY RUN MODE ===" + echo "Orb ID: ${orb_id}" + echo "Keypath: ${keypath}" + + local json_output="${keypath}/auth.keys.json" + make_dry_run_json "${orb_id}" "${keypath}" > "${json_output}" + echo "JSON written to: ${json_output}" + exit 0 + fi + + echo "Getting Cloudflared access token..." + local cf_token + cf_token="$(get_cloudflared_token "${domain}")" + + if [[ ${short} -eq 0 ]]; then + local cert_payload + cert_payload="$( + make_certificate_payload \ + "${orb_id}" \ + "${keypath}/f0000013.cert" + )" + + post_json \ + "https://${domain}/api/v1/certificate" \ + "${bearer}" \ + "${cf_token}" \ + "${cert_payload}" + fi + + local signup_payload + signup_payload="$( + make_key_payload \ + "${orb_id}" \ + "signup" \ + "${keypath}/sss_70000002_0002_0040.bin" \ + "${keypath}/70000002.signature.raw" \ + "${keypath}/70000002.extra.raw" + )" + + post_json \ + "https://${domain}/api/v1/key" \ + "${bearer}" \ + "${cf_token}" \ + "${signup_payload}" + + local attestation_payload + attestation_payload="$( + make_key_payload \ + "${orb_id}" \ + "attestation" \ + "${keypath}/sss_70000001_0002_0040.bin" \ + "${keypath}/70000001.signature.raw" \ + "${keypath}/70000001.extra.raw" + )" + + post_json \ + "https://${domain}/api/v1/key" \ + "${bearer}" \ + "${cf_token}" \ + "${attestation_payload}" + + local chipid_payload + chipid_payload="$( + make_chipid_payload \ + 
"${orb_id}" \ + "${keypath}/7fff0206.chip_id.raw" \ + "${keypath}/7fff0206.signature.raw" \ + "${keypath}/7fff0206.extra.raw" + )" + + post_json \ + "https://${domain}/api/v1/key" \ + "${bearer}" \ + "${cf_token}" \ + "${chipid_payload}" +} + +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi From 441425835a42d5e74fa3b4bcafb777403a411974 Mon Sep 17 00:00:00 2001 From: chrisgalanis <50170911+chrisgalanis@users.noreply.github.com> Date: Mon, 16 Mar 2026 15:38:44 +0100 Subject: [PATCH 13/66] feat: Make HILs Great Again (#1076) This pull request introduces several improvements and new features to the hardware-in-the-loop (HIL) test suite, focusing on relay board support, OTA result reporting, and reliability enhancements. The most significant change is the addition of support for Numato USB relay boards alongside the existing USB HID relay boards, unified under a new `Relay` abstraction. OTA result reporting is now more detailed and user-friendly, and recovery pin handling during device reboot is more robust. Several minor improvements and refactors are included to support these changes. Relay board support and abstraction: * Added support for Numato USB relay boards (serial protocol), alongside USB HID relay boards, using a unified `Relay` type and driver abstraction in `hil/src/relay.rs` and updated configuration in `hil/src/orb.rs`. [[1]](diffhunk://#diff-ec28b07638c10f082354a7d11760c38b89fd7ed2155c2b4eb9c8affb58292054L1-R16) [[2]](diffhunk://#diff-8906451cd7ad39c7be0706afd4805c58409a5b7155fa5676fd0418a84bba38bfL27-R33) [[3]](diffhunk://#diff-8906451cd7ad39c7be0706afd4805c58409a5b7155fa5676fd0418a84bba38bfL71-R81) [[4]](diffhunk://#diff-8906451cd7ad39c7be0706afd4805c58409a5b7155fa5676fd0418a84bba38bfL158-R190) * Updated relay channel handling and validation to accommodate both 1-indexed (USB HID) and 0-indexed (Numato) protocols, with improved documentation and error messages. 
[[1]](diffhunk://#diff-ec28b07638c10f082354a7d11760c38b89fd7ed2155c2b4eb9c8affb58292054L1-R16) [[2]](diffhunk://#diff-8906451cd7ad39c7be0706afd4805c58409a5b7155fa5676fd0418a84bba38bfL71-R81) [[3]](diffhunk://#diff-8906451cd7ad39c7be0706afd4805c58409a5b7155fa5676fd0418a84bba38bfL158-R190) OTA result reporting improvements: * Refactored OTA result reporting to print detailed summaries, including boot logs, hardware states, and statuses for each test step, replacing the previous file listing with a more readable summary. Recovery pin handling and reboot reliability: * Improved recovery pin handling during device reboot by holding the pin in normal boot mode for the entire boot process, releasing it only after the device is confirmed online, and using a background thread for pin control. [[1]](diffhunk://#diff-ea3806106fed5cd38c7fb89471973b5dd24656ee7c4c3dc99b3ba9043eb7cfe7L29-R53) [[2]](diffhunk://#diff-ea3806106fed5cd38c7fb89471973b5dd24656ee7c4c3dc99b3ba9043eb7cfe7L71-R69) [[3]](diffhunk://#diff-ea3806106fed5cd38c7fb89471973b5dd24656ee7c4c3dc99b3ba9043eb7cfe7R89-R98) * Added a brief delay after setting the recovery pin to normal boot mode to prevent power down when using FTDI. Minor improvements and refactors: * Increased login prompt timeout and updated the matching pattern for improved robustness. * Updated imports and cleaned up unused code in relay and reboot modules. [[1]](diffhunk://#diff-ea3806106fed5cd38c7fb89471973b5dd24656ee7c4c3dc99b3ba9043eb7cfe7L1-R1) [[2]](diffhunk://#diff-8906451cd7ad39c7be0706afd4805c58409a5b7155fa5676fd0418a84bba38bfR8) These changes collectively enhance hardware compatibility, reliability, and user experience in the HIL test suite. 
--------- Co-authored-by: Claude Sonnet 4.6 --- flake.lock | 18 +-- hil/src/commands/login.rs | 4 +- hil/src/commands/ota/mod.rs | 161 +++++++++++---------- hil/src/commands/ota/reboot.rs | 52 ++++--- hil/src/commands/set_recovery_pin.rs | 1 + hil/src/orb.rs | 46 +++--- hil/src/relay.rs | 204 +++++++++++++++++---------- hil/src/rts.rs | 30 ++-- hil/src/serial/mod.rs | 26 +++- 9 files changed, 328 insertions(+), 214 deletions(-) diff --git a/flake.lock b/flake.lock index 8463cc3d6..5e0c63446 100644 --- a/flake.lock +++ b/flake.lock @@ -29,11 +29,11 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1757400094, - "narHash": "sha256-5Rcs6juMoMTaMJSR1glravl4QB9yLAFBD8s7KLi4kdQ=", + "lastModified": 1773385614, + "narHash": "sha256-1+i0VQqlGLcWWwZLexCnyN6lm2dKRQzYm1cW7V9Nckk=", "owner": "nix-community", "repo": "fenix", - "rev": "0682b9b518792c9428865c511a4c40c9ad85c243", + "rev": "812445139cfdcf6824b1bce1d8e67361fc4e472b", "type": "github" }, "original": { @@ -83,11 +83,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1757408970, - "narHash": "sha256-aSgK4BLNFFGvDTNKPeB28lVXYqVn8RdyXDNAvgGq+k0=", + "lastModified": 1767313136, + "narHash": "sha256-16KkgfdYqjaeRGBaYsNrhPRRENs0qzkQVUooNHtoy2w=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "d179d77c139e0a3f5c416477f7747e9d6b7ec315", + "rev": "ac62194c3917d5f474c1a844b6fd6da2db95077d", "type": "github" }, "original": { @@ -193,11 +193,11 @@ "rust-analyzer-src": { "flake": false, "locked": { - "lastModified": 1757362324, - "narHash": "sha256-/PAhxheUq4WBrW5i/JHzcCqK5fGWwLKdH6/Lu1tyS18=", + "lastModified": 1773326183, + "narHash": "sha256-tj3piRd9RnnP36HwHmQD4O4XZeowsH/rvMeyp9Pmot0=", "owner": "rust-lang", "repo": "rust-analyzer", - "rev": "9edc9cbe5d8e832b5864e09854fa94861697d2fd", + "rev": "6254616e97f358e67b70dfc0463687f5f7911c1a", "type": "github" }, "original": { diff --git a/hil/src/commands/login.rs b/hil/src/commands/login.rs index 9aff619c5..f36d386e5 100644 --- 
a/hil/src/commands/login.rs +++ b/hil/src/commands/login.rs @@ -151,8 +151,8 @@ impl Login { .await .wrap_err("error while typing username")?; tokio::time::timeout( - Duration::from_millis(45000), - wait_for_pattern("worldcoin@id".as_bytes().to_owned(), serial_rx_copy), + Duration::from_millis(95000), + wait_for_pattern("worldcoin@".as_bytes().to_owned(), serial_rx_copy), ) .await .wrap_err("timeout while waiting for bash prompt")? diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 8c97ab9f9..bd3166d01 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -182,124 +182,129 @@ impl Ota { info!("Capsule update status: {}", capsule_status); info!("Running check-my-orb"); - match verify::run_check_my_orb(&session).await { + let check_my_orb_status = match verify::run_check_my_orb(&session).await { Ok(output) => { - println!("CHECK_MY_ORB_STATUS=SUCCESS"); info!("check-my-orb completed successfully"); println!("CHECK_MY_ORB_OUTPUT_START"); println!("{output}"); println!("CHECK_MY_ORB_OUTPUT_END"); + println!("CHECK_MY_ORB_STATUS=SUCCESS"); + "SUCCESS" } Err(e) => { println!("CHECK_MY_ORB_EXECUTION_FAILED: {e}"); println!("CHECK_MY_ORB_STATUS=FAILED"); + "FAILED" } - } + }; info!("Getting hardware states"); - match verify::run_mcu_util_info(&session).await { - Ok(output) => { - match check_main_board_versions_match(&output) { - Ok(true) => { - if let Ok(true) = check_jetson_post_ota(&output) { - println!("MAIN_MCU_POST_OTA_STATUS=SUCCESS"); - } else { + let (main_mcu_status, security_mcu_status) = + match verify::run_mcu_util_info(&session).await { + Ok(output) => { + println!("ORB_MCU_UTIL_INFO_OUTPUT_START"); + println!("{output}"); + println!("ORB_MCU_UTIL_INFO_OUTPUT_END"); + + let main = match check_main_board_versions_match(&output) { + Ok(true) => { + if let Ok(true) = check_jetson_post_ota(&output) { + println!("MAIN_MCU_POST_OTA_STATUS=SUCCESS"); + "SUCCESS" + } else { + 
println!("MAIN_MCU_POST_OTA_STATUS=FAILED"); + "FAILED" + } + } + Ok(false) => { + println!("MAIN_MCU_POST_OTA_STATUS=FAILED"); + "FAILED" + } + Err(e) => { + println!("MAIN_MCU_POST_OTA_EXECUTION_FAILED: {e}"); println!("MAIN_MCU_POST_OTA_STATUS=FAILED"); + "FAILED" } - } - Ok(false) => { - println!("MAIN_MCU_POST_OTA_STATUS=FAILED"); - } - Err(e) => { - println!("MAIN_MCU_POST_OTA_EXECUTION_FAILED: {e}"); - println!("MAIN_MCU_POST_OTA_STATUS=FAILED"); - } + }; + let sec = match check_security_board_versions_match(&output) { + Ok(true) => { + println!("SECURITY_MCU_POST_OTA_STATUS=SUCCESS"); + "SUCCESS" + } + Ok(false) => { + println!("SECURITY_MCU_POST_OTA_STATUS=FAILED"); + "FAILED" + } + Err(e) => { + println!("SECURITY_MCU_POST_OTA_EXECUTION_FAILED: {e}"); + println!("SECURITY_MCU_POST_OTA_STATUS=FAILED"); + "FAILED" + } + }; + (main, sec) } - match check_security_board_versions_match(&output) { - Ok(true) => { - println!("SECURITY_MCU_POST_OTA_STATUS=SUCCESS"); - } - Ok(false) => { - println!("SECURITY_MCU_POST_OTA_STATUS=FAILED"); - } - Err(e) => { - println!("SECURITY_MCU_POST_OTA_EXECUTION_FAILED: {e}"); - println!("SECURITY_MCU_POST_OTA_STATUS=FAILED"); - } + Err(e) => { + println!("ORB_MCU_UTIL_INFO_EXECUTION_FAILED: {e}"); + println!("MCU_UTIL_STATUS=FAILED"); + ("FAILED", "FAILED") } - - // print full output for easier debugging - println!("ORB_MCU_UTIL_INFO_OUTPUT_START"); - println!("{output}"); - println!("ORB_MCU_UTIL_INFO_OUTPUT_END"); - } - Err(e) => { - println!("ORB_MCU_UTIL_INFO_EXECUTION_FAILED: {e}"); - println!("MCU_UTIL_STATUS=FAILED"); - } - } + }; info!("Getting last boot time"); - match verify::get_boot_time(&session).await { - Ok(boot_time) => { - println!("BOOT_TIME"); - println!("{boot_time}"); + let boot_time = match verify::get_boot_time(&session).await { + Ok(t) => { + println!("BOOT_TIME={}", t.trim()); + Some(t) } Err(e) => { println!("GET_BOOT_TIME=FAILED: {e}"); + None } - } + }; println!("OTA_RESULT=SUCCESS"); 
println!("OTA_VERSION={}", self.target_version); println!("OTA_SLOT_FINAL={}", current_slot); println!("OTA_WIPE_OVERLAYS_FINAL={}", wipe_overlays_status); - // Print all result files for easy collection/upload - self.print_result_files(); - - info!("OTA update completed successfully!"); - Ok(()) - } - - fn print_result_files(&self) { let platform_name = self .orb_config .platform - .map(|p| format!("{}", p)) + .map(|p| format!("{p}")) .unwrap_or_else(|| "unknown".to_string()); let log_dir = self .log_file .parent() .unwrap_or_else(|| std::path::Path::new(".")); - - println!("\n========================================"); - println!("OTA TEST RESULT FILES"); - println!("========================================"); - - let result_files = vec![ - self.log_file.clone(), - log_dir.join(format!("boot_log_{}_wipe_overlays.txt", platform_name)), - log_dir.join(format!("boot_log_{}_update.txt", platform_name)), - ]; - - println!("The following files contain OTA test results:"); - for file in &result_files { - if file.exists() { - println!(" ✓ {}", file.display()); - } else { - println!(" ✗ {} (not found)", file.display()); + for suffix in ["wipe_overlays", "update"] { + let path = log_dir.join(format!("boot_log_{platform_name}_{suffix}.txt")); + println!(); + println!("=== boot_log_{platform_name}_{suffix}.txt ==="); + match tokio::fs::read_to_string(&path).await { + Ok(contents) => print!("{contents}"), + Err(e) => println!(" (not available: {e})"), } } - println!("\nTo upload all files:"); - println!(" # List of files:"); - for file in &result_files { - if file.exists() { - println!(" {}", file.display()); - } + println!(); + println!("========== OTA SUMMARY =========="); + println!(" Version: {}", self.target_version); + println!(" Slot: {}", current_slot); + println!(" Wipe Overlays:{}", wipe_overlays_status); + println!(" Capsule: {}", capsule_status); + println!(" Main MCU: {}", main_mcu_status); + println!(" Security MCU: {}", security_mcu_status); + println!(" 
check-my-orb: {}", check_my_orb_status); + if let Some(t) = boot_time { + println!(" Boot Time: {}", t.trim()); } - println!("========================================\n"); + println!("---------------------------------"); + println!(" RESULT: SUCCESS"); + println!("================================="); + + info!("OTA update completed successfully!"); + + Ok(()) } async fn connect_remote(&self, orb_config: &OrbConfig) -> Result { diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 35c2227de..502b67b8a 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -1,6 +1,4 @@ -use crate::commands::SetRecoveryPin; -use crate::ftdi::OutputState; -use crate::orb::OrbConfig; +use crate::orb::{orb_manager_from_config, BootMode, OrbConfig}; use crate::serial::{spawn_serial_reader_task, LOGIN_PROMPT_PATTERN}; use crate::remote_cmd::RemoteSession; @@ -26,34 +24,33 @@ impl Ota { ) -> Result { info!("Waiting for reboot and device to come back online"); - // Set recovery pin HIGH for 5 seconds to prevent entering recovery mode - info!("Setting recovery pin HIGH to prevent recovery mode during reboot"); - let set_recovery = SetRecoveryPin { - state: OutputState::High, - duration: 5, - orb_config: self.orb_config.clone(), - }; + // Hold the recovery pin in normal-boot state for the entire boot process. + // + // For FTDI: set_boot_mode(Normal) sets RTS HIGH and holds the handle open. + // For relays: set_boot_mode(Normal) turns off both power and recovery channels. 
+ let orb_config_for_pin = self.orb_config.clone(); + let (pin_release_tx, pin_release_rx) = std::sync::mpsc::channel::<()>(); + let recovery_task = tokio::task::spawn_blocking(move || -> Result<()> { + let orb_config = orb_config_for_pin.use_file_if_exists()?; + let mut orb_mgr = orb_manager_from_config(&orb_config) + .wrap_err("failed to create pin controller")?; + orb_mgr.set_boot_mode(BootMode::Normal)?; + info!("✓ Recovery pin set to normal boot mode, waiting for boot"); + // Block until signaled or sender is dropped (error path). + let _ = pin_release_rx.recv(); + info!("Recovery pin released"); - // Run recovery pin setting in background task - let recovery_task = tokio::spawn(async move { - set_recovery - .run() - .await - .wrap_err("failed to set recovery pin") + Ok(()) }); self.capture_boot_logs(log_suffix, orb_config).await?; - // Wait for recovery pin task to complete - recovery_task - .await - .wrap_err("recovery pin task panicked")??; - let start_time = Instant::now(); let timeout = Duration::from_secs(900); // 15 minutes let mut attempt_count = 0; const MAX_ATTEMPTS: u32 = 90; let mut last_error = None; + let mut found_session = None; while start_time.elapsed() < timeout && attempt_count < MAX_ATTEMPTS { attempt_count += 1; @@ -68,7 +65,8 @@ impl Ota { Ok(session) => match session.test_connection().await { Ok(_) => { info!("Device is back online and responsive after reboot (attempt {})", attempt_count); - return Ok(session); + found_session = Some(session); + break; } Err(e) => { debug!( @@ -88,6 +86,16 @@ impl Ota { } } + // Release the recovery pin now that the device is back online. 
+ let _ = pin_release_tx.send(()); + recovery_task + .await + .wrap_err("recovery pin task panicked")??; + + if let Some(session) = found_session { + return Ok(session); + } + let elapsed = start_time.elapsed(); error!( "Device did not come back online within {:?} (attempted {} times)", diff --git a/hil/src/commands/set_recovery_pin.rs b/hil/src/commands/set_recovery_pin.rs index 98fe90746..cfe71510e 100644 --- a/hil/src/commands/set_recovery_pin.rs +++ b/hil/src/commands/set_recovery_pin.rs @@ -60,6 +60,7 @@ impl SetRecoveryPin { // IMPORTANT: Set button pin HIGH first to prevent power down // When FTDI enters bitbang mode, all pins default to LOW orb_mgr.set_boot_mode(BootMode::Normal)?; + std::thread::sleep(Duration::from_secs(2)); // Set recovery pin to desired state let mode = match state { diff --git a/hil/src/orb.rs b/hil/src/orb.rs index d918843d7..fa6e5982f 100644 --- a/hil/src/orb.rs +++ b/hil/src/orb.rs @@ -5,6 +5,7 @@ use std::fmt; use std::time::Duration; use crate::ftdi::FtdiGpio; +use crate::relay::Relay; /// Orb platform type #[derive(Debug, Clone, Copy, ValueEnum, Deserialize)] @@ -24,11 +25,12 @@ impl fmt::Display for Platform { } #[derive(Default, Debug, Clone, Copy, ValueEnum, Deserialize)] -#[serde(rename_all = "lowercase")] +#[serde(rename_all = "snake_case")] pub enum PinControlType { #[default] Ftdi, UsbRelay, + NumatoRelay, } /// Configuration for the orb, including pin controller and serial path. @@ -68,15 +70,15 @@ pub struct OrbConfig { #[arg(long)] pub desc: Option, - /// Relay board bank (etc /dev/hidraw0). Used when pin_ctrl_type = UsbRelay + /// Relay board device path. For UsbRelay: /dev/hidrawN. For NumatoRelay: /dev/ttyACMN. #[arg(long)] pub relay_bank: Option, - /// Relay channel for the power button (1-indexed). Used when pin_ctrl_type = UsbRelay. + /// Relay channel for the power button. UsbRelay: 1-indexed (1..=8). NumatoRelay: 0-indexed (0..=7). 
#[arg(long)] pub relay_power_channel: Option, - /// Relay channel for recovery mode (1-indexed). Used when pin_ctrl_type = UsbRelay. + /// Relay channel for recovery mode. UsbRelay: 1-indexed (1..=8). NumatoRelay: 0-indexed (0..=7). #[arg(long)] pub relay_recovery_channel: Option, } @@ -155,25 +157,37 @@ pub fn orb_manager_from_config( Ok(Box::new(configured.configure()?)) } PinControlType::UsbRelay => { - use crate::relay::{RelayChannel, UsbRelay}; - let bank: &str = config.relay_bank.as_deref().unwrap_or("/dev/hidraw0"); - let power = RelayChannel { - bank: bank.to_string(), - channel: config.relay_power_channel.unwrap_or(2), - }; - let recovery = RelayChannel { - bank: bank.to_string(), - channel: config.relay_recovery_channel.unwrap_or(1), + let bank = config.relay_bank.as_deref().unwrap_or("/dev/hidraw0"); + let power_channel = config.relay_power_channel.unwrap_or(2); + let recovery_channel = config.relay_recovery_channel.unwrap_or(1); + let (off_duration, on_duration) = match &config.platform { + Some(Platform::Diamond) => { + (Duration::from_secs(6), Duration::from_secs(3)) + } + _ => (Duration::from_secs(10), Duration::from_secs(4)), }; + Ok(Box::new(Relay::new_usb_hid( + bank, + power_channel, + recovery_channel, + off_duration, + on_duration, + )?)) + } + PinControlType::NumatoRelay => { + let device_path = config.relay_bank.as_deref().unwrap_or("/dev/ttyACM0"); + let power_channel = config.relay_power_channel.unwrap_or(1); + let recovery_channel = config.relay_recovery_channel.unwrap_or(0); let (off_duration, on_duration) = match &config.platform { Some(Platform::Diamond) => { (Duration::from_secs(6), Duration::from_secs(3)) } _ => (Duration::from_secs(10), Duration::from_secs(4)), }; - Ok(Box::new(UsbRelay::new( - power, - recovery, + Ok(Box::new(Relay::new_numato( + device_path, + power_channel, + recovery_channel, off_duration, on_duration, )?)) diff --git a/hil/src/relay.rs b/hil/src/relay.rs index bfa4ba7bb..2df2e8b82 100644 --- a/hil/src/relay.rs 
+++ b/hil/src/relay.rs @@ -1,16 +1,19 @@ -//! USB HID relay implementation of the [`PinController`] trait. +//! USB relay implementations of [`OrbManager`]. //! -//! Controls a USB relay board via HID reports written to `/dev/hidrawN`. +//! Supports two relay protocols behind a common [`Relay`] type: //! -//! Protocol: +//! **USB HID** (`/dev/hidrawN`): //! - Report: `[0x00, opcode, mask, 0, 0, 0, 0, 0, 0]` -//! - Opcode ON (close relay): `0xFF` -//! - Opcode OFF (open relay): `0xFD` -//! - Mask: bitmask for channels, channel N -> bit `(N - 1)` -//! - Device path: /dev/hidraw0` +//! - Opcode ON (close relay): `0xFF`; OFF (open relay): `0xFD` +//! - Mask: bitmask for channels, channel N → bit `(N - 1)` (1-indexed, 1..=8) +//! +//! **Numato USB serial** (`/dev/ttyACMN`, 9600 baud): +//! - Turn on channel N: `relay on N\r` +//! - Turn off channel N: `relay off N\r` +//! - Channels are 0-indexed (0..=7 for an 8-channel board) use std::fs::OpenOptions; -use std::io::Write; +use std::io::Write as _; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -22,96 +25,118 @@ use tracing::debug; use crate::orb::{BootMode, OrbManager}; -const RELAY_ON: u8 = 0xFF; -const RELAY_OFF: u8 = 0xFD; +const HID_ON: u8 = 0xFF; +const HID_OFF: u8 = 0xFD; + +enum RelayDriver { + UsbHid { bank: PathBuf }, + Numato { bank: PathBuf }, +} + +impl RelayDriver { + fn close_channel(&self, channel: u32) -> Result<()> { + match self { + Self::UsbHid { bank } => { + let mask = 1u8 << (channel - 1); + debug!(channel, "usb hid relay ON"); + write_hid_report(bank, HID_ON, mask) + } + Self::Numato { bank } => { + debug!(channel, "numato relay ON"); + let cmd = format!("relay on {channel}\r"); + write_serial_cmd(bank, &cmd) + } + } + } -/// Identifies a single relay on a USB relay board. -#[derive(Debug, Clone)] -pub struct RelayChannel { - /// Which relay board. Maps to `/dev/hidraw{X}`. - pub bank: String, - /// Which channel on that board (1..=8). 
- pub channel: u32, + fn open_channel(&self, channel: u32) -> Result<()> { + match self { + Self::UsbHid { bank } => { + let mask = 1u8 << (channel - 1); + debug!(channel, "usb hid relay OFF"); + write_hid_report(bank, HID_OFF, mask) + } + Self::Numato { bank } => { + debug!(channel, "numato relay OFF"); + let cmd = format!("relay off {channel}\r"); + write_serial_cmd(bank, &cmd) + } + } + } } -/// USB HID relay board controller implementing [`PinController`]. -pub struct UsbRelay { - power: RelayChannel, - recovery: RelayChannel, +pub struct Relay { + driver: RelayDriver, + power: u32, + recovery: u32, off_duration: Duration, on_duration: Duration, } -impl UsbRelay { - pub fn new( - power: RelayChannel, - recovery: RelayChannel, +impl Relay { + pub fn new_usb_hid( + bank: &str, + power: u32, + recovery: u32, off_duration: Duration, on_duration: Duration, ) -> Result { - validate_channel(&power, "power")?; - validate_channel(&recovery, "recovery")?; + ensure!( + (1..=8).contains(&power), + "usb hid power channel must be 1..=8, got {power}" + ); + ensure!( + (1..=8).contains(&recovery), + "usb hid recovery channel must be 1..=8, got {recovery}" + ); Ok(Self { + driver: RelayDriver::UsbHid { + bank: PathBuf::from(bank), + }, power, recovery, off_duration, on_duration, }) } -} - -fn validate_channel(ch: &RelayChannel, name: &str) -> Result<()> { - ensure!( - (1..=8).contains(&ch.channel), - "{name} channel must be 1..=8, got {}", - ch.channel - ); - - Ok(()) -} - -fn channel_to_mask(channel: u32) -> u8 { - 1u8 << (channel - 1) -} - -fn write_relay_report(device: &Path, opcode: u8, mask: u8) -> Result<()> { - let mut f = OpenOptions::new() - .write(true) - .open(device) - .wrap_err_with(|| format!("cannot open relay device: {}", device.display()))?; - - let report = [0x00u8, opcode, mask, 0, 0, 0, 0, 0, 0]; - f.write_all(&report).wrap_err_with(|| { - format!("failed writing HID report to {}", device.display()) - })?; - - Ok(()) -} -fn relay_on(ch: &RelayChannel) -> 
Result<()> { - let device = PathBuf::from(ch.bank.clone()); - let mask = channel_to_mask(ch.channel); - debug!(bank = ch.bank, channel = ch.channel, "relay ON"); - - write_relay_report(&device, RELAY_ON, mask) -} - -fn relay_off(ch: &RelayChannel) -> Result<()> { - let device = PathBuf::from(ch.bank.clone()); - let mask = channel_to_mask(ch.channel); - debug!(bank = ch.bank, channel = ch.channel, "relay OFF"); + pub fn new_numato( + device_path: &str, + power: u32, + recovery: u32, + off_duration: Duration, + on_duration: Duration, + ) -> Result { + ensure!( + (0..=7).contains(&power), + "numato power channel must be 0..=7, got {power}" + ); + ensure!( + (0..=7).contains(&recovery), + "numato recovery channel must be 0..=7, got {recovery}" + ); - write_relay_report(&device, RELAY_OFF, mask) + Ok(Self { + driver: RelayDriver::Numato { + bank: PathBuf::from(device_path), + }, + power, + recovery, + off_duration, + on_duration, + }) + } } -impl OrbManager for UsbRelay { +impl OrbManager for Relay { fn press_power_button(&mut self, duration: Option) -> Result<()> { - relay_on(&self.power)?; + let ch = self.power; + self.driver.close_channel(ch)?; if let Some(duration) = duration { std::thread::sleep(duration); - relay_off(&self.power)?; + self.driver.open_channel(ch)?; } Ok(()) @@ -119,14 +144,15 @@ impl OrbManager for UsbRelay { fn set_boot_mode(&mut self, mode: BootMode) -> Result<()> { match mode { - BootMode::Recovery => relay_on(&self.recovery), - BootMode::Normal => Ok(()), + BootMode::Recovery => self.driver.close_channel(self.recovery), + BootMode::Normal => self.driver.open_channel(self.recovery), } } fn hw_reset(&mut self) -> Result<()> { - relay_off(&self.power)?; - relay_off(&self.recovery)?; + self.driver.open_channel(self.power)?; + self.driver.open_channel(self.recovery)?; + Ok(()) } @@ -142,3 +168,29 @@ impl OrbManager for UsbRelay { Ok(()) } } + +fn write_hid_report(device: &Path, opcode: u8, mask: u8) -> Result<()> { + let mut f = OpenOptions::new() + 
.write(true) + .open(device) + .wrap_err_with(|| format!("cannot open relay device: {}", device.display()))?; + + let report = [0x00u8, opcode, mask, 0, 0, 0, 0, 0, 0]; + f.write_all(&report).wrap_err_with(|| { + format!("failed writing HID report to {}", device.display()) + })?; + + Ok(()) +} + +fn write_serial_cmd(device: &Path, cmd: &str) -> Result<()> { + let mut f = OpenOptions::new() + .write(true) + .open(device) + .wrap_err_with(|| format!("cannot open numato relay: {}", device.display()))?; + + f.write_all(cmd.as_bytes()) + .wrap_err_with(|| format!("failed writing command to {}", device.display()))?; + + Ok(()) +} diff --git a/hil/src/rts.rs b/hil/src/rts.rs index 5fd17c285..d2a43e05f 100644 --- a/hil/src/rts.rs +++ b/hil/src/rts.rs @@ -85,7 +85,7 @@ pub(crate) fn extract(path_to_rts: &Utf8Path) -> Result { let result = run_cmd! { cd $extract_dir; info extracting rts $path_to_rts; - tar xvf $path_to_rts; + tar xf $path_to_rts; info finished extract!; }; result @@ -178,6 +178,8 @@ fn populate_persistent_inner( } pub(crate) fn flash_cmd(variant: FlashVariant, extracted_dir: &Path) -> Result<()> { + use std::process::Command; + let Some(bootloader_dir) = ["ready-to-sign", "rts"] .into_iter() .filter_map(|d| { @@ -194,15 +196,23 @@ pub(crate) fn flash_cmd(variant: FlashVariant, extracted_dir: &Path) -> Result<( }; let cmd_file_name = variant.file_name(); - let result = run_cmd! 
{ - cd $bootloader_dir; - info running $cmd_file_name; - bash $cmd_file_name; - info finished flashing!; - }; - result - .wrap_err("failed to flash rts") - .with_note(|| format!("bootloader_dir was {bootloader_dir:?}"))?; + tracing::info!("running {cmd_file_name}"); + + let status = Command::new("bash") + .arg(cmd_file_name) + .current_dir(&bootloader_dir) + .status() + .wrap_err("failed to spawn flash command")?; + + if !status.success() { + bail!( + "flash command failed with exit code {:?} (bootloader_dir was {bootloader_dir:?})", + status.code() + ); + } + + tracing::info!("finished flashing!"); + Ok(()) } diff --git a/hil/src/serial/mod.rs b/hil/src/serial/mod.rs index c1f2adabb..077cb4a10 100644 --- a/hil/src/serial/mod.rs +++ b/hil/src/serial/mod.rs @@ -38,6 +38,7 @@ pub fn spawn_serial_reader_task( let reader_task = tokio::task::spawn(async move { let mut serial_stream = pin!(ReaderStream::new(reader)); let mut stderr = tokio::io::stderr(); + let mut line_buf: Vec = Vec::new(); loop { let chunk = tokio::select! { _ = &mut kill_rx => break, @@ -46,11 +47,24 @@ pub fn spawn_serial_reader_task( let Some(chunk) = chunk.wrap_err("failed to read from serial")? else { break; }; - let _ = stderr.write_all(&chunk).await; + for &byte in chunk.iter() { + if byte == b'\n' { + line_buf.push(byte); + if !is_progress_line(&line_buf) { + let _ = stderr.write_all(&line_buf).await; + } + line_buf.clear(); + } else { + line_buf.push(byte); + } + } if let Err(SendError(_)) = serial_output_tx.send(chunk) { break; } } + if !line_buf.is_empty() && !is_progress_line(&line_buf) { + let _ = stderr.write_all(&line_buf).await; + } debug!("terminating serial task"); Ok::<(), color_eyre::Report>(()) }); @@ -58,6 +72,16 @@ pub fn spawn_serial_reader_task( (reader_task, kill_tx) } +/// Returns true for progress bar lines like `[ 515.3065 ] [.....] 100%`, +/// which should be suppressed from stderr output. 
+fn is_progress_line(line: &[u8]) -> bool { + line.iter() + .rev() + .find(|b| !b.is_ascii_whitespace()) + .map(|&b| b == b'%') + .unwrap_or(false) +} + #[derive(thiserror::Error, Debug)] pub enum WaitErr { #[error("stream ended without finding the pattern")] From dab2c662fe897f15ab4cc7e08e5daae024024122 Mon Sep 17 00:00:00 2001 From: Ryan Butler Date: Mon, 16 Mar 2026 11:27:10 -0400 Subject: [PATCH 14/66] feat(se050): attribute parsing (#1063) Adds support for the binary parsing of the ObjectAttributes as well as rudimentary support for the .extra.raw files. A sample CLI is provided, try running it with: `RUST_BACKTRACE=1 cargo run -p orb-se050 -- --data 60000001.extra.raw` --- Cargo.lock | 15 + Cargo.toml | 2 + se050/Cargo.toml | 22 ++ se050/example_data/.gitignore | 1 + se050/example_data/60000000.extra.raw | Bin 0 -> 61 bytes se050/example_data/60000001.extra.raw | Bin 0 -> 70 bytes se050/example_data/60000002.extra.raw | Bin 0 -> 70 bytes se050/src/lib.rs | 380 ++++++++++++++++++++++++++ se050/src/main.rs | 57 ++++ thermal-cam-ctrl/Cargo.toml | 2 +- 10 files changed, 478 insertions(+), 1 deletion(-) create mode 100644 se050/Cargo.toml create mode 100644 se050/example_data/.gitignore create mode 100644 se050/example_data/60000000.extra.raw create mode 100644 se050/example_data/60000001.extra.raw create mode 100644 se050/example_data/60000002.extra.raw create mode 100644 se050/src/lib.rs create mode 100644 se050/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 2daf1ee9a..0a6a58f75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8569,6 +8569,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "orb-se050" +version = "0.0.0" +dependencies = [ + "clap", + "color-eyre", + "derive_more 2.1.0", + "orb-telemetry", + "owo-colors 3.5.0", + "thiserror 2.0.17", + "tracing", + "tracing-subscriber", + "zerocopy", +] + [[package]] name = "orb-secure-storage-ca" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index 8f35890e5..361c4987b 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -43,6 +43,7 @@ members = [ "prelude", "qr-link", "s3-helpers", + "se050", "security-utils", "seek-camera/sys", "seek-camera/wrapper", @@ -139,6 +140,7 @@ num-traits = "0.2.19" nusb = "0.2.0" optee-teec = "0.7.0" # CA optee-utee = "0.7.0" # TA +owo-colors = "3" p256 = "0.13.2" pkg-config = "0.3.32" proptest = "1.10.0" diff --git a/se050/Cargo.toml b/se050/Cargo.toml new file mode 100644 index 000000000..9423fce18 --- /dev/null +++ b/se050/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "orb-se050" +description = "se050 utilities" +authors = ["Ryan Butler "] +version = "0.0.0" +publish = false + +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +clap.workspace = true +color-eyre.workspace = true +derive_more = { workspace = true, features = ["from", "into"] } +orb-telemetry.workspace = true +owo-colors.workspace = true +thiserror.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true +zerocopy = { version = "0.8.24", features = ["derive"] } diff --git a/se050/example_data/.gitignore b/se050/example_data/.gitignore new file mode 100644 index 000000000..af6bb3019 --- /dev/null +++ b/se050/example_data/.gitignore @@ -0,0 +1 @@ +!*.raw diff --git a/se050/example_data/60000000.extra.raw b/se050/example_data/60000000.extra.raw new file mode 100644 index 0000000000000000000000000000000000000000..7353ae476f8ce22c6398da4e892ce70a235af6e3 GIT binary patch literal 61 zcmYdbU|?Wo0uo@r2qbmDEQuKpdcxL(p4|I*i`47G#~JiCn6NMeFizRp%k*l~Y?iD= ISq0J)0DHs{y#N3J literal 0 HcmV?d00001 diff --git a/se050/example_data/60000001.extra.raw b/se050/example_data/60000001.extra.raw new file mode 100644 index 0000000000000000000000000000000000000000..90286dc37261f764c2d7974393c64ba25f538b6b GIT binary patch literal 70 zcmYdbU|?iqWMD{O009mVC7{H>zyzc literal 0 HcmV?d00001 diff --git a/se050/example_data/60000002.extra.raw b/se050/example_data/60000002.extra.raw new file mode 100644 
index 0000000000000000000000000000000000000000..30eb085a27d4b89bd850e94664e1e4c32efabe28 GIT binary patch literal 70 zcmYdbU|?coWMD{O009mVC7=Wp1JYVR%)lU>Ur@NR??px6r>*nya%R-7jx%9l2w) -> std::fmt::Result { + write!(f, "{:#010X}", self.0) + } +} + +impl core::fmt::Debug for ObjectId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("ObjectId") + .field(&U32Formatter(self.0)) + .finish() + } +} + +impl core::fmt::Display for ObjectId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", U32Formatter(self.0)) + } +} + +impl ObjectId { + pub fn new(id: u32) -> Self { + Self(id.into()) + } +} + +impl PartialEq for ObjectId { + fn eq(&self, other: &u32) -> bool { + self.0 == *other + } +} + +/// See section 4.3.29 of AN12413 +#[derive(TryFromBytes, Immutable, KnownLayout, Debug, Eq, PartialEq, Clone, Copy)] +#[repr(u8)] +#[expect(non_camel_case_types)] +pub enum SetIndicator { + NOT_SET = 0x01, + SET = 0x02, +} + +#[derive(TryFromBytes, KnownLayout, Immutable, Debug, PartialEq, Eq)] +#[repr(C, align(1))] +pub struct ObjectAttributes { + object_identifier: ObjectId, + object_class: SecureObjectType, + authentication_indicator: SetIndicator, + authentication_attempts_counter: big_endian::U16, + authentication_object_identifier: ObjectId, + maximum_authentication_attempts: big_endian::U16, + policy_set: AttributesSuffix, +} + +/// See section 4.3.8 of AN12413 +#[derive(TryFromBytes, Immutable, KnownLayout, Debug, Eq, PartialEq, Clone, Copy)] +#[repr(u8)] +#[expect(non_camel_case_types)] +pub enum Origin { + ORIGIN_EXTERNAL = 0x01, + ORIGIN_INTERNAL = 0x02, + ORIGIN_PROVISIONED = 0x03, +} + +#[derive(Debug, Error, Eq, PartialEq)] +#[error("failed to parse origin")] +pub struct OriginParseErr; + +#[derive(TryFromBytes, KnownLayout, Immutable, Eq, PartialEq)] +#[repr(C)] +pub struct AttributesSuffix([u8]); + +impl core::fmt::Debug for AttributesSuffix { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) 
-> std::fmt::Result { + struct PolicySetFormatter<'a>(&'a AttributesSuffix); + + impl core::fmt::Debug for PolicySetFormatter<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut list = f.debug_list(); + for policy in self.0.policies() { + list.entry(&policy); + } + + list.finish() + } + } + + f.debug_struct("AttributesSuffix") + .field("origin", &self.origin()) + .field("policy_set", &PolicySetFormatter(self)) + .finish() + } +} + +impl AttributesSuffix { + pub fn origin(&self) -> Result { + let v = self.0.last().unwrap(); + + Origin::try_read_from_bytes(&[*v]).map_err(|_| OriginParseErr) + } + + pub fn policies(&self) -> PolicySetIter<'_> { + PolicySetIter { + attributes_suffix: self, + idx: 0, + } + } +} + +pub struct PolicySetIter<'a> { + attributes_suffix: &'a AttributesSuffix, + idx: usize, +} + +impl<'a> Iterator for PolicySetIter<'a> { + type Item = &'a Policy; + + fn next(&mut self) -> Option { + // subtract 1 to account for origin suffix + let policy_set_len = self.attributes_suffix.0.len() - 1; + if self.idx >= policy_set_len { + return None; + } + + let policy_bytes = &self.attributes_suffix.0[self.idx..policy_set_len]; + let (header, _suffix) = + zerocopy::Ref::<_, Policy>::from_prefix_with_elems(policy_bytes, 0) + .unwrap(); + let (policy, remaining_bytes) = + zerocopy::Ref::<_, Policy>::from_prefix_with_elems( + policy_bytes, + usize::from( + header.length_in_bytes + - (core::mem::size_of::() as u8) + - 4, // length of access_rule header + ), + ) + .unwrap(); + + self.idx = policy_set_len - remaining_bytes.len(); + + Some(zerocopy::Ref::into_ref(policy)) + } +} + +#[derive(FromBytes, KnownLayout, Immutable, Debug, Eq, PartialEq)] +#[repr(C)] +pub struct Policy { + pub length_in_bytes: u8, + pub authentication_object_id: ObjectId, + pub access_rule: AccessRule, +} + +#[derive(FromBytes, KnownLayout, Immutable, Debug, Eq, PartialEq)] +#[repr(C)] +pub struct AccessRule { + pub header: [u8; 4], + pub extension: [u8], +} 
+ +pub const CHIP_ID_LEN: usize = 18; +pub const FRESHNESS_LEN: usize = 16; +pub const TIMESTAMP_LEN: usize = 12; + +#[derive(Debug)] +pub struct ExtraData<'a> { + pub object_attributes: &'a ObjectAttributes, + pub timestamp: &'a [u8; TIMESTAMP_LEN], + pub freshness: &'a [u8; FRESHNESS_LEN], + pub chip_id: &'a [u8; CHIP_ID_LEN], +} + +#[derive(Debug, Error)] +pub enum ParseExtraDataErr { + #[error("the supplied bytes were too short to be valid")] + TooShort, + #[error("error during binary parsing: {0}")] + ConvertError(#[from] ConvertErr), +} + +#[derive(Debug, thiserror::Error)] +pub enum ConvertErr { + #[error("alignment")] + Alignment, + #[error("size")] + Size, + #[error("validity")] + Validity, +} + +impl From> for ConvertErr { + fn from(value: zerocopy::ConvertError) -> Self { + use zerocopy::ConvertError; + match value { + ConvertError::Alignment(_) => Self::Alignment, + ConvertError::Size(_) => Self::Size, + ConvertError::Validity(_) => Self::Validity, + } + } +} + +impl<'a> TryFrom<&'a [u8]> for ExtraData<'a> { + type Error = ParseExtraDataErr; + + fn try_from(value: &'a [u8]) -> Result { + let Some((obj_attrs, suffix)) = + value.split_last_chunk::<{ TIMESTAMP_LEN + FRESHNESS_LEN + CHIP_ID_LEN }>() + else { + return Err(ParseExtraDataErr::TooShort); + }; + + let object_attributes = ObjectAttributes::try_ref_from_bytes(obj_attrs) + .map_err(ConvertErr::from)?; + + let (timestamp, suffix) = suffix + .split_first_chunk::() + .expect("infallible"); + + let (freshness, suffix) = suffix + .split_first_chunk::() + .expect("infallible"); + + let chip_id: &[u8; CHIP_ID_LEN] = suffix.try_into().expect("infallible"); + + Ok(Self { + object_attributes, + timestamp, + freshness, + chip_id, + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + + const ORB_SESSION_KEY: &[u8] = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/example_data/60000000.extra.raw" + )); + const ORB_ATTESTATION_KEY: &[u8] = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + 
"/example_data/60000001.extra.raw" + )); + const ORB_IRIS_KEY: &[u8] = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/example_data/60000002.extra.raw" + )); + + #[test] + fn test_object_id_debug() { + let obj = ObjectId::new(0x6000_0000); + assert_eq!(format!("{obj:?}"), "ObjectId(0x60000000)"); + + let obj = ObjectId::new(0x0000_6000); + assert_eq!(format!("{obj:?}"), "ObjectId(0x00006000)"); + } + + #[test] + fn test_object_id_display() { + let obj = ObjectId::new(0x6000_0000); + assert_eq!(format!("{obj}"), "0x60000000"); + + let obj = ObjectId::new(0x0000_6000); + assert_eq!(format!("{obj}"), "0x00006000"); + } + + #[test] + fn test_orb_session_key_parses() { + let suffix_len = FRESHNESS_LEN + TIMESTAMP_LEN + CHIP_ID_LEN; + let obj_attrs = &ORB_SESSION_KEY[..ORB_SESSION_KEY.len() - suffix_len]; + + assert_eq!(obj_attrs.len() + suffix_len, ORB_SESSION_KEY.len()); + assert_eq!(suffix_len, 46); + assert_eq!(obj_attrs.len(), 15); + + let foo = ObjectAttributes::try_ref_from_bytes(obj_attrs).unwrap(); + + assert_eq!(foo.object_identifier, 0x60000000); + assert_eq!(foo.object_class, SecureObjectType::EC_PUB_KEY); + assert_eq!(foo.authentication_indicator, SetIndicator::SET); + assert_eq!(foo.authentication_attempts_counter, 0); + assert_eq!(foo.authentication_object_identifier, 0x00000000); + assert_eq!(foo.maximum_authentication_attempts, 0); + assert_eq!(foo.policy_set.origin(), Ok(Origin::ORIGIN_EXTERNAL)); + + let mut it = foo.policy_set.policies(); + + assert!(it.next().is_none()); + } + + #[test] + fn test_orb_attestation_key_parses() { + let suffix_len = FRESHNESS_LEN + TIMESTAMP_LEN + CHIP_ID_LEN; + let obj_attrs = &ORB_ATTESTATION_KEY[..ORB_ATTESTATION_KEY.len() - suffix_len]; + + assert_eq!(obj_attrs.len() + suffix_len, ORB_ATTESTATION_KEY.len()); + assert_eq!(suffix_len, 46); + assert_eq!(obj_attrs.len(), 24); + + let foo = ObjectAttributes::try_ref_from_bytes(obj_attrs).unwrap(); + + assert_eq!(foo.object_identifier, 0x60000001); + 
assert_eq!(foo.object_class, SecureObjectType::EC_KEY_PAIR); + assert_eq!(foo.authentication_indicator, SetIndicator::NOT_SET); + assert_eq!(foo.authentication_attempts_counter, 0); + assert_eq!(foo.authentication_object_identifier, 0x60000000); + assert_eq!(foo.maximum_authentication_attempts, 0); + assert_eq!(foo.policy_set.origin(), Ok(Origin::ORIGIN_INTERNAL)); + + let mut it = foo.policy_set.policies(); + + let p1 = it.next().unwrap(); + assert_eq!(p1.length_in_bytes, 8); + assert_eq!(p1.authentication_object_id, 0x60000000); + assert_eq!(core::mem::size_of_val(&p1.access_rule), 4); + + assert!(it.next().is_none()) + } + + #[test] + fn test_orb_iris_key_parses() { + let suffix_len = FRESHNESS_LEN + TIMESTAMP_LEN + CHIP_ID_LEN; + let obj_attrs = &ORB_IRIS_KEY[..ORB_IRIS_KEY.len() - suffix_len]; + + assert_eq!(obj_attrs.len() + suffix_len, ORB_IRIS_KEY.len()); + assert_eq!(suffix_len, 46); + assert_eq!(obj_attrs.len(), 24); + + let foo = ObjectAttributes::try_ref_from_bytes(obj_attrs).unwrap(); + + assert_eq!(foo.object_identifier, 0x60000002); + assert_eq!(foo.object_class, SecureObjectType::EC_KEY_PAIR); + assert_eq!(foo.authentication_indicator, SetIndicator::NOT_SET); + assert_eq!(foo.authentication_attempts_counter, 0); + assert_eq!(foo.authentication_object_identifier, 0x60000000); + assert_eq!(foo.maximum_authentication_attempts, 0); + assert_eq!(foo.policy_set.origin(), Ok(Origin::ORIGIN_INTERNAL)); + + let mut it = foo.policy_set.policies(); + + let p1 = it.next().unwrap(); + assert_eq!(p1.length_in_bytes, 8); + assert_eq!(p1.authentication_object_id, 0x60000000); + assert_eq!(core::mem::size_of_val(&p1.access_rule), 4); + + assert!(it.next().is_none()) + } +} diff --git a/se050/src/main.rs b/se050/src/main.rs new file mode 100644 index 000000000..ae032616e --- /dev/null +++ b/se050/src/main.rs @@ -0,0 +1,57 @@ +use std::path::PathBuf; + +use clap::{ + builder::{styling::AnsiColor, Styles}, + Parser, +}; +use color_eyre::{eyre::Context, Result}; 
+use orb_se050::ExtraData; +use owo_colors::OwoColorize; +use tracing::{debug, info}; + +#[derive(Debug, Parser)] +#[clap( + author, + about, + version, + styles = clap_v3_styles(), +)] +struct Args { + #[arg(long)] + data: PathBuf, +} + +fn main() -> Result<()> { + color_eyre::install()?; + let _ = orb_telemetry::TelemetryConfig::new().init(); + + let args = Args::parse(); + info!("hello world"); + + let data = std::fs::read(args.data).wrap_err("failed to read extradata file")?; + debug!("read {} bytes", data.len()); + + let data: ExtraData = data + .as_slice() + .try_into() + .wrap_err("failed to parse ExtraData")?; + + println!( + "{} {:?}", + "object attributes:".bold().green(), + data.object_attributes + ); + println!("{} {:02X?}", "timestamp:".bold().green(), data.timestamp); + println!("{} {:02X?}", "freshness:".bold().green(), data.freshness); + println!("{} {:02X?}", "chip_id:".bold().green(), data.chip_id); + + Ok(()) +} + +fn clap_v3_styles() -> Styles { + Styles::styled() + .header(AnsiColor::Yellow.on_default()) + .usage(AnsiColor::Green.on_default()) + .literal(AnsiColor::Green.on_default()) + .placeholder(AnsiColor::Green.on_default()) +} diff --git a/thermal-cam-ctrl/Cargo.toml b/thermal-cam-ctrl/Cargo.toml index 414bdf433..0e28d016b 100644 --- a/thermal-cam-ctrl/Cargo.toml +++ b/thermal-cam-ctrl/Cargo.toml @@ -19,7 +19,7 @@ indicatif = "0.17" orb-build-info.workspace = true orb-info = { workspace = true, features = ["orb-id", "orb-os-release"] } orb-telemetry.workspace = true -owo-colors = "3" +owo-colors.workspace = true png = "0.17" seek-camera.workspace = true serde.workspace = true From b60d84ed467314eff147348b58d0820300a7f5de Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Mon, 16 Mar 2026 18:09:27 +0100 Subject: [PATCH 15/66] chore(hil): add 2 more HILs (#1077) add 2 more HILs to run GH jobs --- .github/workflows/deploy-hil.yaml | 2 +- .../worldcoin-hil-munich-3/configuration.nix | 17 +++++++++++++++++ 
.../worldcoin-hil-munich-8/configuration.nix | 17 +++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-hil.yaml b/.github/workflows/deploy-hil.yaml index 9a54408d2..e32e5b91b 100644 --- a/.github/workflows/deploy-hil.yaml +++ b/.github/workflows/deploy-hil.yaml @@ -11,7 +11,7 @@ on: required: true env: - ALL_TARGETS: '["worldcoin-hil-munich-0","worldcoin-hil-munich-2","worldcoin-hil-munich-5","worldcoin-hil-munich-9","worldcoin-hil-munich-10","worldcoin-hil-munich-11"]' + ALL_TARGETS: '["worldcoin-hil-munich-0","worldcoin-hil-munich-2","worldcoin-hil-munich-3","worldcoin-hil-munich-5","worldcoin-hil-munich-8","worldcoin-hil-munich-9","worldcoin-hil-munich-10","worldcoin-hil-munich-11"]' jobs: prepare: diff --git a/nix/machines/worldcoin-hil-munich-3/configuration.nix b/nix/machines/worldcoin-hil-munich-3/configuration.nix index 449a2c968..8065ddbaf 100644 --- a/nix/machines/worldcoin-hil-munich-3/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-3/configuration.nix @@ -15,4 +15,21 @@ ../nixos-common.nix ../hil-common.nix ]; + + worldcoin.orbPlatform = "pearl"; + + environment.etc."worldcoin/orb.yaml" = { + text = '' + orb_id: 287571fc + platform: ${config.worldcoin.orbPlatform} + # Pin controller configuration for orb-hil + # Type of pin controller to use (ftdi, relay) + pin_ctrl_type: usbrelay + serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG031A7H-if00-port0" + relay_bank: "/dev/hidraw0" + relay_power_channel: 2 + relay_recovery_channel: 1 + ''; + mode = "0644"; + }; } diff --git a/nix/machines/worldcoin-hil-munich-8/configuration.nix b/nix/machines/worldcoin-hil-munich-8/configuration.nix index 449a2c968..bd5314910 100644 --- a/nix/machines/worldcoin-hil-munich-8/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-8/configuration.nix @@ -15,4 +15,21 @@ ../nixos-common.nix ../hil-common.nix ]; + + worldcoin.orbPlatform = "diamond"; + + environment.etc."worldcoin/orb.yaml" = { + text = 
'' + orb_id: 0aaab97e + platform: ${config.worldcoin.orbPlatform} + # Pin controller configuration for orb-hil + # Type of pin controller to use (ftdi, relay) + pin_ctrl_type: numato_relay + serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_B00370CB-if00-port0" + relay_bank: "/dev/ttyACM0" + relay_power_channel: 5 + relay_recovery_channel: 6 + ''; + mode = "0644"; + }; } From 05afa7fd926ef1acbd2a808e7dcb5dbf5ab0d216 Mon Sep 17 00:00:00 2001 From: chrisgalanis <50170911+chrisgalanis@users.noreply.github.com> Date: Mon, 16 Mar 2026 18:32:42 +0100 Subject: [PATCH 16/66] fix: Bump Orb HIL (#1078) This pull request updates the `orb-hil` package to a new version and ensures the correct checksum is used for the new release. Version and checksum update: * Updated the `version` field in `orb-hil.nix` from `"0.0.2-beta.16"` to `"0.0.2-beta.17"`, and replaced the `sha256` checksum to match the new release. --- nix/machines/worldcoin-hil-munich-0/configuration.nix | 2 +- nix/machines/worldcoin-hil-munich-10/configuration.nix | 2 +- nix/machines/worldcoin-hil-munich-11/configuration.nix | 2 +- nix/machines/worldcoin-hil-munich-2/configuration.nix | 2 +- nix/machines/worldcoin-hil-munich-5/configuration.nix | 2 +- nix/machines/worldcoin-hil-munich-9/configuration.nix | 2 +- nix/packages/orb-hil.nix | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/nix/machines/worldcoin-hil-munich-0/configuration.nix b/nix/machines/worldcoin-hil-munich-0/configuration.nix index d6577732e..c8488fc20 100644 --- a/nix/machines/worldcoin-hil-munich-0/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-0/configuration.nix @@ -24,7 +24,7 @@ platform: ${config.worldcoin.orbPlatform} # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) - pin_ctrl_type: usbrelay + pin_ctrl_type: usb_relay serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG01OJYI-if00-port0" relay_bank: "/dev/hidraw0" relay_power_channel: 2 diff --git 
a/nix/machines/worldcoin-hil-munich-10/configuration.nix b/nix/machines/worldcoin-hil-munich-10/configuration.nix index c2aa3d0e0..2ae045b19 100644 --- a/nix/machines/worldcoin-hil-munich-10/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-10/configuration.nix @@ -24,7 +24,7 @@ platform: ${config.worldcoin.orbPlatform} # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) - pin_ctrl_type: usbrelay + pin_ctrl_type: usb_relay serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_B00370CB-if00-port0" relay_bank: "/dev/hidraw0" relay_power_channel: 2 diff --git a/nix/machines/worldcoin-hil-munich-11/configuration.nix b/nix/machines/worldcoin-hil-munich-11/configuration.nix index db3f8435a..8667e2726 100644 --- a/nix/machines/worldcoin-hil-munich-11/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-11/configuration.nix @@ -24,7 +24,7 @@ platform: ${config.worldcoin.orbPlatform} # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) - pin_ctrl_type: usbrelay + pin_ctrl_type: usb_relay serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG031A17-if00-port0" relay_bank: "/dev/hidraw0" relay_power_channel: 2 diff --git a/nix/machines/worldcoin-hil-munich-2/configuration.nix b/nix/machines/worldcoin-hil-munich-2/configuration.nix index 290769d27..666ddfa2f 100644 --- a/nix/machines/worldcoin-hil-munich-2/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-2/configuration.nix @@ -25,7 +25,7 @@ # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) - pin_ctrl_type: ftdi + pin_ctrl_type: usb_relay serial_num: BG02N9B6 serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG02N9B6-if00-port0" diff --git a/nix/machines/worldcoin-hil-munich-5/configuration.nix b/nix/machines/worldcoin-hil-munich-5/configuration.nix index e0097350e..4e64c5804 100644 --- a/nix/machines/worldcoin-hil-munich-5/configuration.nix +++ 
b/nix/machines/worldcoin-hil-munich-5/configuration.nix @@ -24,7 +24,7 @@ platform: ${config.worldcoin.orbPlatform} # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) - pin_ctrl_type: ftdi + pin_ctrl_type: usb_relay serial_num: BG02MT7D serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG02MT7D-if00-port0" diff --git a/nix/machines/worldcoin-hil-munich-9/configuration.nix b/nix/machines/worldcoin-hil-munich-9/configuration.nix index 443ae6224..e7ba3de40 100644 --- a/nix/machines/worldcoin-hil-munich-9/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-9/configuration.nix @@ -24,7 +24,7 @@ platform: ${config.worldcoin.orbPlatform} # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) - pin_ctrl_type: usbrelay + pin_ctrl_type: usb_relay # serial_num: BG00ZAZ4 serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG00ZAZ4-if00-port0" relay_bank: "/dev/hidraw0" diff --git a/nix/packages/orb-hil.nix b/nix/packages/orb-hil.nix index 8d10fde14..eae38de30 100644 --- a/nix/packages/orb-hil.nix +++ b/nix/packages/orb-hil.nix @@ -2,11 +2,11 @@ { pkgs }: pkgs.stdenv.mkDerivation rec { pname = "orb-hil"; - version = "0.0.2-beta.16"; + version = "0.0.2-beta.17"; src = pkgs.fetchurl { url = "https://github.com/worldcoin/orb-software/releases/download/orb-hil%2Fv${version}/orb-hil_x86_64"; - sha256 = "sha256-LfbtJFeXcZOzAv1SgU0I1ha9wVAWfMy4Gj9pxog5sJc="; + sha256 = "sha256-+kAq2nyJInqQg/RSGn06Zncgc0G4prf+/YxA1B6ro0A="; }; dontUnpack = true; From 0adc58fb56d3d0bc4591846902c6740338f6037a Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Mon, 16 Mar 2026 18:51:06 +0100 Subject: [PATCH 17/66] feat: check recovery and do retries in orb-hil (#1071) it will replace a part of testing code from orb-os workflows --- hil/src/boot.rs | 15 ++------- hil/src/commands/reboot.rs | 69 ++++++++++++++++++++++++++++++-------- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/hil/src/boot.rs 
b/hil/src/boot.rs index 7c9a34ad4..1c4851f41 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -23,11 +23,8 @@ pub async fn is_recovery_mode_detected() -> Result { /// The controller's reset() method is called between power-off and power-on /// to ensure pins return to their default state. #[tracing::instrument(skip(controller))] -pub async fn reboot( - recovery: bool, - mut controller: Box, -) -> Result<()> { - tokio::task::spawn_blocking(move || -> Result<(), color_eyre::Report> { +pub async fn reboot(recovery: bool, controller: &mut dyn OrbManager) -> Result<()> { + tokio::task::block_in_place(|| -> Result<()> { info!("Turning off"); controller.set_boot_mode(BootMode::Normal)?; controller.turn_off()?; @@ -50,16 +47,8 @@ pub async fn reboot( controller.set_boot_mode(mode)?; controller.turn_on()?; - controller - .destroy() - .wrap_err("failed to destroy pin controller")?; - info!("Done triggering reboot"); Ok(()) }) - .await - .wrap_err("task panicked")??; - - Ok(()) } diff --git a/hil/src/commands/reboot.rs b/hil/src/commands/reboot.rs index 27df9a070..430d9cad5 100644 --- a/hil/src/commands/reboot.rs +++ b/hil/src/commands/reboot.rs @@ -1,5 +1,8 @@ use clap::Parser; -use color_eyre::{eyre::WrapErr as _, Result}; +use color_eyre::{eyre::eyre, eyre::WrapErr as _, Result}; +use std::num::NonZeroU8; +use tokio::time::Duration; +use tracing::{info, warn}; use crate::orb::{orb_manager_from_config, OrbConfig}; @@ -8,6 +11,10 @@ use crate::orb::{orb_manager_from_config, OrbConfig}; pub struct Reboot { #[arg(short)] recovery: bool, + #[arg(short, long, default_value_t = true, action = clap::ArgAction::Set)] + make_sure: bool, + #[arg(short, long, default_value_t = NonZeroU8::new(1).unwrap())] + attempts_count: NonZeroU8, #[command(flatten)] orb_config: OrbConfig, } @@ -16,20 +23,54 @@ impl Reboot { pub async fn run(self) -> Result<()> { let orb_config = self.orb_config.use_file_if_exists()?; - let controller = tokio::task::spawn_blocking(move || { + let mut 
controller = tokio::task::block_in_place(|| { orb_manager_from_config(&orb_config) .wrap_err("failed to create pin controller") - }) - .await - .wrap_err("task panicked")??; - - crate::boot::reboot(self.recovery, controller) - .await - .wrap_err_with(|| { - format!( - "failed to reboot into {} mode", - if self.recovery { "recovery" } else { "normal" } - ) - }) + })?; + + let orb_mode = if self.recovery { "recovery" } else { "normal" }; + + for i in 1..=self.attempts_count.into() { + if let Err(e) = + crate::boot::reboot(self.recovery, controller.as_mut()).await + { + warn!("Attempt {}, cannot reboot: {}", i, e); + controller = tokio::task::block_in_place(|| { + orb_manager_from_config(&orb_config) + .wrap_err("failed to create pin controller") + })?; + continue; + } + + if !self.make_sure { + return Ok(()); + } + + // some time is required to get into recovery + tokio::time::sleep(Duration::from_secs(5)).await; + + match crate::boot::is_recovery_mode_detected().await { + Err(e) => { + warn!( + "Attempt {}, cannot get into {} mode because of error: {}", + i, orb_mode, e + ); + } + Ok(is_in_rcm) => { + if is_in_rcm != self.recovery { + warn!("Attempt {}, cannot get into {} mode", i, orb_mode); + } else { + info!("Attempt {}, got into {} mode", i, orb_mode); + return Ok(()); + } + } + } + } + + Err(eyre!( + "Cannot get into {} mode with {} attempts", + orb_mode, + self.attempts_count + )) } } From 7d5b23e7788e37971c315c990fdcedab9b65cdd7 Mon Sep 17 00:00:00 2001 From: chrisgalanis <50170911+chrisgalanis@users.noreply.github.com> Date: Mon, 16 Mar 2026 20:58:38 +0100 Subject: [PATCH 18/66] fix: Config File For Serial Debugger (#1080) This pull request updates the pin controller configuration for the `orb-hil` device in the `worldcoin-hil-munich-8` machine setup. The main change is updating the `serial_path` to reference a different USB device. 
Pin controller configuration update: * Changed the `serial_path` value in `nix/machines/worldcoin-hil-munich-8/configuration.nix` to use `/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG010290-if00-port0` instead of the previous device, ensuring the configuration points to the correct hardware. --- nix/machines/worldcoin-hil-munich-8/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/machines/worldcoin-hil-munich-8/configuration.nix b/nix/machines/worldcoin-hil-munich-8/configuration.nix index bd5314910..39226c1dc 100644 --- a/nix/machines/worldcoin-hil-munich-8/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-8/configuration.nix @@ -25,7 +25,7 @@ # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) pin_ctrl_type: numato_relay - serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_B00370CB-if00-port0" + serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG010290-if00-port0" relay_bank: "/dev/ttyACM0" relay_power_channel: 5 relay_recovery_channel: 6 From 7535dd73fc36b5eea3de2bf6ddfc4538bc67ef71 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: Tue, 17 Mar 2026 09:13:52 +0000 Subject: [PATCH 19/66] feat(orb-update-agent): remove stale, unused key (#1060) Removes the stale clientkey, that is not used. 
It will be removed from orb-os as well --- update-agent/src/settings/args.rs | 3 --- update-agent/src/settings/mod.rs | 1 - update-agent/src/settings/tests.rs | 16 ---------------- update-agent/t/orb_update_agent.conf | 1 - update-agent/t/qemu-runner.js | 1 - 5 files changed, 22 deletions(-) diff --git a/update-agent/src/settings/args.rs b/update-agent/src/settings/args.rs index 1bdc90162..eb79a0854 100644 --- a/update-agent/src/settings/args.rs +++ b/update-agent/src/settings/args.rs @@ -28,9 +28,6 @@ pub struct Args { #[arg(long, value_enum)] #[serde(skip_serializing_if = "Option::is_none")] pub verify_manifest_signature_against: Option, - #[arg(long)] - #[serde(skip_serializing_if = "Option::is_none")] - pub clientkey: Option, /// The workspace destination. #[arg(long, alias = "wd")] #[serde(skip_serializing_if = "Option::is_none")] diff --git a/update-agent/src/settings/mod.rs b/update-agent/src/settings/mod.rs index 613bf3d71..3e5e003c6 100644 --- a/update-agent/src/settings/mod.rs +++ b/update-agent/src/settings/mod.rs @@ -33,7 +33,6 @@ pub struct Settings { pub versions: PathBuf, /// Pub keys are in [`orb_update_agent_core::pubkeys`] pub verify_manifest_signature_against: Backend, - pub clientkey: PathBuf, pub active_slot: Slot, pub workspace: PathBuf, pub downloads: PathBuf, diff --git a/update-agent/src/settings/tests.rs b/update-agent/src/settings/tests.rs index 24b706f04..5a53d60e2 100644 --- a/update-agent/src/settings/tests.rs +++ b/update-agent/src/settings/tests.rs @@ -14,7 +14,6 @@ use crate::settings::{Backend, Settings}; const CFG_FILE_CONTENTS_TRUTHY: &str = r#" versions = "/config/versions" verify_manifest_signature_against = "stage" - clientkey = "/config/clientkey" workspace = "/config/workspace" downloads = "/config/downloads" id = "/config/id" @@ -29,7 +28,6 @@ const CFG_FILE_CONTENTS_TRUTHY: &str = r#" const CFG_FILE_CONTENTS_FALSY: &str = r#" versions = "/config/versions" verify_manifest_signature_against = "stage" - clientkey = 
"/config/clientkey" workspace = "/config/workspace" downloads = "/config/downloads" id = "/config/id" @@ -50,7 +48,6 @@ fn make_args(args: &str) -> Result { /// boolean values are set to the value given by `set_bools_to`. fn set_env(jail: &mut Jail, set_bools_to: bool) { let bool_str = if set_bools_to { "true" } else { "false" }; - jail.set_env("update_agent_clientkey", "/env/clientkey"); jail.set_env("update_agent_workspace", "/env/workspace"); jail.set_env("update_agent_downloads", "/env/downloads"); jail.set_env("update_agent_id", "/env/id"); @@ -68,7 +65,6 @@ fn set_env(jail: &mut Jail, set_bools_to: bool) { fn test_cli_args_override_config_file_and_env_vars() { const CLI_ARGS: &str = r#" update_agent - --clientkey /args/clientkey --workspace /args/workspace --downloads /args/downloads --id /args/id @@ -91,7 +87,6 @@ fn test_cli_args_override_config_file_and_env_vars() { let crate::Settings { versions, verify_manifest_signature_against, - clientkey, active_slot, workspace, downloads, @@ -105,7 +100,6 @@ fn test_cli_args_override_config_file_and_env_vars() { token, } = Settings::get(&args, "config.toml", "update_agent_", current_slot)?; assert_eq!(active_slot, current_slot); - assert_eq!(clientkey.as_os_str(), args.clientkey.unwrap().as_str()); assert_eq!(workspace.as_os_str(), args.workspace.unwrap().as_str()); assert_eq!(downloads.as_os_str(), args.downloads.unwrap().as_str()); assert_eq!(id, args.id.unwrap()); @@ -135,7 +129,6 @@ fn test_cli_args_override_config_file_and_env_vars() { fn test_cli_args_override_config_file() { const CLI_ARGS: &str = r#" update_agent - --clientkey /args/clientkey --workspace /args/workspace --downloads /args/downloads --id /args/id @@ -157,7 +150,6 @@ fn test_cli_args_override_config_file() { let crate::Settings { versions, verify_manifest_signature_against, - clientkey, active_slot, workspace, downloads, @@ -171,7 +163,6 @@ fn test_cli_args_override_config_file() { token, } = Settings::get(&args, "config.toml", "update_agent_", 
current_slot)?; assert_eq!(active_slot, current_slot); - assert_eq!(clientkey.as_os_str(), args.clientkey.unwrap().as_str()); assert_eq!(workspace.as_os_str(), args.workspace.unwrap().as_str()); assert_eq!(downloads.as_os_str(), args.downloads.unwrap().as_str()); assert_eq!(id, args.id.unwrap()); @@ -205,7 +196,6 @@ fn test_only_setting_config_file_works() { let crate::Settings { versions, verify_manifest_signature_against, - clientkey, active_slot, workspace, downloads, @@ -219,7 +209,6 @@ fn test_only_setting_config_file_works() { token, } = Settings::get(&args, "config.toml", "update_agent_", Slot::A)?; assert_eq!(active_slot, Slot::A); - assert_eq!(clientkey, Path::new("/config/clientkey")); assert_eq!(workspace, Path::new("/config/workspace")); assert_eq!(downloads, Path::new("/config/downloads")); assert_eq!(id, "/config/id"); @@ -249,7 +238,6 @@ fn test_env_override_config_file() { let crate::Settings { versions, verify_manifest_signature_against, - clientkey, active_slot, workspace, downloads, @@ -263,7 +251,6 @@ fn test_env_override_config_file() { token, } = Settings::get(&args, "config.toml", "update_agent_", current_slot)?; assert_eq!(active_slot, current_slot); - assert_eq!(clientkey, Path::new("/env/clientkey")); assert_eq!(workspace, Path::new("/env/workspace")); assert_eq!(downloads, Path::new("/env/downloads")); assert_eq!(id, "/env/id"); @@ -288,7 +275,6 @@ const PROD_CFG_FILE_CONTENTS: &str = r#" components = "/config/components" verify_manifest_signature_against = "prod" cacert = "/config/downloads" - clientkey = "/config/clientkey" update_location = "/config/update_location" workspace = "/config/workspace" downloads = "/config/downloads" @@ -313,7 +299,6 @@ fn production_config() { let crate::Settings { versions, verify_manifest_signature_against, - clientkey, active_slot, workspace, downloads, @@ -327,7 +312,6 @@ fn production_config() { token, } = Settings::get(&args, "config.toml", "update_agent_", Slot::A)?; assert_eq!(active_slot, 
Slot::A); - assert_eq!(clientkey, Path::new("/config/clientkey")); assert_eq!(workspace, Path::new("/config/workspace")); assert_eq!(downloads, Path::new("/config/downloads")); assert_eq!(id, "/args/id"); diff --git a/update-agent/t/orb_update_agent.conf b/update-agent/t/orb_update_agent.conf index b6a26d466..908349645 100644 --- a/update-agent/t/orb_update_agent.conf +++ b/update-agent/t/orb_update_agent.conf @@ -1,7 +1,6 @@ versions = "/usr/persistent/versions.json" components = "/usr/persistent/components.json" cacert = "/etc/ssl/worldcoin-staging-ota.pem" -clientkey = "/etc/ssl/private/worldcoin-staging-ota-identity.key" update_location = "/mnt/claim.json" workspace = "/mnt/scratch" downloads = "/mnt/scratch/downloads" diff --git a/update-agent/t/qemu-runner.js b/update-agent/t/qemu-runner.js index 36af9bfe4..6dd1636e1 100755 --- a/update-agent/t/qemu-runner.js +++ b/update-agent/t/qemu-runner.js @@ -363,7 +363,6 @@ write_files: versions = "/usr/persistent/versions.json" components = "/usr/persistent/components.json" cacert = "/etc/ssl/worldcoin-staging-ota.pem" - clientkey = "/etc/ssl/private/worldcoin-staging-ota-identity.key" update_location = "/mnt/claim.json" workspace = "/mnt/scratch" downloads = "/mnt/scratch/downloads" From 1002fd0181b9face0a9e2d62db28587906b7be01 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Tue, 17 Mar 2026 12:09:13 +0100 Subject: [PATCH 20/66] chore(hil): bump orb-hil to beta-18 (#1082) bump --- Cargo.lock | 4 ++-- nix/packages/orb-hil.nix | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a6a58f75..2292a033c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6566,9 +6566,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = 
"373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" dependencies = [ "twox-hash", ] diff --git a/nix/packages/orb-hil.nix b/nix/packages/orb-hil.nix index eae38de30..d72d59bc3 100644 --- a/nix/packages/orb-hil.nix +++ b/nix/packages/orb-hil.nix @@ -2,11 +2,11 @@ { pkgs }: pkgs.stdenv.mkDerivation rec { pname = "orb-hil"; - version = "0.0.2-beta.17"; + version = "0.0.2-beta.18"; src = pkgs.fetchurl { url = "https://github.com/worldcoin/orb-software/releases/download/orb-hil%2Fv${version}/orb-hil_x86_64"; - sha256 = "sha256-+kAq2nyJInqQg/RSGn06Zncgc0G4prf+/YxA1B6ro0A="; + sha256 = "sha256-LRz71fDnHIMH0VYiimGEBOP6+kW8gGvNgBjRspzQDko="; }; dontUnpack = true; From ac03611c93fedfd4ec8f0fae4d1c2b1a2dfda732 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Tue, 17 Mar 2026 13:46:08 +0100 Subject: [PATCH 21/66] feat: support usb-eth in orb normal mode (#1081) This way we can access an orb via usb-eth when it's booted. Also requires a simultaneous change in orb-os to replace orbeth0 Good run: https://github.com/worldcoin/orb-os/actions/runs/23190117700/job/67383711728#step:3:16642 --- nix/machines/hil-common.nix | 52 ++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 4dfc2c812..18ecf9213 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -10,7 +10,7 @@ let username = "worldcoin"; ghRunnerUser = "gh-runner-user"; orb-hil = pkgs.callPackage ../packages/orb-hil.nix { }; - mkConnection = ( + mkRcmConnection = ( number: let n = builtins.toString number; @@ -20,7 +20,7 @@ let connection = { autoconnect-priority = "-999"; id = "Orb RCM Ethernet ${n}"; - interface-name = "orbeth${n}"; + interface-name = "orbrcm${n}"; type = "ethernet"; }; ethernet = { }; @@ -35,6 +35,31 @@ let }; } ); + mkNrmConnection = ( + number: + let + n = builtins.toString number; + in + { + "Orb NRM Ethernet ${n}" = { + connection = { + 
autoconnect-priority = "-999"; + id = "Orb NRM Ethernet ${n}"; + interface-name = "orbnrm${n}"; + type = "ethernet"; + }; + ethernet = { }; + ipv4 = { + method = "manual"; + address1 = "192.168.55.3/24"; + }; + ipv6 = { + method = "disabled"; + }; + proxy = { }; + }; + } + ); in { options.worldcoin.orbPlatform = lib.mkOption { @@ -64,19 +89,26 @@ in # Enable networking networking.networkmanager.enable = true; networking.networkmanager.ensureProfiles.profiles = lib.attrsets.mergeAttrsList [ - (mkConnection 0) - (mkConnection 1) - (mkConnection 2) - (mkConnection 3) + (mkRcmConnection 0) + (mkNrmConnection 0) ]; # Give the jetson USB ethernet a known name services.udev.extraRules = '' + # recovery ACTION=="add", \ SUBSYSTEM=="net", \ SUBSYSTEMS=="usb", \ ATTRS{idVendor}=="0955", \ ATTRS{idProduct}=="7035", \ - NAME="orbeth%n" + NAME="orbrcm%n" + + # pearl normal + ACTION=="add", \ + SUBSYSTEM=="net", \ + SUBSYSTEMS=="usb", \ + ATTRS{idVendor}=="0955", \ + ATTRS{idProduct}=="7020", \ + NAME="orbnrm%n" # Allow plugdev group to access USB relay hidraw devices KERNEL=="hidraw*", SUBSYSTEM=="hidraw", MODE="0664", GROUP="plugdev" @@ -193,6 +225,12 @@ in }; }; + services.avahi = { + enable = true; + nssmdns4 = true; + openFirewall = true; + }; + services.teleport = { enable = true; package = pkgs.teleport_17; From f368bdda2f2f2283f023950606f0f85296e5589c Mon Sep 17 00:00:00 2001 From: Ryan Butler Date: Tue, 17 Mar 2026 09:51:44 -0400 Subject: [PATCH 22/66] feat(se050-reprovision): initial scaffolding (#1079) Creates a new one-shot called orb-se050-reprovision. This PR is just the scaffolding, its not the full implementation. 
--- Cargo.lock | 20 ++++++ Cargo.toml | 1 + se050-reprovision/Cargo.toml | 35 +++++++++++ se050-reprovision/build.rs | 3 + ...vision.worldcoin-se050-reprovision.service | 13 ++++ se050-reprovision/src/cli.rs | 50 +++++++++++++++ se050-reprovision/src/lib.rs | 63 +++++++++++++++++++ se050-reprovision/src/main.rs | 48 ++++++++++++++ se050-reprovision/src/remote_api.rs | 25 ++++++++ 9 files changed, 258 insertions(+) create mode 100644 se050-reprovision/Cargo.toml create mode 100644 se050-reprovision/build.rs create mode 100644 se050-reprovision/debian/orb-se050-reprovision.worldcoin-se050-reprovision.service create mode 100644 se050-reprovision/src/cli.rs create mode 100644 se050-reprovision/src/lib.rs create mode 100644 se050-reprovision/src/main.rs create mode 100644 se050-reprovision/src/remote_api.rs diff --git a/Cargo.lock b/Cargo.lock index 2292a033c..4e130f7f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8584,6 +8584,26 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "orb-se050-reprovision" +version = "0.0.0" +dependencies = [ + "base64 0.22.1", + "clap", + "color-eyre", + "orb-build-info", + "orb-const-concat", + "orb-endpoints", + "orb-security-utils", + "orb-telemetry", + "rand 0.8.5", + "reqwest 0.12.24", + "serde", + "serde_json", + "tokio", + "tracing", +] + [[package]] name = "orb-secure-storage-ca" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index 361c4987b..a76f5094a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ members = [ "s3-helpers", "se050", "security-utils", + "se050-reprovision", "seek-camera/sys", "seek-camera/wrapper", "slot-ctrl", diff --git a/se050-reprovision/Cargo.toml b/se050-reprovision/Cargo.toml new file mode 100644 index 000000000..822f10b65 --- /dev/null +++ b/se050-reprovision/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "orb-se050-reprovision" +version = "0.0.0" +description = "Oneshot service that reprovisions the SE050 secure element" +authors = ["Ryan Butler "] +publish = false + 
+edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +base64.workspace = true +clap.workspace = true +color-eyre.workspace = true +orb-build-info.workspace = true +orb-const-concat.workspace = true +orb-endpoints.workspace = true +orb-security-utils = { workspace = true, features = ["reqwest"] } +orb-telemetry.workspace = true +rand.workspace = true +reqwest.workspace = true +serde.workspace = true +serde_json.workspace = true +tokio.workspace = true +tracing.workspace = true + +[build-dependencies] +orb-build-info = { workspace = true, features = ["build-script"] } + +[package.metadata.deb] +assets = [["target/release/orb-se050-reprovision", "/usr/local/bin/", "755"]] +maintainer-scripts = "debian/" +systemd-units = [{ unit-name = "worldcoin-se050-reprovision" }] diff --git a/se050-reprovision/build.rs b/se050-reprovision/build.rs new file mode 100644 index 000000000..0dedb5e9b --- /dev/null +++ b/se050-reprovision/build.rs @@ -0,0 +1,3 @@ +fn main() { + orb_build_info::initialize().expect("failed to detect build info") +} diff --git a/se050-reprovision/debian/orb-se050-reprovision.worldcoin-se050-reprovision.service b/se050-reprovision/debian/orb-se050-reprovision.worldcoin-se050-reprovision.service new file mode 100644 index 000000000..1a0e4819e --- /dev/null +++ b/se050-reprovision/debian/orb-se050-reprovision.worldcoin-se050-reprovision.service @@ -0,0 +1,13 @@ +[Unit] +Description=SE050 reprovisioning service +After=persistent.target + +[Service] +Type=oneshot +User=worldcoin +Environment=RUST_BACKTRACE=1 +SyslogIdentifier=worldcoin-se050-reprovision +ExecStart=/usr/local/bin/orb-se050-reprovision + +[Install] +WantedBy=multi-user.target diff --git a/se050-reprovision/src/cli.rs b/se050-reprovision/src/cli.rs new file mode 100644 index 000000000..18ee34910 --- /dev/null +++ b/se050-reprovision/src/cli.rs @@ -0,0 +1,50 @@ +use std::process::Stdio; + +use color_eyre::eyre::{Context, 
Result}; +use serde::{Deserialize, Serialize}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; + +use crate::Config; + +#[derive(Debug, Serialize, Deserialize)] +pub struct CliOutput { + jetson_authkey: KeyInfo, + attestation_key: KeyInfo, + iris_code_key: KeyInfo, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct KeyInfo { + /// PEM format + key: String, + #[serde(with = "crate::base64_serde")] + signature: Vec, + #[serde(with = "crate::base64_serde")] + extra_data: Vec, + active: bool, +} + +pub async fn call(cfg: &Config, nonce: u128) -> Result { + let mut child = tokio::process::Command::new(&cfg.ca_path) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .wrap_err("failed to spawn")?; + let mut stdin = child.stdin.take().expect("infallible"); + let mut stdout = child.stdout.take().expect("infallible"); + + stdin + .write_all(&nonce.to_be_bytes()) + .await + .wrap_err("failed to write nonce to stdin")?; + stdin.shutdown().await?; + drop(stdin); + + let mut output = String::new(); + stdout + .read_to_string(&mut output) + .await + .wrap_err("failed to read from stdout")?; + + serde_json::from_str(&output).wrap_err("failed to deserialize stdout as json") +} diff --git a/se050-reprovision/src/lib.rs b/se050-reprovision/src/lib.rs new file mode 100644 index 000000000..f3fc87f6b --- /dev/null +++ b/se050-reprovision/src/lib.rs @@ -0,0 +1,63 @@ +#![forbid(unsafe_code)] + +mod cli; +pub mod remote_api; + +use std::path::PathBuf; + +use color_eyre::{eyre::WrapErr as _, Result}; +use orb_build_info::{make_build_info, BuildInfo}; +use rand::{rngs::StdRng, RngCore}; +use tracing::info; + +pub const SYSLOG_IDENTIFIER: &str = "worldcoin-se050-reprovision"; +pub const BUILD_INFO: BuildInfo = make_build_info!(); + +#[derive(Debug, Clone)] +pub struct Config { + pub rng: StdRng, + pub base_url: String, + pub client: crate::remote_api::Client, + /// Path to the CA that performs the re-enrollment + pub ca_path: PathBuf, +} + +pub async fn run(mut cfg: 
Config) -> Result<()> { + info!("orb-se050-reprovision version {}", BUILD_INFO.version); + + // TODO: Make this code not dummy stubbed code. For now we just call the reprovision + // CLI with some bogus nonce. + let mut nonce = [0; 16]; + cfg.rng.fill_bytes(&mut nonce); + let nonce = u128::from_le_bytes(nonce); + let output = crate::cli::call(&cfg, nonce) + .await + .wrap_err("failed to call cli")?; + info!("cli output: {output:?}"); + + Ok(()) +} + +mod base64_serde { + use base64::engine::general_purpose::STANDARD; + use base64::Engine as _; + use serde::{Deserialize, Deserializer, Serializer}; + + pub fn serialize(value: &[u8], serializer: S) -> Result + where + S: Serializer, + { + let encoded = STANDARD.encode(value); + serializer.serialize_str(&encoded) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + let encoded = String::deserialize(deserializer)?; + STANDARD + .decode(encoded.as_bytes()) + .map_err(serde::de::Error::custom) + } +} diff --git a/se050-reprovision/src/main.rs b/se050-reprovision/src/main.rs new file mode 100644 index 000000000..e42ab06e1 --- /dev/null +++ b/se050-reprovision/src/main.rs @@ -0,0 +1,48 @@ +use std::path::PathBuf; + +use clap::Parser; +use color_eyre::{eyre::WrapErr as _, Result}; +use orb_endpoints::Backend; +use orb_se050_reprovision::{Config, BUILD_INFO}; +use rand::{rngs::StdRng, SeedableRng}; + +#[derive(Debug, Parser)] +#[clap(version = BUILD_INFO.version, about)] +pub struct Args {} + +impl Args { + fn make_config(self, backend: Backend) -> Result { + let subdomain = match backend { + Backend::Prod => "orb", + Backend::Staging => "stage.orb", + Backend::Analysis => "analysis.ml", + Backend::Local => unreachable!(), + }; + + Ok(Config { + base_url: format!("https://auth.{subdomain}.worldcoin.org"), + client: orb_se050_reprovision::remote_api::Client::new()?, + ca_path: PathBuf::from("/usr/local/bin/orb-se050-reprovision-ca"), + rng: StdRng::from_entropy(), + }) 
+ } +} + +#[tokio::main] +async fn main() -> Result<()> { + color_eyre::install()?; + let telemetry = orb_telemetry::TelemetryConfig::new() + .with_journald(orb_se050_reprovision::SYSLOG_IDENTIFIER) + .init(); + + let args = Args::parse(); + let backend = + Backend::from_env().wrap_err("failed to determine backend from env var")?; + let config = args + .make_config(backend) + .wrap_err("failed to create config")?; + let result = orb_se050_reprovision::run(config).await; + + telemetry.flush().await; + result +} diff --git a/se050-reprovision/src/remote_api.rs b/se050-reprovision/src/remote_api.rs new file mode 100644 index 000000000..0287656fd --- /dev/null +++ b/se050-reprovision/src/remote_api.rs @@ -0,0 +1,25 @@ +use color_eyre::eyre::{Result, WrapErr as _}; +use orb_const_concat::const_concat; + +use crate::BUILD_INFO; + +const USER_AGENT: &str = const_concat!( + "orb-se050-reprovision/", + BUILD_INFO.cargo.pkg_version, + "-", + BUILD_INFO.git.describe, +); + +#[derive(Debug, Clone)] +pub struct Client(pub reqwest::Client); + +impl Client { + pub fn new() -> Result { + Ok(Self( + orb_security_utils::reqwest::http_client_builder() + .user_agent(USER_AGENT) + .build() + .wrap_err("failed to create http client")?, + )) + } +} From 58351f27b3a11a50b1f235133d5363802eb5cf14 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Tue, 17 Mar 2026 17:55:11 +0100 Subject: [PATCH 23/66] chore: add zsync to the hil packages (#1085) required for faster rts downloads --- nix/machines/hil-common.nix | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 18ecf9213..6d2e0d08f 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -69,9 +69,10 @@ in }; config = { - # Install orb-hil systemwide + # Install test-related packages environment.systemPackages = [ orb-hil + pkgs.zsync ]; networking.hostName = "${hostname}"; From 3c387b7c731b0f7f68385492fe38348afa509f6a Mon Sep 17 00:00:00 
2001 From: vmenge Date: Tue, 17 Mar 2026 23:32:20 +0100 Subject: [PATCH 24/66] fix(agentwire): tests hanging (#1086) a possible candidate as the reason `orb-core` tests are hanging: in case of a timeout the child was just left running around forever (ominous) --- agentwire/src/testing_rt.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/agentwire/src/testing_rt.rs b/agentwire/src/testing_rt.rs index 0bed67376..462b73b12 100644 --- a/agentwire/src/testing_rt.rs +++ b/agentwire/src/testing_rt.rs @@ -79,10 +79,13 @@ pub fn run_broker_test( .env(agent::process::ARGS_ENV, shell_words::join(&child_args)) .spawn() .unwrap(); - time::timeout(timeout, child.wait()) - .await - .expect("timeouted") - .unwrap() + + if let Ok(status) = time::timeout(timeout, child.wait()).await { + status.unwrap() + } else { + let _ = child.kill().await; + panic!("timeouted"); + } }); assert!(result.success(), "test failed"); } From 586f283664dc00faae12d8c7012fa1eeb79c6ac8 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 18 Mar 2026 22:48:58 +0100 Subject: [PATCH 25/66] feat(orb-backend-status): add orb_stand_qr_id to CoreStats (#1088) --- orb-backend-status/dbus/src/types.rs | 1 + orb-backend-status/src/backend/status.rs | 4 ++++ orb-backend-status/src/backend/types.rs | 1 + 3 files changed, 6 insertions(+) diff --git a/orb-backend-status/dbus/src/types.rs b/orb-backend-status/dbus/src/types.rs index 75037aa57..230c288e4 100644 --- a/orb-backend-status/dbus/src/types.rs +++ b/orb-backend-status/dbus/src/types.rs @@ -122,6 +122,7 @@ pub struct CoreStats { pub ssd: Ssd, pub version: OrbVersion, pub mac_address: String, + pub orb_stand_qr_id: Option, } #[allow(missing_docs)] diff --git a/orb-backend-status/src/backend/status.rs b/orb-backend-status/src/backend/status.rs index 65cb6a04d..57bab068c 100644 --- a/orb-backend-status/src/backend/status.rs +++ b/orb-backend-status/src/backend/status.rs @@ -296,6 +296,10 @@ async fn
build_status_request_v2( }), main_mcu: build_main_mcu_api(current_status), oes: None, + orb_stand_qr_id: current_status + .core_stats + .as_ref() + .and_then(|core_stats| core_stats.orb_stand_qr_id.clone()), timestamp: Utc::now(), }) } diff --git a/orb-backend-status/src/backend/types.rs b/orb-backend-status/src/backend/types.rs index 5c53223f7..7e1a670e3 100644 --- a/orb-backend-status/src/backend/types.rs +++ b/orb-backend-status/src/backend/types.rs @@ -36,6 +36,7 @@ pub struct OrbStatusApiV2 { // orb event stream #[serde(skip_serializing_if = "Option::is_none")] pub oes: Option>, + pub orb_stand_qr_id: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] From 33967e554924e653b7226490ef10c3120ff501b0 Mon Sep 17 00:00:00 2001 From: Ryan Butler Date: Wed, 18 Mar 2026 19:20:25 -0400 Subject: [PATCH 26/66] chore(nix): added aws cli (#1090) useful for devcontainers --- nix/shells/development.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/nix/shells/development.nix b/nix/shells/development.nix index 8771ab1ad..7c22ad150 100644 --- a/nix/shells/development.nix +++ b/nix/shells/development.nix @@ -125,6 +125,7 @@ in # venv uv # python venv management + awscli2 bacon # better cargo-watch black # Python autoformatter cargo-binutils # Contains common native development utilities From 123f7ac2b8d0e6c534383a77a0d39778f9ff54b3 Mon Sep 17 00:00:00 2001 From: AlexKaravaev <30314738+AlexKaravaev@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:03:52 +0100 Subject: [PATCH 27/66] fix: retry delay logic in orb-attest (#1089) fix so that retry with backoff is only applied when we have problems with signing, not fetching token from the backend - [x] Test happy path on orb - [x] Test no connection to endpoint on orb --- attest/src/remote_api.rs | 95 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 6 deletions(-) diff --git a/attest/src/remote_api.rs b/attest/src/remote_api.rs index e234cdf6e..af8b8a52e 100644 --- a/attest/src/remote_api.rs +++ 
b/attest/src/remote_api.rs @@ -116,6 +116,41 @@ pub enum RefreshTokenError { JoinError(#[source] tokio::task::JoinError), } +impl SignError { + fn requires_security_mcu_cooldown(&self) -> bool { + matches!( + self, + Self::SignFailed | Self::Timeout | Self::CommunicationError + ) + } +} + +impl RefreshTokenError { + fn retry_delay(&self, current_delay: time::Duration) -> time::Duration { + if matches!( + self, + Self::SignError(sign_error) + if sign_error.requires_security_mcu_cooldown() + ) { + current_delay + } else { + MIN_TOKEN_DELAY + } + } + + fn next_retry_delay(&self, current_delay: time::Duration) -> time::Duration { + if matches!( + self, + Self::SignError(sign_error) + if sign_error.requires_security_mcu_cooldown() + ) { + (current_delay * 2).min(MAX_TOKEN_DELAY) + } else { + MIN_TOKEN_DELAY + } + } +} + /// helper for concealing part of a secret from the log. /// splits the secret in three parts and print the first and last part fn format_secret(val: &str) -> String { @@ -512,9 +547,11 @@ async fn get_token_inner( Ok(token) } -/// Try to refresh the token until succeeds -/// The delay between attempts grows exponentially with each failure, starting from `MIN_TOKEN_DELAY` and up to `MAX_TOKEN_DELAY` -/// to reduce the telemetry load. +/// Try to refresh the token until succeeds. +/// Backend-related failures retry after `MIN_TOKEN_DELAY`. +/// Signing failures that may require SE050 or Security MCU recovery keep an +/// exponential backoff, capped at `MAX_TOKEN_DELAY`, to reduce telemetry load +/// while recovery is in progress. 
/// /// Panics /// @@ -531,9 +568,10 @@ pub async fn get_token(orb_id: &str, base_url: &Url) -> Token { return token; } Err(e) => { - error!("failed to get token: {e}, retrying in {delay:?}"); - sleep(delay).await; - delay = (delay * 2).min(MAX_TOKEN_DELAY); + let retry_delay = e.retry_delay(delay); + error!("failed to get token: {e}, retrying in {retry_delay:?}"); + sleep(retry_delay).await; + delay = e.next_retry_delay(delay); } } } @@ -542,8 +580,13 @@ pub async fn get_token(orb_id: &str, base_url: &Url) -> Token { #[cfg(test)] mod test { use std::os::unix::fs::PermissionsExt; + use std::time::Duration; + use super::{ + ChallengeError, RefreshTokenError, SignError, MAX_TOKEN_DELAY, MIN_TOKEN_DELAY, + }; use data_encoding::BASE64; + use reqwest::StatusCode; use secrecy::ExposeSecret; use wiremock::{ matchers::{method, path}, @@ -641,4 +684,44 @@ printf dmFsaWRzaWduYXR1cmU= .unwrap(); assert_eq!(server_token, token.token.expose_secret()); } + + #[test] + fn backend_failures_keep_short_retry_delay() { + let error = + RefreshTokenError::ChallengeError(ChallengeError::ServerReturnedError( + StatusCode::SERVICE_UNAVAILABLE, + "backend unavailable".to_string(), + )); + + assert_eq!(error.retry_delay(MAX_TOKEN_DELAY), MIN_TOKEN_DELAY); + assert_eq!(error.next_retry_delay(MAX_TOKEN_DELAY), MIN_TOKEN_DELAY); + } + + #[test] + fn se050_failures_keep_exponential_retry_delay() { + let current_delay = Duration::from_secs(40); + let error = RefreshTokenError::SignError(SignError::Timeout); + + assert_eq!(error.retry_delay(current_delay), current_delay); + assert_eq!( + error.next_retry_delay(current_delay), + Duration::from_secs(80) + ); + } + + #[test] + fn se050_failures_cap_retry_delay() { + let error = RefreshTokenError::SignError(SignError::CommunicationError); + + assert_eq!(error.next_retry_delay(MAX_TOKEN_DELAY), MAX_TOKEN_DELAY); + } + + #[test] + fn non_se050_sign_failures_do_not_back_off_exponentially() { + let current_delay = Duration::from_secs(40); + let error = 
RefreshTokenError::SignError(SignError::InternalError); + + assert_eq!(error.retry_delay(current_delay), MIN_TOKEN_DELAY); + assert_eq!(error.next_retry_delay(current_delay), MIN_TOKEN_DELAY); + } } From a1a1acc56359fbe98e2748b44a751de1cbe40cd8 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: Thu, 19 Mar 2026 10:16:38 +0000 Subject: [PATCH 28/66] feat(hil-ssh): add private ssh key step to deploy hil (#1083) Adds a step to the deploy hil workflow. This step will get the private ssh key from gh-secrets, and put it into `/etc/worldcoin/secrets/hil-ssh-key`. It will be readable by any user in the wheel group. After this gets merged, the orb-os workflow will use this ssh key to connect to the orb (the public key will be committed in orb-os, so the orb will be aware of this key) --- .github/workflows/deploy-hil.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/deploy-hil.yaml b/.github/workflows/deploy-hil.yaml index e32e5b91b..5fd6dddcd 100644 --- a/.github/workflows/deploy-hil.yaml +++ b/.github/workflows/deploy-hil.yaml @@ -45,6 +45,25 @@ jobs: - name: Deploy run: sudo nixos-rebuild switch --flake . + - name: Install HIL SSH key + env: + HIL_SSH_PRIVATE_KEY: ${{ secrets.HIL_PRIVATE_SSH_KEY }} + HIL_SSH_SYSTEM_KEY_PATH: /etc/worldcoin/secrets/hil-ssh-key + run: | + set -Eeuo pipefail + + tmp_key="$(mktemp)" + trap 'rm -f "${tmp_key}"' EXIT + printf '%s\n' "${HIL_SSH_PRIVATE_KEY}" > "${tmp_key}" + + sudo install -d -m 750 -o root -g wheel "$(dirname "${HIL_SSH_SYSTEM_KEY_PATH}")" + if ! sudo test -f "${HIL_SSH_SYSTEM_KEY_PATH}" || !
sudo cmp -s "${tmp_key}" "${HIL_SSH_SYSTEM_KEY_PATH}"; then + sudo install -m 640 -o root -g wheel "${tmp_key}" "${HIL_SSH_SYSTEM_KEY_PATH}" + else + sudo chown root:wheel "${HIL_SSH_SYSTEM_KEY_PATH}" + sudo chmod 640 "${HIL_SSH_SYSTEM_KEY_PATH}" + fi + - name: Summary if: always() run: | From 74e4f316f15913fb606cdaeba26f4ce387f6fef4 Mon Sep 17 00:00:00 2001 From: chrisgalanis <50170911+chrisgalanis@users.noreply.github.com> Date: Thu, 19 Mar 2026 13:32:35 +0100 Subject: [PATCH 29/66] feat: Service For HIL Orchestrator Service (#1092) This pull request introduces new support for configuring and running the `orb-hil-agent` service on the machine, allowing integration with the orb-hil-orchestrator server. The changes focus on adding a configurable orchestrator URL and conditionally enabling the agent service based on this configuration. Configuration enhancements: * Added a new option `worldcoin.hilOrchestratorUrl` to allow specifying the URL of the orb-hil-orchestrator server in `hil-common.nix`. Service management improvements: * Added a new systemd service definition for `orb-hil-agent`, which is enabled only if `worldcoin.hilOrchestratorUrl` is set. The service is configured to use the orchestrator URL, run as the specified user, and restart automatically. --- nix/machines/hil-common.nix | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 6d2e0d08f..78088474f 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -68,6 +68,12 @@ in description = "The orb platform (e.g. pearl, diamond).
Adds a 'worldcoin-hil-' label to the GitHub runner if set."; }; + options.worldcoin.hilOrchestratorUrl = lib.mkOption { + type = lib.types.nullOr lib.types.str; + default = "http://10.108.4.25:8080"; + description = "URL of the orb-hil-orchestrator server."; + }; + config = { # Install test-related packages environment.systemPackages = [ @@ -242,6 +248,24 @@ in services.mullvad-vpn.enable = true; services.tailscale.enable = true; + systemd.services.orb-hil-agent = lib.mkIf (config.worldcoin.hilOrchestratorUrl != null) { + description = "Worldcoin HIL Agent"; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "simple"; + User = username; + Environment = "ORCHESTRATOR_URL=${config.worldcoin.hilOrchestratorUrl}"; + ExecStart = '' + /home/${username}/orb-hil-agent \ + --results-dir /var/lib/hil-agent/results \ + --orb-config-path /etc/worldcoin/orb.yaml + ''; + Restart = "on-failure"; + RestartSec = 5; + }; + }; + systemd.services."github-runner-${hostname}" = { serviceConfig = { InaccessiblePaths = lib.mkForce [ ]; From a69707214f56c02b1dd0e532a7108216c447b380 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Thu, 19 Mar 2026 14:55:07 +0100 Subject: [PATCH 30/66] feat: ping command in orb-hil (#1084) nice for testing, as it takes all data from the config --- hil/src/commands/mod.rs | 2 ++ hil/src/commands/ping.rs | 59 ++++++++++++++++++++++++++++++++++++++++ hil/src/main.rs | 2 ++ 3 files changed, 63 insertions(+) create mode 100644 hil/src/commands/ping.rs diff --git a/hil/src/commands/mod.rs b/hil/src/commands/mod.rs index 7ae1c2298..84263b332 100644 --- a/hil/src/commands/mod.rs +++ b/hil/src/commands/mod.rs @@ -6,6 +6,7 @@ mod login; mod mcu; mod nfsboot; mod ota; +mod ping; mod reboot; mod set_recovery_pin; @@ -17,5 +18,6 @@ pub use self::login::Login; pub use self::mcu::Mcu; pub use self::nfsboot::Nfsboot; pub use self::ota::Ota; +pub use self::ping::Ping; pub use self::reboot::Reboot; pub use 
self::set_recovery_pin::SetRecoveryPin; diff --git a/hil/src/commands/ping.rs b/hil/src/commands/ping.rs new file mode 100644 index 000000000..d1bf2f9dc --- /dev/null +++ b/hil/src/commands/ping.rs @@ -0,0 +1,59 @@ +use clap::Parser; +use color_eyre::{eyre::eyre, eyre::WrapErr as _, Result}; +use tokio::time::{timeout, Duration}; +use tracing::debug; + +use crate::orb::OrbConfig; + +/// Wait until the orb is reachable via mDNS ping +#[derive(Debug, Parser)] +pub struct Ping { + #[arg(long, default_value = "120s", value_parser = humantime::parse_duration)] + timeout: Duration, + + #[arg(long, default_value = "1s", value_parser = humantime::parse_duration)] + interval: Duration, + + #[command(flatten)] + orb_config: OrbConfig, +} + +impl Ping { + pub async fn run(self) -> Result<()> { + let orb_config = self.orb_config.use_file_if_exists()?; + let hostname = orb_config + .get_hostname() + .ok_or_else(|| eyre!("orb-id or hostname must be specified"))?; + + let poll = async { + loop { + let status = tokio::process::Command::new("ping") + .args(["-c", "1", "-W", "1", &hostname]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .await + .wrap_err("failed to spawn ping")?; + + if status.success() { + return Ok::<(), color_eyre::Report>(()); + } + + debug!("ping to {hostname} failed, retrying..."); + tokio::time::sleep(self.interval).await; + } + }; + + match timeout(self.timeout, poll).await { + Ok(result) => { + result?; + println!("Orb is reachable"); + Ok(()) + } + Err(_) => { + println!("Orb unreachable after {:?}", self.timeout); + Err(eyre!("timed out waiting for orb to become reachable")) + } + } + } +} diff --git a/hil/src/main.rs b/hil/src/main.rs index c9376b062..be58c0f3b 100644 --- a/hil/src/main.rs +++ b/hil/src/main.rs @@ -27,6 +27,7 @@ enum Commands { Mcu(commands::Mcu), Nfsboot(commands::Nfsboot), Ota(commands::Ota), + Ping(commands::Ping), Reboot(commands::Reboot), SetRecoveryPin(commands::SetRecoveryPin), } @@ -64,6
+65,7 @@ async fn main() -> Result<()> { Commands::Mcu(c) => c.run().await, Commands::Nfsboot(c) => c.run().await, Commands::Ota(c) => c.run().await, + Commands::Ping(c) => c.run().await, Commands::Reboot(c) => c.run().await, Commands::SetRecoveryPin(c) => c.run().await, } From ca82577cffbc845f488aa60d76ef1af63238d851 Mon Sep 17 00:00:00 2001 From: vmenge Date: Thu, 19 Mar 2026 18:03:19 +0100 Subject: [PATCH 31/66] chore: developing-with-orbs agent skill (#1096) ## new - `developing-with-orbs` skill for agents skill should help guiding agents on interacting with orbs and doing things such as: - deploying binaries orb - sshing to an orb - scping files to an orb - using orb-hil - using mcu-util image --- .agents/skills/developing-with-orbs/SKILL.md | 232 +++++++++++++++++++ AGENTS.md | 10 + 2 files changed, 242 insertions(+) create mode 100644 .agents/skills/developing-with-orbs/SKILL.md diff --git a/.agents/skills/developing-with-orbs/SKILL.md b/.agents/skills/developing-with-orbs/SKILL.md new file mode 100644 index 000000000..456aafeaa --- /dev/null +++ b/.agents/skills/developing-with-orbs/SKILL.md @@ -0,0 +1,232 @@ +--- +name: developing-with-orbs +description: Use when working against a physical Orb from this repo, especially for SSH or scp access, deploying crates with cargo x deploy, using the orb-hil CLI, or running orb-mcu-util workflows +--- + +# Developing With Orbs + +## Overview + +Use this skill when the task needs a real Orb instead of local-only work. Favor +direct SSH and `scp` for inspection and staging files, `cargo x d` for deploying +workspace crates, `orb-hil` for hardware-in-loop workflows, and +`orb-mcu-util` for MCU inspection and firmware operations on the device. 
+ +## Quick Access + +An Orb with ID `1234` is reachable over Avahi as: + +```bash +ssh worldcoin@orb-1234.local +``` + +`scp` uses the same host naming: + +```bash +scp ./local-file worldcoin@orb-1234.local:/tmp/local-file +scp worldcoin@orb-1234.local:/tmp/remote-file ./remote-file +``` + +If you already know the Orb IP, replace `orb-1234.local` with that IP. + +## Deploying Crates From This Repo + +Use `cargo x d` as the short form of `cargo x deploy` from the workspace +`xtask` crate: + +```bash +export ORB_IP=orb-1234.local +export WORLDCOIN_PW='...' +cargo x d orb-mcu-util +``` + +Example: + +```bash +cargo x d orb-mcu-util +``` + +Operational notes: + +- `ORB_IP` can be a raw IP address or an Avahi host such as `orb-1234.local`. +- `WORLDCOIN_PW` should be set before deploys. The current xtask implementation + will prompt interactively if either variable is unset, but for repeatable + runs, set both env vars explicitly. +- `cargo x d ` builds the crate for + `aarch64-unknown-linux-gnu`, creates a `.deb`, copies it to the Orb, and + reinstalls it there. +- If the crate declares systemd units in its package metadata, `cargo x d` + automatically restarts the associated service on the Orb after install. + +Prefer `cargo x d` when you are deploying a workspace crate. Prefer raw `scp` +when you are just staging an artifact or test file. + +## Copying Files With scp + +Push a file onto the Orb: + +```bash +scp ./artifact.bin worldcoin@orb-1234.local:/tmp/artifact.bin +``` + +Copy a file back from the Orb: + +```bash +scp worldcoin@orb-1234.local:/tmp/log.txt ./log.txt +``` + +Common pattern for MCU work: + +```bash +scp ./target/aarch64-unknown-linux-gnu/release/main-mcu.bin \ + worldcoin@orb-1234.local:/tmp/main-mcu.bin +ssh worldcoin@orb-1234.local \ + 'orb-mcu-util image update main --path /tmp/main-mcu.bin' +``` + +## Using the HIL CLI + +The repo ships a hardware-in-loop CLI as `orb-hil`. 
+ +Run it from source: + +```bash +AWS_PROFILE=hil aws sso login +AWS_PROFILE=hil cargo run -p orb-hil -- --help +``` + +Peripheral requirements matter: + +- `orb-hil flash` needs an x86 Linux machine +- `orb-hil reboot` needs a serial adapter +- `orb-hil login` needs a serial adapter +- `orb-hil cmd` can work with a serial adapter or network access such as + SSH/Teleport + +Do not recommend `orb-hil reboot` or `orb-hil login` as SSH-only replacements. + +Important command families from `hil/src/main.rs`: + +- `button-ctrl` +- `cmd` +- `fetch-persistent` +- `flash` +- `login` +- `mcu` +- `nfsboot` +- `ota` +- `ping` +- `reboot` +- `set-recovery-pin` + +Examples: + +```bash +AWS_PROFILE=hil cargo run -p orb-hil -- flash --help +AWS_PROFILE=hil cargo run -p orb-hil -- reboot --help +AWS_PROFILE=hil cargo run -p orb-hil -- cmd --help +AWS_PROFILE=hil cargo run -p orb-hil -- mcu --help +``` + +Use `orb-hil` when you need hardware-in-loop flows such as flashing, reboot +orchestration, login automation, or command execution through supported HIL +transport paths. + +## Using orb-mcu-util + +`orb-mcu-util` is the direct MCU utility in this repo. It supports both normal +and CAN-FD operation; add `--can-fd` when the task specifically needs that bus. 
+ +Core inspection and reboot commands: + +```bash +ssh worldcoin@orb-1234.local 'orb-mcu-util info' +ssh worldcoin@orb-1234.local 'orb-mcu-util info --diag' +ssh worldcoin@orb-1234.local 'orb-mcu-util reboot main' +ssh worldcoin@orb-1234.local 'orb-mcu-util reboot security' +ssh worldcoin@orb-1234.local 'orb-mcu-util reboot orb' +ssh worldcoin@orb-1234.local 'orb-mcu-util reboot --delay 30 orb' +ssh worldcoin@orb-1234.local 'orb-mcu-util reboot-behavior button' +ssh worldcoin@orb-1234.local 'orb-mcu-util reboot-behavior always-on' +``` + +Firmware image commands: + +```bash +ssh worldcoin@orb-1234.local \ + 'orb-mcu-util image update main --path /tmp/main-mcu.bin' +ssh worldcoin@orb-1234.local \ + 'orb-mcu-util image update security --path /tmp/security-mcu.bin' +ssh worldcoin@orb-1234.local \ + 'orb-mcu-util image update main --path /tmp/main-mcu.bin --force' +ssh worldcoin@orb-1234.local 'orb-mcu-util image switch main' +ssh worldcoin@orb-1234.local 'orb-mcu-util image switch security' +ssh worldcoin@orb-1234.local 'orb-mcu-util image force-switch main' +``` + +Dump and stress commands: + +```bash +ssh worldcoin@orb-1234.local 'orb-mcu-util dump main --duration 30' +ssh worldcoin@orb-1234.local 'orb-mcu-util dump security --duration 30 --logs-only' +ssh worldcoin@orb-1234.local 'orb-mcu-util stress main --duration 30' +ssh worldcoin@orb-1234.local 'orb-mcu-util stress security --duration 30' +``` + +Hardware, power, and peripheral commands: + +```bash +ssh worldcoin@orb-1234.local 'orb-mcu-util hardware-revision' +ssh worldcoin@orb-1234.local 'orb-mcu-util power-cycle secure-element' +ssh worldcoin@orb-1234.local 'orb-mcu-util power-cycle heat-camera' +ssh worldcoin@orb-1234.local 'orb-mcu-util power-cycle wifi' +ssh worldcoin@orb-1234.local 'orb-mcu-util ui front red' +ssh worldcoin@orb-1234.local 'orb-mcu-util ui front white' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics gimbal-home autohome' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics 
gimbal-position --phi 45000 --theta 90000' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics gimbal-move --phi 100 --theta -100' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics trigger-camera eye 30' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics trigger-camera face 30' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics polarizer home' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics polarizer passthrough' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics polarizer vertical' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics polarizer horizontal' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics polarizer angle 900' +ssh worldcoin@orb-1234.local \ + 'orb-mcu-util optics polarizer stress 0 100 --random' +ssh worldcoin@orb-1234.local \ + 'orb-mcu-util optics polarizer settings --acceleration 100 --max-speed 100' +``` + +Use these patterns: + +- `info` or `info --diag` to inspect current MCU state +- `reboot` and `reboot-behavior` to control board or Orb restart behavior +- `image update` after staging a firmware binary with `scp` +- `image switch` or `image force-switch` to change active image slots +- `dump` to collect MCU messages, optionally logs only +- `stress` to exercise MCU communication +- `power-cycle` for secure element, heat camera, or Wi-Fi power resets +- `ui` and `optics` when the task involves front LEDs, gimbal, cameras, or the + polarizer +- `optics polarizer stress ` uses positional arguments; in the + example above, `0 100` means speed `0` and `100` repetitions + +For anything beyond the examples above, run: + +```bash +ssh worldcoin@orb-1234.local 'orb-mcu-util --help' +ssh worldcoin@orb-1234.local 'orb-mcu-util image --help' +ssh worldcoin@orb-1234.local 'orb-mcu-util optics --help' +ssh worldcoin@orb-1234.local 'orb-mcu-util ui --help' +``` + +## Source Of Truth + +When the command surface matters, read these files directly: + +- `xtask/src/main.rs` +- `xtask/src/cmd/deploy.rs` +- `docs/src/hil/cli.md` +- `hil/src/main.rs` +- 
`mcu-util/src/main.rs` diff --git a/AGENTS.md b/AGENTS.md index 8ce6f4198..d9c854b34 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -60,3 +60,13 @@ If there are any differences between this file and AGENTS.override.md, the latte - Use the Nix/direnv environment (`.envrc`) and follow `docs/src/first-time-setup.md` to vendor required SDKs. This is typically already done by the user. - Never add closed-source or copyleft deps outside documented exceptions. - For cross-compiles and production artifacts, prefer `cargo zigbuild` and the provided CI workflows. + +## Repo Skills + +### Available skills +- developing-with-orbs: Use when working against a physical Orb from this repo, especially for SSH or scp access, deploying crates with cargo x deploy, using the orb-hil CLI, or running orb-mcu-util workflows. (file: .agents/skills/developing-with-orbs/SKILL.md) + +### How to use skills +- Discovery: Repo-local skills live under `.agents/skills/`. +- Trigger rules: If a task matches or might reasonably benefit from a repo-local skill, use it for that turn. +- Usage: Open the listed `SKILL.md`, read only what you need, and follow it directly. 
From c61353b792f586d293055b23080dfe670a512367 Mon Sep 17 00:00:00 2001 From: Ryan Butler Date: Thu, 19 Mar 2026 13:08:19 -0400 Subject: [PATCH 32/66] feat(x-optee): better error messages for aws signing (#1091) I wanted to provide clearer instructons when aws credentials are messed up in `cargo x optee ta sign` --- Cargo.lock | 67 ++++++++++++++++++---------------- Cargo.toml | 19 ++++++---- s3-helpers/src/client.rs | 2 +- s3-helpers/tests/common/mod.rs | 2 +- xtask/optee/Cargo.toml | 3 ++ xtask/optee/src/lib.rs | 31 +++++++++++++++- 6 files changed, 80 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4e130f7f8..53d39eabf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -825,9 +825,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.8" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faf26925f4a5b59eb76722b63c2892b1d70d06fa053c72e4a100ec308c1d47bc" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -992,9 +992,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.6" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" dependencies = [ "futures-util", "pin-project-lite", @@ -1159,9 +1159,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.2" +version = "1.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193" +checksum = "876ab3c9c29791ba4ba02b780a3049e21ec63dabda09268b175272c3733a79e6" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -1176,9 +1176,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.4" +version = 
"1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" dependencies = [ "base64-simd", "bytes", @@ -3113,7 +3113,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3608,7 +3608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4282,7 +4282,7 @@ dependencies = [ "gobject-sys", "libc", "system-deps 7.0.7", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5992,7 +5992,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi 0.5.2", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6048,9 +6048,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jep106" @@ -7370,7 +7370,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -8990,6 +8990,8 @@ dependencies = [ name = "orb-x-optee" version = "0.0.1" dependencies = [ + "aws-config", + "aws-credential-types", "cargo_metadata", "clap", "cmd_lib", @@ -8998,6 +9000,7 @@ dependencies = [ "object 0.38.1", "serde", "serde_json", + "tokio", "uuid 1.19.0", ] @@ -9074,7 +9077,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -11047,7 +11050,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -11168,7 +11171,7 @@ dependencies = [ "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs 1.0.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -11250,9 +11253,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "sacabase" @@ -12747,7 +12750,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -13021,9 +13024,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -13147,9 +13150,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -13500,9 +13503,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -13512,9 +13515,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -13523,9 +13526,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -14280,7 +14283,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a76f5094a..9645190e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,6 +89,7 @@ async-stream = "0.3.6" async-tempfile = "0.7.0" async-trait = "0.1.88" aws-config = "=1.5.5" +aws-credential-types = "1.2.14" aws-sdk-s3 = "=1.46.0" axum = "0.8.3" base64 = "0.22.1" @@ -111,10 +112,6 @@ console-subscriber = "0.4" dashmap = "5.5.3" data-encoding = "2.3" dbus-launch = "0.2.0" -derive_more = { version = "2.1.0", default-features = false, features = [ - "display", - "from", -] } ed25519-dalek = { version = "2.1.1", default-features = false, features = ["std"] } escargot = "0.5.15" eyre = "0.6.12" @@ -147,10 +144,6 @@ pkg-config = "0.3.32" proptest = "1.10.0" rand = 
"0.8" regex = "1.11.2" -reqwest = { version = "0.12.24", default-features = false, features = [ - "rustls-tls", - "stream", -] } ring = "0.17.2" rkyv = "0.7.46" rustix = "0.38.37" @@ -229,10 +222,20 @@ seek-camera.path = "seek-camera/wrapper" test-utils.path = "test-utils" zenorb.path = "zenorb" +[workspace.dependencies.derive_more] +default-features = false +features = ["display", "from"] +version = "2.1.0" + [workspace.dependencies.orb-messages] git = "https://github.com/worldcoin/orb-messages" rev = "af472fadb57ce55ac63f8f94bd2a0608e62405c7" +[workspace.dependencies.reqwest] +default-features = false +features = ["rustls-tls", "stream"] +version = "0.12.24" + [patch.crates-io.optee-teec-sys] branch = "tfh" git = "https://github.com/TheButlah/teaclave-trustzone-sdk" diff --git a/s3-helpers/src/client.rs b/s3-helpers/src/client.rs index 4180c04a2..ae42480df 100644 --- a/s3-helpers/src/client.rs +++ b/s3-helpers/src/client.rs @@ -44,7 +44,7 @@ pub async fn client() -> Result { let retry_config = RetryConfig::standard().with_max_attempts(TIMEOUT_RETRY_ATTEMPTS); - let config = aws_config::defaults(BehaviorVersion::v2025_08_07()) + let config = aws_config::defaults(BehaviorVersion::v2026_01_12()) .region(region_provider) .credentials_provider(credentials_provider) .retry_config(retry_config) diff --git a/s3-helpers/tests/common/mod.rs b/s3-helpers/tests/common/mod.rs index a7a0c4beb..cf8b693c4 100644 --- a/s3-helpers/tests/common/mod.rs +++ b/s3-helpers/tests/common/mod.rs @@ -44,7 +44,7 @@ impl TestCtx { .operation_timeout(Duration::from_secs(5)) .build(), ) - .behavior_version(BehaviorVersion::v2025_08_07()) + .behavior_version(BehaviorVersion::v2026_01_12()) .region(Region::new("us-east-1")) .credentials_provider(creds) .endpoint_url(endpoint_url) diff --git a/xtask/optee/Cargo.toml b/xtask/optee/Cargo.toml index c983aedb0..0f5787952 100644 --- a/xtask/optee/Cargo.toml +++ b/xtask/optee/Cargo.toml @@ -13,6 +13,8 @@ repository.workspace = true 
rust-version.workspace = true [dependencies] +aws-config.workspace = true +aws-credential-types.workspace = true cargo_metadata = "0.22.0" clap = { workspace = true, features = ["derive"] } cmd_lib.workspace = true @@ -21,4 +23,5 @@ derive_more = { workspace = true, features = ["display"] } object = "0.38.1" serde = { workspace = true, features = ["derive"] } serde_json.workspace = true +tokio.workspace = true uuid.workspace = true diff --git a/xtask/optee/src/lib.rs b/xtask/optee/src/lib.rs index bd5ea5148..72b3ee858 100644 --- a/xtask/optee/src/lib.rs +++ b/xtask/optee/src/lib.rs @@ -5,11 +5,13 @@ use std::{ sync::LazyLock, }; +use aws_config::default_provider::credentials::DefaultCredentialsChain; +use aws_credential_types::provider::ProvideCredentials as _; use clap::ValueEnum; use cmd_lib::run_cmd; use color_eyre::{ - eyre::{ensure, Context as _, ContextCompat, OptionExt}, - Result, + eyre::{ensure, Context as _, ContextCompat, OptionExt as _}, + Result, Section as _, }; use derive_more::Display; use uuid::Uuid; @@ -113,6 +115,31 @@ impl SignArgs { format!("failed to read requried arg: {ENV_OPTEE_OS_PATH}") })?; + let creds_fut = async { + let credentials_provider = DefaultCredentialsChain::builder().build().await; + credentials_provider + .provide_credentials() + .await + .wrap_err("failed to get aws credentials") + .with_note(|| { + format!("AWS_PROFILE env var was {:?}", std::env::var("AWS_PROFILE")) + }) + .with_suggestion(|| { + "make sure that your aws credentials are set. Follow the instructions at + https://worldcoin.github.io/orb-software/aws-creds" + }) + .with_suggestion(|| { + "try running `AWS_PROFILE= aws sso login --use-device-code` to refresh your \ + credentials" + }) + }; + let _creds = tokio::runtime::Builder::new_current_thread() + .enable_time() + .enable_io() + .build() + .wrap_err("failed to make tokio runtime")? 
+ .block_on(creds_fut)?; + run_cmd!(uv run --all-packages $optee_os_path/scripts/sign_encrypt.py sign-enc --uuid $inspected_uuid --in $file_to_sign --out $out_dir/$inspected_uuid.ta --key $key_id)?; Ok(()) From c5ae6fe2e4685f829177a831ed0a55042aa423f5 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Fri, 20 Mar 2026 12:00:17 +0100 Subject: [PATCH 33/66] fix: stop the key retrieval service and use systemctl reboot (#1098) a workaround while the current mcu-util approach is failing full run: https://github.com/worldcoin/orb-os/actions/runs/23339011361/job/67887807417 --- hil/src/commands/ota/system.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index 6f3671c54..aea1261dc 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -12,14 +12,16 @@ const GONDOR_CALLS_FOR_OTA_PATH: &str = "/usr/local/bin/gondor-calls-for-ota"; /// Reboot the Orb device using orb-mcu-util and shutdown pub async fn reboot_orb(session: &RemoteSession) -> Result<()> { session - .execute_command("TERM=dumb orb-mcu-util reboot orb") + .execute_command( + "TERM=dumb sudo systemctl stop worldcoin-key-retrieval.service", + ) .await - .wrap_err("Failed to execute orb-mcu-util reboot orb")?; + .wrap_err("Failed to stop the key retrieval service")?; session - .execute_command("TERM=dumb sudo shutdown now") + .execute_command("TERM=dumb sudo systemctl reboot") .await - .wrap_err("Failed to execute shutdown now")?; + .wrap_err("Failed to reboot the orb")?; Ok(()) } From 1cf2f27e26c10d7c6789706590bc67f8a0908391 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Fri, 20 Mar 2026 12:10:35 +0100 Subject: [PATCH 34/66] feat: orb-hil version file (#1099) a QoL change not to check the git log every time --- .github/workflows/deploy-hil.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/deploy-hil.yaml b/.github/workflows/deploy-hil.yaml index 
5fd6dddcd..8fe79d568 100644 --- a/.github/workflows/deploy-hil.yaml +++ b/.github/workflows/deploy-hil.yaml @@ -45,6 +45,14 @@ jobs: - name: Deploy run: sudo nixos-rebuild switch --flake . + - name: Write orb-hil version file + run: | + version="$(grep -oP '(?<=version = ")[^"]+' nix/packages/orb-hil.nix)" + sudo install -d -m 755 /etc/worldcoin + printf '%s\n' "${version}" | sudo tee /etc/worldcoin/orb-hil.version > /dev/null + sudo chmod 644 /etc/worldcoin/orb-hil.version + echo "orb-hil version: ${version}" + - name: Install HIL SSH key env: HIL_SSH_PRIVATE_KEY: ${{ secrets.HIL_PRIVATE_SSH_KEY }} From f539257416903fd182ec921afcaf8fd2c5492205 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Fri, 20 Mar 2026 15:02:52 +0100 Subject: [PATCH 35/66] chore(hil): bump orb-hil to beta 19 (#1100) - also install casync & goofys to test caching possibilities --- nix/machines/hil-common.nix | 2 ++ nix/packages/orb-hil.nix | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 78088474f..0d3208790 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -79,6 +79,8 @@ in environment.systemPackages = [ orb-hil pkgs.zsync + pkgs.casync + pkgs.goofys ]; networking.hostName = "${hostname}"; diff --git a/nix/packages/orb-hil.nix b/nix/packages/orb-hil.nix index d72d59bc3..fa20318fb 100644 --- a/nix/packages/orb-hil.nix +++ b/nix/packages/orb-hil.nix @@ -2,11 +2,11 @@ { pkgs }: pkgs.stdenv.mkDerivation rec { pname = "orb-hil"; - version = "0.0.2-beta.18"; + version = "0.0.2-beta.19"; src = pkgs.fetchurl { url = "https://github.com/worldcoin/orb-software/releases/download/orb-hil%2Fv${version}/orb-hil_x86_64"; - sha256 = "sha256-LRz71fDnHIMH0VYiimGEBOP6+kW8gGvNgBjRspzQDko="; + sha256 = "sha256-8Q6THMhmZnmFMqTKH6QwCfZvUmerzjQe1yewu6qsxp0="; }; dontUnpack = true; From f727a468441b42fb7ce40376b17ae4c93f0f7483 Mon Sep 17 00:00:00 2001 From: chrisgalanis 
<50170911+chrisgalanis@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:15:40 +0100 Subject: [PATCH 36/66] fix: worldcoin-hil-3 configuration (#1101) This pull request makes a minor update to the `nix/machines/worldcoin-hil-munich-3/configuration.nix` file by correcting the value of the `pin_ctrl_type` configuration option. * Changed `pin_ctrl_type` value from `usbrelay` to `usb_relay` to match the expected configuration format. --- nix/machines/worldcoin-hil-munich-3/configuration.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/machines/worldcoin-hil-munich-3/configuration.nix b/nix/machines/worldcoin-hil-munich-3/configuration.nix index 8065ddbaf..b3307aafa 100644 --- a/nix/machines/worldcoin-hil-munich-3/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-3/configuration.nix @@ -24,7 +24,7 @@ platform: ${config.worldcoin.orbPlatform} # Pin controller configuration for orb-hil # Type of pin controller to use (ftdi, relay) - pin_ctrl_type: usbrelay + pin_ctrl_type: usb_relay serial_path: "/dev/serial/by-id/usb-FTDI_FT232R_USB_UART_BG031A7H-if00-port0" relay_bank: "/dev/hidraw0" relay_power_channel: 2 From 832945d0a845a699d1cfcac4432453da83485dc2 Mon Sep 17 00:00:00 2001 From: Sfikas Date: Fri, 20 Mar 2026 17:16:20 +0100 Subject: [PATCH 37/66] feat(update-agent): always calculate artifact hashes (#1087) Forces hash re-verification across reruns & removed the `.verified` logic After the earlier noticed incident, trusting the filesystem introduces edge-cases which are not worth a +10 seconds runtime of hash recalculations. This is a regression in performance, but hopefully it can eliminate another area of concern. 
--- update-agent/src/component.rs | 138 +++++++++------------------------- 1 file changed, 35 insertions(+), 103 deletions(-) diff --git a/update-agent/src/component.rs b/update-agent/src/component.rs index 21e660551..e807f0c3b 100644 --- a/update-agent/src/component.rs +++ b/update-agent/src/component.rs @@ -3,7 +3,7 @@ //! Whereas [ManifestComponent] represents a component listed in the manifest, the [Component] type //! defined here also includes its source and location on disk. use std::{ - fs::{metadata, remove_file, File, OpenOptions}, + fs::{metadata, remove_file, File}, io::{self, Seek, SeekFrom}, num::ParseIntError, path::{Path, PathBuf}, @@ -112,17 +112,16 @@ impl Component { let uncompressed_path = util::make_component_path(dst_dir, &self.source.unique_name()) .with_extension("uncompressed"); - let uncompressed_path_verified: PathBuf = - uncompressed_path.with_extension("uncompressed.verified"); - match check_existing_component(&uncompressed_path, self.manifest_component.size) - { + match check_existing_component( + &uncompressed_path, + self.manifest_component.size, + self.manifest_component.hash(), + ) { Ok(()) => { info!( - "found verification file at `{}`, skipping hash verification of decompressed \ - `{}`", - uncompressed_path_verified.display(), - self.manifest_component.name, + "verifying existing component at `{}` succeded", + uncompressed_path.display() ); self.on_disk = uncompressed_path; @@ -136,21 +135,6 @@ impl Component { } } - // Delete stale .uncompressed.verified file before extraction - // NOTE: this a guard-rail against update-agent edge-cases - if uncompressed_path_verified.exists() { - info!( - "removing stale verification file at `{}` before extraction", - uncompressed_path_verified.display() - ); - if let Err(e) = remove_file(&uncompressed_path_verified) { - warn!( - "failed to remove stale .uncompressed.verified file at `{}`: {e:?}", - uncompressed_path_verified.display() - ); - } - } - info!("extracting {}", 
self.manifest_component.name()); extract_fn(ProcessHelperArg { component: self, @@ -183,19 +167,6 @@ impl Component { } self.on_disk = uncompressed_path; - if let Err(e) = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(&uncompressed_path_verified) - .and_then(|f| f.sync_all()) - { - warn!( - "failed marking component `{}` as verified: {e:?}", - self.manifest_component.name - ) - } - Ok(()) } @@ -411,29 +382,31 @@ impl Component { fn check_existing_component( component_path: &Path, expected_size: u64, + expected_hash: &str, ) -> eyre::Result<()> { - let verified_component_path = get_verified_component_path(component_path); - ensure!( - verified_component_path.exists(), - "component at {} does not exists", - verified_component_path.display() - ); let component_size = metadata(component_path) .wrap_err(format!( "failed reading file metadata for `{}`", component_path.display() ))? .len(); + + // Validate the component size against the manifest ensure!( component_size == expected_size, "component size ({component_size}) of `{}` does not match expected size ({expected_size})", component_path.display() ); - Ok(()) -} -fn get_verified_component_path(component_path: &Path) -> PathBuf { - component_path.with_extension("verified") + // Validate the component hash against the manifest + util::check_hash(component_path, expected_hash).wrap_err_with(|| { + format!( + "failed verifying hask of extracted component file at `{}`", + component_path.display(), + ) + })?; + + Ok(()) } fn extract(path: &Path, uncompressed_download_path: &Path) -> eyre::Result<()> { @@ -629,22 +602,6 @@ pub fn download>( } }; - // Delete stale .verified flag before any download - // NOTE: this a guard-rail against update-agent edge-cases - let verified_path = get_verified_component_path(&component_path); - if verified_path.exists() { - info!( - "removing stale verification file at `{}` before download", - verified_path.display() - ); - if let Err(e) = 
remove_file(&verified_path) { - warn!( - "failed to remove stale .verified file at `{}`: {e:?}", - verified_path.display() - ); - } - } - if start_bytes == 0 { info!("starting download to: {}", component_path.display()); } else { @@ -779,53 +736,28 @@ pub fn fetch>( "checking sha256 hash of downloaded `{}`", manifest_component.name() ); - let path_verified = get_verified_component_path(&util::make_component_path( - &dst_dir, - &source.unique_name(), - )); - if path_verified.exists() { - info!( - "found verification file at `{}`, skipping hash verification of `{}`", - path_verified.display(), - source.name, - ); - } else { - if let Err(e) = - util::check_hash(&path, &source.hash).map_err(|e| Error::HashMismatch { - name: source.name.clone(), - source: e, - }) - { - if source.url.is_remote() { - warn!( + if let Err(e) = + util::check_hash(&path, &source.hash).map_err(|e| Error::HashMismatch { + name: source.name.clone(), + source: e, + }) + { + if source.url.is_remote() { + warn!( "deleting downloaded source blob of component `{}` because hash verification \ failed; see logs for more info", source.name ); - if let Err(rm_err) = remove_file(&path) { - warn!( - "failed deleting source blob of component `{}` at `{}`: {rm_err:?}", - source.name, - path.display(), - ); - } + if let Err(rm_err) = remove_file(&path) { + warn!( + "failed deleting source blob of component `{}` at `{}`: {rm_err:?}", + source.name, + path.display(), + ); } - return Err(e); - } - - if let Err(e) = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .open(&path_verified) - .and_then(|f| f.sync_all()) - { - warn!( - "failed marking component `{}` as verified: {e:?}", - source.name - ) } + return Err(e); } Ok(Component { From ab03b63fed2d65c02cba653e971deaaded5e89a8 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Mon, 23 Mar 2026 12:48:27 +0100 Subject: [PATCH 38/66] feat: use unpacked RTS for flashing (#1103) - useful for testing - extra updates I've been made to make --- 
Cargo.lock | 54 +++++++++++++++++++-------------------- deny.toml | 6 +++++ hil/src/commands/flash.rs | 2 +- hil/src/rts.rs | 41 ++++++++++++++++++++++++++--- 4 files changed, 71 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 53d39eabf..5d7059dd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3113,7 +3113,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3608,7 +3608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4282,7 +4282,7 @@ dependencies = [ "gobject-sys", "libc", "system-deps 7.0.7", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5120,7 +5120,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -5598,7 +5598,7 @@ dependencies = [ "ring", "rustls 0.23.35", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "serde", "smallvec", "snafu 0.8.9", @@ -5933,7 +5933,7 @@ dependencies = [ "reqwest 0.12.24", "rustls 0.23.35", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "serde", "serde_bytes", "sha1", @@ -5992,7 +5992,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi 0.5.2", "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -7370,7 +7370,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -9077,7 +9077,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.45.0", ] [[package]] @@ -10308,7 +10308,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.35", - "socket2 0.6.1", + "socket2 0.5.10", "thiserror 2.0.17", "tokio", "tracing", @@ -10347,9 +10347,9 @@ dependencies = [ "cfg_aliases 0.2.1", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.5.10", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -11050,7 +11050,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -11075,7 +11075,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "subtle", "zeroize", ] @@ -11146,7 +11146,7 @@ dependencies = [ "rustls 0.23.35", "rustls-native-certs 0.8.2", "rustls-platform-verifier-android", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs 0.26.11", @@ -11167,11 +11167,11 @@ dependencies = [ "rustls 0.23.35", "rustls-native-certs 0.8.2", "rustls-platform-verifier-android", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs 1.0.4", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -11203,9 +11203,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "ring", "rustls-pki-types", @@ -12719,9 +12719,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tar" -version = "0.4.44" +version = "0.4.45" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +checksum = "22692a6476a21fa75fdfc11d452fda482af402c008cdbaf3476414e122040973" dependencies = [ "filetime", "libc", @@ -12750,7 +12750,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.2", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -14283,7 +14283,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -15431,7 +15431,7 @@ dependencies = [ "rustls 0.23.35", "rustls-pemfile 2.2.0", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "secrecy 0.8.0", "serde", "socket2 0.5.10", @@ -15462,7 +15462,7 @@ dependencies = [ "quinn", "rustls 0.23.35", "rustls-pemfile 2.2.0", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "secrecy 0.8.0", "time", "tokio", @@ -15486,7 +15486,7 @@ dependencies = [ "async-trait", "quinn", "rustls 0.23.35", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "time", "tokio", "tokio-util", @@ -15527,7 +15527,7 @@ dependencies = [ "rustls 0.23.35", "rustls-pemfile 2.2.0", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.10", "secrecy 0.8.0", "socket2 0.5.10", "time", diff --git a/deny.toml b/deny.toml index 9446c6e1f..9878e6e9f 100644 --- a/deny.toml +++ b/deny.toml @@ -7,6 +7,12 @@ ignore = [ # List of advisories we have ignored { id = "RUSTSEC-2023-0071", reason = "we dont use rsa keys" }, { id = "RUSTSEC-2026-0002", reason = "todo" }, + # astral-tokio-tar 0.5.6 is a transitive dep of testcontainers; upgrade blocked + # until testcontainers releases a version depending on astral-tokio-tar >=0.6.0 + { id = "RUSTSEC-2026-0066", reason = "transitive dep via testcontainers, low severity, no fix available upstream yet" }, + # 
rustls-webpki 0.101.7 and 0.102.8 are pulled in by older aws-sdk/reqwest 0.11 + # crates; upgrade blocked until those dependents update their rustls stack + { id = "RUSTSEC-2026-0049", reason = "transitive dep via aws-sdk and reqwest 0.11, low severity, upgrade blocked by upstream" }, ] unmaintained = "workspace" # only warn for direct dependencies (not transitive ones) version = 2 diff --git a/hil/src/commands/flash.rs b/hil/src/commands/flash.rs index eb261e86e..1ce964579 100644 --- a/hil/src/commands/flash.rs +++ b/hil/src/commands/flash.rs @@ -22,7 +22,7 @@ pub struct Flash { /// The directory to save the s3 artifact we download. #[arg(long)] download_dir: Option, - /// Skips download by using an existing tarball on the filesystem. + /// Path to a downloaded RTS (zipped .tar or an already-extracted directory). #[arg(long, conflicts_with = "s3_url", required_unless_present = "s3_url")] rts_path: Option, /// If this flag is given, uses fastflashcmd.txt instead of flashcmd.txt diff --git a/hil/src/rts.rs b/hil/src/rts.rs index d2a43e05f..f05de0c45 100644 --- a/hil/src/rts.rs +++ b/hil/src/rts.rs @@ -27,12 +27,12 @@ pub async fn flash( let path_to_rts = path_to_rts_tar.to_owned(); let persistent_img_path = persistent_img_path.to_owned(); - let tmp_dir = tokio::task::spawn_blocking(move || extract(&path_to_rts)) + let extracted = tokio::task::spawn_blocking(move || extract_or_use(&path_to_rts)) .await .wrap_err("task panicked")??; - println!("{tmp_dir:?}"); + println!("{:?}", extracted.path()); - let tmp_dir_path = tmp_dir.path().to_path_buf(); + let tmp_dir_path = extracted.path().to_path_buf(); if let Some(persistent_img_path) = persistent_img_path { populate_persistent(&tmp_dir_path, persistent_img_path, rng).await?; } @@ -41,7 +41,7 @@ pub async fn flash( is_recovery_mode_detected().await?, "orb not in recovery mode" ); - tokio::task::spawn_blocking(move || flash_cmd(variant, tmp_dir.path())) + tokio::task::spawn_blocking(move || flash_cmd(variant, 
extracted.path())) .await .wrap_err("task panicked")??; @@ -69,6 +69,39 @@ impl FlashVariant { } } +/// Holds an extracted RTS directory, either a temporary one (auto-cleaned up on +/// drop) or an existing pre-extracted directory (not cleaned up). +pub(crate) enum ExtractedRts { + Temp(TempDir), + Existing(std::path::PathBuf), +} + +impl ExtractedRts { + pub(crate) fn path(&self) -> &std::path::Path { + match self { + ExtractedRts::Temp(t) => t.path(), + ExtractedRts::Existing(p) => p.as_path(), + } + } +} + +/// If `path_to_rts` is a file, extracts it into a temporary directory. +/// If it is already a directory, uses it directly without extraction. +pub(crate) fn extract_or_use(path_to_rts: &Utf8Path) -> Result { + ensure!( + path_to_rts.try_exists().unwrap_or(false), + "{path_to_rts} doesn't exist" + ); + if path_to_rts.is_dir() { + tracing::info!("using pre-extracted rts directory {path_to_rts}"); + let path = path_to_rts.canonicalize().wrap_err_with(|| { + format!("failed to canonicalize path: {}", path_to_rts) + })?; + return Ok(ExtractedRts::Existing(path)); + } + extract(path_to_rts).map(ExtractedRts::Temp) +} + pub(crate) fn extract(path_to_rts: &Utf8Path) -> Result { ensure!( path_to_rts.try_exists().unwrap_or(false), From ae10c14b946c2aa577f92ceac8a76adff6654931 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Mon, 23 Mar 2026 15:50:22 +0100 Subject: [PATCH 39/66] refactor: create OrbConfig once at startup (#1104) safer work with OrbConfig - no risk that someone forgets to use the file when adding a new command --- hil/src/boot.rs | 2 +- hil/src/commands/button_ctrl.rs | 8 +++--- hil/src/commands/cmd.rs | 20 +++------------ hil/src/commands/login.rs | 9 +++---- hil/src/commands/ota/mod.rs | 37 +++++++++------------------- hil/src/commands/ota/reboot.rs | 9 +++---- hil/src/commands/ping.rs | 8 ++---- hil/src/commands/reboot.rs | 12 +++------ hil/src/commands/set_recovery_pin.rs | 11 +++------ hil/src/ftdi.rs | 2 +- hil/src/lib.rs | 1 + hil/src/main.rs 
| 18 ++++++++------ hil/src/relay.rs | 2 +- 13 files changed, 51 insertions(+), 88 deletions(-) diff --git a/hil/src/boot.rs b/hil/src/boot.rs index 1c4851f41..fbde921b7 100644 --- a/hil/src/boot.rs +++ b/hil/src/boot.rs @@ -1,6 +1,6 @@ use std::time::Duration; -use crate::orb::{BootMode, OrbManager}; +use crate::{BootMode, OrbManager}; use color_eyre::{eyre::WrapErr as _, Result}; use tracing::info; diff --git a/hil/src/commands/button_ctrl.rs b/hil/src/commands/button_ctrl.rs index e52baa3c0..f2481610b 100644 --- a/hil/src/commands/button_ctrl.rs +++ b/hil/src/commands/button_ctrl.rs @@ -4,26 +4,24 @@ use humantime::parse_duration; use std::time::Duration; use tracing::info; -use crate::orb::{orb_manager_from_config, OrbConfig}; +use crate::{orb_manager_from_config, OrbConfig}; #[derive(Debug, Parser)] pub struct ButtonCtrl { ///Button press duration (e.g., "1s", "500ms") #[arg(long, default_value = "1s", value_parser = parse_duration)] press_duration: Duration, - #[command(flatten)] - orb_config: OrbConfig, } impl ButtonCtrl { - pub async fn run(self) -> Result<()> { + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { info!( "Holding button for {} seconds", self.press_duration.as_secs_f32() ); + let orb_config = orb_config.clone(); tokio::task::spawn_blocking(move || -> Result<(), color_eyre::Report> { - let orb_config = self.orb_config.use_file_if_exists()?; let mut orb_mgr = orb_manager_from_config(&orb_config) .wrap_err("failed to create pin controller")?; diff --git a/hil/src/commands/cmd.rs b/hil/src/commands/cmd.rs index e79184bbb..d70c3c13f 100644 --- a/hil/src/commands/cmd.rs +++ b/hil/src/commands/cmd.rs @@ -19,8 +19,8 @@ use tokio_serial::SerialPortBuilderExt as _; use tokio_stream::wrappers::BroadcastStream; use tracing::{debug, warn}; -use crate::orb::OrbConfig; use crate::serial::{spawn_serial_reader_task, WaitErr}; +use crate::OrbConfig; const PATTERN_START: &str = "hil_pattern_start-"; const PATTERN_END: &str = "-hil_pattern_end"; @@ 
-52,9 +52,6 @@ pub struct Cmd { #[arg(long, value_enum, default_value_t = CommandTransport::Serial)] transport: CommandTransport, - #[command(flatten)] - orb: OrbConfig, - /// Username for SSH/Teleport #[arg(long)] username: Option, @@ -77,14 +74,12 @@ pub struct Cmd { } impl Cmd { - pub async fn run(self) -> Result<()> { - let orb_config = self.orb.use_file_if_exists()?; - + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { if let Some(remote_transport) = self.transport.remote_transport() { - return self.run_remote(remote_transport, &orb_config).await; + return self.run_remote(remote_transport, orb_config).await; } - self.run_serial(&orb_config).await + self.run_serial(orb_config).await } async fn run_serial(self, orb_config: &OrbConfig) -> Result<()> { @@ -302,16 +297,9 @@ mod test { use super::*; fn sample_cmd() -> Cmd { - use crate::orb::OrbConfig; Cmd { cmd: "pwd".to_owned(), transport: CommandTransport::Ssh, - orb: OrbConfig::builder() - .orb_config_path(PathBuf::from("/dev/null")) - .orb_id("test.local".to_owned()) - .serial_path(PathBuf::from("/dev/null")) - .pin_ctrl_type(crate::orb::PinControlType::Ftdi) - .build(), username: None, port: 22, password: None, diff --git a/hil/src/commands/login.rs b/hil/src/commands/login.rs index f36d386e5..0fd508316 100644 --- a/hil/src/commands/login.rs +++ b/hil/src/commands/login.rs @@ -17,15 +17,13 @@ use tokio_serial::SerialPortBuilderExt as _; use tokio_stream::wrappers::BroadcastStream; use tracing::{info, warn}; -use crate::orb::OrbConfig; use crate::serial::{spawn_serial_reader_task, wait_for_pattern}; +use crate::OrbConfig; const LOGIN_PROMPT_USER: &str = "worldcoin"; #[derive(Debug, Parser)] pub struct Login { - #[command(flatten)] - orb_config: OrbConfig, #[arg(long)] password: SecretString, /// Timeout duration per-attempt (e.g., "10s", "500ms") @@ -44,9 +42,8 @@ impl Login { .wrap_err("serial-path must be specified") } - pub async fn run(self) -> Result<()> { - let orb_config = 
self.orb_config.use_file_if_exists()?; - let serial_path = Login::get_serial_path(&orb_config)?; + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { + let serial_path = Login::get_serial_path(orb_config)?; let serial = tokio_serial::new( serial_path.to_string_lossy(), diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index bd3166d01..633fb9663 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -14,7 +14,7 @@ use color_eyre::{ use secrecy::SecretString; use tracing::{error, info, instrument}; -use crate::orb::{OrbConfig, Platform}; +use crate::{OrbConfig, Platform}; mod monitor; mod reboot; @@ -33,9 +33,6 @@ pub struct Ota { #[arg(long, value_enum, default_value_t = RemoteTransport::Ssh)] transport: RemoteTransport, - #[command(flatten)] - orb_config: OrbConfig, - /// Username #[arg(long)] username: Option, @@ -71,13 +68,11 @@ impl Ota { } #[instrument] - pub async fn run(self) -> Result<()> { + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { let _start_time = Instant::now(); info!("Starting OTA update to version: {}", self.target_version); - let orb_config = self.orb_config.use_file_if_exists()?; - - let session = self.connect_remote(&orb_config).await.inspect_err(|e| { + let session = self.connect_remote(orb_config).await.inspect_err(|e| { println!("OTA_RESULT=FAILED"); println!("OTA_ERROR=REMOTE_CONNECTION_FAILED: {e}"); })?; @@ -98,7 +93,7 @@ impl Ota { info!("Reboot command sent to Orb device"); let new_session = self - .handle_reboot("wipe_overlays", &orb_config) + .handle_reboot("wipe_overlays", orb_config) .await .inspect_err(|e| { error!( @@ -152,13 +147,13 @@ impl Ota { // Note: log lines are printed in real-time during monitoring // After successful update update-agent reboots the orb - let session = self - .handle_reboot("update", &orb_config) - .await - .inspect_err(|e| { - println!("OTA_RESULT=FAILED"); - println!("OTA_ERROR=POST_UPDATE_REBOOT_FAILED: {e}"); - })?; + let 
session = + self.handle_reboot("update", orb_config) + .await + .inspect_err(|e| { + println!("OTA_RESULT=FAILED"); + println!("OTA_ERROR=POST_UPDATE_REBOOT_FAILED: {e}"); + })?; info!("Device successfully rebooted and reconnected - update application completed"); info!("Running orb-update-verifier"); @@ -267,8 +262,7 @@ impl Ota { println!("OTA_SLOT_FINAL={}", current_slot); println!("OTA_WIPE_OVERLAYS_FINAL={}", wipe_overlays_status); - let platform_name = self - .orb_config + let platform_name = orb_config .platform .map(|p| format!("{p}")) .unwrap_or_else(|| "unknown".to_string()); @@ -376,13 +370,6 @@ mod test { Ota { target_version: "test-version".to_owned(), transport: RemoteTransport::Ssh, - orb_config: OrbConfig::builder() - .orb_config_path(PathBuf::from("/dev/null")) - .orb_id("test-host".to_owned()) - .platform(Platform::Diamond) - .serial_path(PathBuf::from("/dev/null")) - .pin_ctrl_type(crate::orb::PinControlType::Ftdi) - .build(), username: None, password: None, key_path: None, diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 502b67b8a..1f91def58 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -1,5 +1,5 @@ -use crate::orb::{orb_manager_from_config, BootMode, OrbConfig}; use crate::serial::{spawn_serial_reader_task, LOGIN_PROMPT_PATTERN}; +use crate::{orb_manager_from_config, BootMode, OrbConfig}; use crate::remote_cmd::RemoteSession; use color_eyre::{ @@ -28,11 +28,10 @@ impl Ota { // // For FTDI: set_boot_mode(Normal) sets RTS HIGH and holds the handle open. // For relays: set_boot_mode(Normal) turns off both power and recovery channels. 
- let orb_config_for_pin = self.orb_config.clone(); + let orb_config_for_pin = orb_config.clone(); let (pin_release_tx, pin_release_rx) = std::sync::mpsc::channel::<()>(); let recovery_task = tokio::task::spawn_blocking(move || -> Result<()> { - let orb_config = orb_config_for_pin.use_file_if_exists()?; - let mut orb_mgr = orb_manager_from_config(&orb_config) + let mut orb_mgr = orb_manager_from_config(&orb_config_for_pin) .wrap_err("failed to create pin controller")?; orb_mgr.set_boot_mode(BootMode::Normal)?; info!("✓ Recovery pin set to normal boot mode, waiting for boot"); @@ -122,7 +121,7 @@ impl Ota { log_suffix: &str, orb_config: &OrbConfig, ) -> Result<()> { - let platform_name = if let Some(platform) = self.orb_config.platform { + let platform_name = if let Some(platform) = orb_config.platform { format!("{}", platform) } else { "unknown".to_string() diff --git a/hil/src/commands/ping.rs b/hil/src/commands/ping.rs index d1bf2f9dc..f6b68b89f 100644 --- a/hil/src/commands/ping.rs +++ b/hil/src/commands/ping.rs @@ -3,7 +3,7 @@ use color_eyre::{eyre::eyre, eyre::WrapErr as _, Result}; use tokio::time::{timeout, Duration}; use tracing::debug; -use crate::orb::OrbConfig; +use crate::OrbConfig; /// Wait until the orb is reachable via mDNS ping #[derive(Debug, Parser)] @@ -13,14 +13,10 @@ pub struct Ping { #[arg(long, default_value = "1s", value_parser = humantime::parse_duration)] interval: Duration, - - #[command(flatten)] - orb_config: OrbConfig, } impl Ping { - pub async fn run(self) -> Result<()> { - let orb_config = self.orb_config.use_file_if_exists()?; + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { let hostname = orb_config .get_hostname() .ok_or_else(|| eyre!("orb-id or hostname must be specified"))?; diff --git a/hil/src/commands/reboot.rs b/hil/src/commands/reboot.rs index 430d9cad5..ffedd28b4 100644 --- a/hil/src/commands/reboot.rs +++ b/hil/src/commands/reboot.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU8; use tokio::time::Duration; use 
tracing::{info, warn}; -use crate::orb::{orb_manager_from_config, OrbConfig}; +use crate::{orb_manager_from_config, OrbConfig}; /// Reboot the orb #[derive(Debug, Parser)] @@ -15,16 +15,12 @@ pub struct Reboot { make_sure: bool, #[arg(short, long, default_value_t = NonZeroU8::new(1).unwrap())] attempts_count: NonZeroU8, - #[command(flatten)] - orb_config: OrbConfig, } impl Reboot { - pub async fn run(self) -> Result<()> { - let orb_config = self.orb_config.use_file_if_exists()?; - + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { let mut controller = tokio::task::block_in_place(|| { - orb_manager_from_config(&orb_config) + orb_manager_from_config(orb_config) .wrap_err("failed to create pin controller") })?; @@ -36,7 +32,7 @@ impl Reboot { { warn!("Attempt {}, cannot reboot: {}", i, e); controller = tokio::task::block_in_place(|| { - orb_manager_from_config(&orb_config) + orb_manager_from_config(orb_config) .wrap_err("failed to create pin controller") })?; continue; diff --git a/hil/src/commands/set_recovery_pin.rs b/hil/src/commands/set_recovery_pin.rs index cfe71510e..b17bf98d4 100644 --- a/hil/src/commands/set_recovery_pin.rs +++ b/hil/src/commands/set_recovery_pin.rs @@ -3,7 +3,7 @@ use color_eyre::{eyre::WrapErr as _, Result}; use std::time::Duration; use crate::ftdi::OutputState; -use crate::orb::{orb_manager_from_config, BootMode, OrbConfig}; +use crate::{orb_manager_from_config, BootMode, OrbConfig}; /// Set the recovery pin to a specific state without triggering the button /// @@ -20,8 +20,6 @@ pub struct SetRecoveryPin { /// Default is 5 seconds #[arg(long, default_value = "5")] pub duration: u64, - #[command(flatten)] - pub orb_config: OrbConfig, } fn parse_pin_state(s: &str) -> Result { @@ -36,9 +34,7 @@ fn parse_pin_state(s: &str) -> Result { } impl SetRecoveryPin { - pub async fn run(self) -> Result<()> { - let orb_config = self.orb_config.use_file_if_exists()?; - + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { let 
state_name = match self.state { OutputState::High => "HIGH (normal boot mode)", OutputState::Low => "LOW (recovery mode)", @@ -52,9 +48,10 @@ impl SetRecoveryPin { let hold_duration = Duration::from_secs(self.duration); let state = self.state; + let owned_config = orb_config.clone(); tokio::task::spawn_blocking(move || -> Result<()> { - let mut orb_mgr = orb_manager_from_config(&orb_config) + let mut orb_mgr = orb_manager_from_config(&owned_config) .wrap_err("failed to create pin controller")?; // IMPORTANT: Set button pin HIGH first to prevent power down diff --git a/hil/src/ftdi.rs b/hil/src/ftdi.rs index 4bf7abb65..42abf8acb 100644 --- a/hil/src/ftdi.rs +++ b/hil/src/ftdi.rs @@ -15,7 +15,7 @@ //! Read more in section 4.2 of //! -use crate::orb::{BootMode, OrbManager}; +use crate::{BootMode, OrbManager}; use color_eyre::{ eyre::{bail, ensure, eyre, OptionExt, WrapErr as _}, Result, diff --git a/hil/src/lib.rs b/hil/src/lib.rs index b8a57170d..f66a5e6b5 100644 --- a/hil/src/lib.rs +++ b/hil/src/lib.rs @@ -18,6 +18,7 @@ pub mod verify; #[path = "commands/ota/mcu_util.rs"] pub mod mcu_util; +pub use orb::{orb_manager_from_config, BootMode, OrbConfig, OrbManager, Platform}; pub use remote_cmd::{RemoteConnectArgs, RemoteSession, RemoteTransport}; pub use ssh_wrapper::AuthMethod; diff --git a/hil/src/main.rs b/hil/src/main.rs index be58c0f3b..269355369 100644 --- a/hil/src/main.rs +++ b/hil/src/main.rs @@ -1,6 +1,7 @@ #![forbid(unsafe_code)] use orb_hil::commands; +use orb_hil::OrbConfig; use clap::{Parser, Subcommand}; use color_eyre::{eyre::WrapErr, Result}; @@ -13,6 +14,8 @@ const BUILD_INFO: BuildInfo = make_build_info!(); #[derive(Parser, Debug)] #[command(about, author, version=BUILD_INFO.version, styles=make_clap_v3_styles())] struct Cli { + #[command(flatten)] + orb_config: OrbConfig, #[command(subcommand)] commands: Commands, } @@ -55,19 +58,20 @@ async fn main() -> Result<()> { .init(); let args = Cli::parse(); + let orb_config = 
args.orb_config.use_file_if_exists()?; let run_fut = async { match args.commands { - Commands::ButtonCtrl(c) => c.run().await, - Commands::Cmd(c) => c.run().await, + Commands::ButtonCtrl(c) => c.run(&orb_config).await, + Commands::Cmd(c) => c.run(&orb_config).await, Commands::FetchPersistent(c) => c.run().await, Commands::Flash(c) => c.run().await, - Commands::Login(c) => c.run().await, + Commands::Login(c) => c.run(&orb_config).await, Commands::Mcu(c) => c.run().await, Commands::Nfsboot(c) => c.run().await, - Commands::Ota(c) => c.run().await, - Commands::Ping(c) => c.run().await, - Commands::Reboot(c) => c.run().await, - Commands::SetRecoveryPin(c) => c.run().await, + Commands::Ota(c) => c.run(&orb_config).await, + Commands::Ping(c) => c.run(&orb_config).await, + Commands::Reboot(c) => c.run(&orb_config).await, + Commands::SetRecoveryPin(c) => c.run(&orb_config).await, } }; tokio::select! { diff --git a/hil/src/relay.rs b/hil/src/relay.rs index 2df2e8b82..cfb530644 100644 --- a/hil/src/relay.rs +++ b/hil/src/relay.rs @@ -23,7 +23,7 @@ use color_eyre::{ }; use tracing::debug; -use crate::orb::{BootMode, OrbManager}; +use crate::{BootMode, OrbManager}; const HID_ON: u8 = 0xFF; const HID_OFF: u8 = 0xFD; From 529bc3a1280fa24d12e282cea8cdd501d3de01d8 Mon Sep 17 00:00:00 2001 From: Ryan Butler Date: Mon, 23 Mar 2026 22:51:10 -0400 Subject: [PATCH 40/66] feat(orb-info): add serde support for OrbId (#1105) Implements serde::{Serialize, Deserialize} on OrbId. 
--- Cargo.lock | 2 ++ orb-info/Cargo.toml | 4 ++++ orb-info/src/orb_id.rs | 52 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 5d7059dd8..97b1214c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8323,6 +8323,8 @@ dependencies = [ "eyre", "hex 0.4.3", "orb-attest-dbus", + "serde", + "serde_json", "serial_test 3.2.0", "tempfile", "thiserror 2.0.17", diff --git a/orb-info/Cargo.toml b/orb-info/Cargo.toml index fe0b7074a..762c7bf5b 100644 --- a/orb-info/Cargo.toml +++ b/orb-info/Cargo.toml @@ -24,11 +24,13 @@ orb-token = [ "dep:tracing", "dep:zbus", ] +serde = ["dep:serde"] [dependencies] derive_more.workspace = true hex = { workspace = true, optional = true } orb-attest-dbus = { workspace = true, optional = true } +serde = { workspace = true, optional = true, features = ["derive"] } thiserror = { workspace = true, features = ["std"] } tokio = { workspace = true, optional = true } tokio-util = { workspace = true, optional = true } @@ -38,6 +40,8 @@ zbus = { workspace = true, optional = true } [dev-dependencies] dbus-launch.workspace = true eyre.workspace = true +serde.workspace = true +serde_json.workspace = true serial_test.workspace = true tempfile.workspace = true tokio.workspace = true diff --git a/orb-info/src/orb_id.rs b/orb-info/src/orb_id.rs index 50b2288d8..8832f2ea8 100644 --- a/orb-info/src/orb_id.rs +++ b/orb-info/src/orb_id.rs @@ -61,6 +61,27 @@ macro_rules! 
impl_orb_id { f.write_str(&self.string) } } + + #[cfg(feature = "serde")] + impl serde::Serialize for $name { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(self.as_str()) + } + } + + #[cfg(feature = "serde")] + impl<'de> serde::Deserialize<'de> for $name { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + s.parse().map_err(serde::de::Error::custom) + } + } }; } @@ -91,6 +112,8 @@ impl_orb_id! { /// An orb id. #[derive(Debug, Clone, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", serde(untagged))] pub enum OrbId { Short(OrbIdShort), Long(OrbIdLong), @@ -137,6 +160,13 @@ impl OrbId { Self::Long(id) => id.as_bytes(), } } + + pub fn is_long(&self) -> bool { + match self { + OrbId::Short(_) => false, + OrbId::Long(_) => true, + } + } } impl From for OrbId { @@ -235,4 +265,26 @@ mod test { std::env::remove_var("ORB_ID"); } + + #[test] + fn test_serde_orb_id() { + let short = "ea2ea744"; + let long = "ea2ea744295c5dacb12a825713f9cec1a2f4d63d86803a15fe580d6a468ab6d2"; + let short_json = serde_json::json!({ + "short": short, + "long": long, + }); + + #[derive(serde::Serialize, serde::Deserialize)] + struct Example { + short: OrbId, + long: OrbId, + } + + let deserialized: Example = serde_json::from_value(short_json).unwrap(); + assert_eq!(deserialized.short.as_str(), short); + assert!(!deserialized.short.is_long()); + assert_eq!(deserialized.long.as_str(), long); + assert!(deserialized.long.is_long()); + } } From eca9c2ae33768257d33f4252e4a0e300dabd580c Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Tue, 24 Mar 2026 13:38:28 +0100 Subject: [PATCH 41/66] chore(hil): bump orb-hil and install tio (#1107) minor changes --- nix/machines/hil-common.nix | 1 + nix/packages/orb-hil.nix | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff 
--git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 0d3208790..8872e70cc 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -81,6 +81,7 @@ in pkgs.zsync pkgs.casync pkgs.goofys + pkgs.tio ]; networking.hostName = "${hostname}"; diff --git a/nix/packages/orb-hil.nix b/nix/packages/orb-hil.nix index fa20318fb..0ebf0d2d4 100644 --- a/nix/packages/orb-hil.nix +++ b/nix/packages/orb-hil.nix @@ -2,11 +2,11 @@ { pkgs }: pkgs.stdenv.mkDerivation rec { pname = "orb-hil"; - version = "0.0.2-beta.19"; + version = "0.0.2-beta.20"; src = pkgs.fetchurl { url = "https://github.com/worldcoin/orb-software/releases/download/orb-hil%2Fv${version}/orb-hil_x86_64"; - sha256 = "sha256-8Q6THMhmZnmFMqTKH6QwCfZvUmerzjQe1yewu6qsxp0="; + sha256 = "sha256-daaE7oRIVduHYsxBNf+RDCSJ9byo7J/ibye0OsX0dPg="; }; dontUnpack = true; From 13a7a695a0caaeebcce40ede1a5202d6056854e3 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 25 Mar 2026 10:16:18 +0100 Subject: [PATCH 42/66] feat: use unpacked RTSs in nfsboot too (#1111) will eventually save some time during tests --- hil/src/nfsboot.rs | 56 +++++++++++++++++++++++++++++----------------- hil/src/rts.rs | 6 +++++ 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/hil/src/nfsboot.rs b/hil/src/nfsboot.rs index b46a21468..2e6577d15 100644 --- a/hil/src/nfsboot.rs +++ b/hil/src/nfsboot.rs @@ -11,7 +11,7 @@ use tokio::sync::oneshot; use tokio::task::spawn_blocking; use tracing::{debug, warn}; -use crate::rts::{extract, FlashVariant}; +use crate::rts::{extract_or_use, ExtractedRts, FlashVariant}; pub const USE_NIXOS: &str = "make sure this computer is running on a recent orb-software NixOS flake"; @@ -47,11 +47,11 @@ pub async fn nfsboot( persistent_img_path: Option<&Path>, rng: impl rand::Rng + Send + 'static, ) -> Result { - let tmp_dir = tokio::task::spawn_blocking(move || extract(&path_to_rts)) + let extracted = tokio::task::spawn_blocking(move || extract_or_use(&path_to_rts)) .await 
.wrap_err("task panicked")??; - debug!("temp dir: {tmp_dir:?}"); - let rts_dir = tmp_dir.path().join("rts"); + debug!("extracted rts: {:?}", extracted.path()); + let rts_dir = extracted.path().join("rts"); assert!( tokio::fs::try_exists(&rts_dir).await.unwrap_or(false), "we expected a directory called `rts` after extracting" @@ -62,19 +62,21 @@ pub async fn nfsboot( } if let Some(persistent_img_path) = persistent_img_path { - crate::rts::populate_persistent(tmp_dir.path(), persistent_img_path, rng) + crate::rts::populate_persistent(extracted.path(), persistent_img_path, rng) .await?; } - let scratch_dir = tmp_dir.path().join("scratch"); - tokio::fs::create_dir(&scratch_dir) - .await - .wrap_err_with(|| format!("failed to create {scratch_dir:?}"))?; + let scratch_tmp = tokio::task::spawn_blocking(|| { + TempDir::new().wrap_err("failed to create scratch temp dir") + }) + .await + .wrap_err("task panicked")??; + let scratch_dir = scratch_tmp.path().to_path_buf(); - let tmp_dir_path = tmp_dir.path().to_path_buf(); - let rts_dir = tmp_dir.path().join("rts"); + let tmp_dir_path = extracted.path().to_path_buf(); + let rts_dir = extracted.path().join("rts"); let mounter = tokio::task::spawn_blocking(move || { - let mut mounter = Mounter::new(tmp_dir); + let mut mounter = Mounter::new(extracted, scratch_tmp); mounter .do_mounting(&rts_dir, &scratch_dir, &mounts) .map(|()| mounter) @@ -107,15 +109,17 @@ pub async fn nfsboot( #[derive(Debug)] struct Mounter { mounts: Vec, - tmp: Option, + extracted: Option, + scratch_tmp: Option, } #[bon::bon] impl Mounter { - fn new(temp_dir: TempDir) -> Self { + fn new(extracted: ExtractedRts, scratch_tmp: TempDir) -> Self { Self { mounts: Vec::new(), - tmp: Some(temp_dir), + extracted: Some(extracted), + scratch_tmp: Some(scratch_tmp), } } @@ -245,14 +249,24 @@ impl Drop for Mounter { // The regular destructor of TempDir doesn't work, because the directory contains // root-owned files. We need to delete manually with sudo. 
- let tmp = self.tmp.take().expect("always Some until drop"); - let tmp_path = tmp.path(); - debug!("deleting tempdir {tmp_path:?}"); - let result = run_fun!(sudo rm -rf $tmp_path) - .wrap_err("failed to remove tempdir with sudo"); - if let Err(err) = result { + let scratch = self.scratch_tmp.take().expect("always Some until drop"); + let scratch_path = scratch.path(); + debug!("deleting scratch dir {scratch_path:?}"); + if let Err(err) = run_fun!(sudo rm -rf $scratch_path) + .wrap_err("failed to remove scratch dir with sudo") + { warn!("{err:?}"); } + + if let Some(crate::rts::ExtractedRts::Temp(tmp)) = self.extracted.take() { + let tmp_path = tmp.path(); + debug!("deleting extracted rts tempdir {tmp_path:?}"); + if let Err(err) = run_fun!(sudo rm -rf $tmp_path) + .wrap_err("failed to remove extracted rts tempdir with sudo") + { + warn!("{err:?}"); + } + } } } diff --git a/hil/src/rts.rs b/hil/src/rts.rs index f05de0c45..5ddf4ee9a 100644 --- a/hil/src/rts.rs +++ b/hil/src/rts.rs @@ -85,6 +85,12 @@ impl ExtractedRts { } } +impl std::fmt::Debug for ExtractedRts { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ExtractedRts({:?})", self.path()) + } +} + /// If `path_to_rts` is a file, extracts it into a temporary directory. /// If it is already a directory, uses it directly without extraction. 
pub(crate) fn extract_or_use(path_to_rts: &Utf8Path) -> Result { From 0b32d335e33fa8ae8f589c8206dba5989b545c75 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 25 Mar 2026 10:26:01 +0100 Subject: [PATCH 43/66] feat: correct timezone for munich hils and more system-wide packages (#1108) - use correct timezones - install packages system-wide --- nix/machines/flake-outputs.nix | 82 +++++++++++++++++++++++++++------- nix/machines/hil-common.nix | 35 ++++++++++++--- 2 files changed, 93 insertions(+), 24 deletions(-) diff --git a/nix/machines/flake-outputs.nix b/nix/machines/flake-outputs.nix index dbbe31aaf..2b20f6ba2 100644 --- a/nix/machines/flake-outputs.nix +++ b/nix/machines/flake-outputs.nix @@ -18,6 +18,7 @@ let hostname, homeManagerCfg, diskoConfig, + extraModules ? [ ], }: nixpkgs.lib.nixosSystem rec { specialArgs = { @@ -56,36 +57,83 @@ let # setup disko for disk partitioning disko.nixosModules.disko diskoConfig - ]; + ] + ++ extraModules; }; # Helper function for all HILs. Further specializes `nixosConfig`. hilConfig = - { hostname }: + { hostname, timezone }: nixosConfig { system = "x86_64-linux"; hostname = "${hostname}"; homeManagerCfg = ./home-hil.nix; diskoConfig = ./disko-bios-uefi-hil.nix; + extraModules = [ { time.timeZone = timezone; } ]; }; # Machine list is here, if you are adding a new machine, don't edit anything # above this line. 
in { - nixosConfigurations."ryan-worldcoin-hil" = hilConfig { hostname = "ryan-worldcoin-hil"; }; - nixosConfigurations."worldcoin-hil-jabil-0" = hilConfig { hostname = "worldcoin-hil-jabil-0"; }; - nixosConfigurations."worldcoin-hil-munich-0" = hilConfig { hostname = "worldcoin-hil-munich-0"; }; - nixosConfigurations."worldcoin-hil-munich-1" = hilConfig { hostname = "worldcoin-hil-munich-1"; }; - nixosConfigurations."worldcoin-hil-munich-2" = hilConfig { hostname = "worldcoin-hil-munich-2"; }; - nixosConfigurations."worldcoin-hil-munich-3" = hilConfig { hostname = "worldcoin-hil-munich-3"; }; - nixosConfigurations."worldcoin-hil-munich-4" = hilConfig { hostname = "worldcoin-hil-munich-4"; }; - nixosConfigurations."worldcoin-hil-munich-5" = hilConfig { hostname = "worldcoin-hil-munich-5"; }; - nixosConfigurations."worldcoin-hil-munich-6" = hilConfig { hostname = "worldcoin-hil-munich-6"; }; - nixosConfigurations."worldcoin-hil-munich-7" = hilConfig { hostname = "worldcoin-hil-munich-7"; }; - nixosConfigurations."worldcoin-hil-munich-8" = hilConfig { hostname = "worldcoin-hil-munich-8"; }; - nixosConfigurations."worldcoin-hil-munich-9" = hilConfig { hostname = "worldcoin-hil-munich-9"; }; - nixosConfigurations."worldcoin-hil-munich-10" = hilConfig { hostname = "worldcoin-hil-munich-10"; }; - nixosConfigurations."worldcoin-hil-munich-11" = hilConfig { hostname = "worldcoin-hil-munich-11"; }; - nixosConfigurations."worldcoin-hil-sf-0" = hilConfig { hostname = "worldcoin-hil-sf-0"; }; + nixosConfigurations."ryan-worldcoin-hil" = hilConfig { + hostname = "ryan-worldcoin-hil"; + timezone = "America/New_York"; + }; + nixosConfigurations."worldcoin-hil-jabil-0" = hilConfig { + hostname = "worldcoin-hil-jabil-0"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-0" = hilConfig { + hostname = "worldcoin-hil-munich-0"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-1" = hilConfig { + hostname = 
"worldcoin-hil-munich-1"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-2" = hilConfig { + hostname = "worldcoin-hil-munich-2"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-3" = hilConfig { + hostname = "worldcoin-hil-munich-3"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-4" = hilConfig { + hostname = "worldcoin-hil-munich-4"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-5" = hilConfig { + hostname = "worldcoin-hil-munich-5"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-6" = hilConfig { + hostname = "worldcoin-hil-munich-6"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-7" = hilConfig { + hostname = "worldcoin-hil-munich-7"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-8" = hilConfig { + hostname = "worldcoin-hil-munich-8"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-9" = hilConfig { + hostname = "worldcoin-hil-munich-9"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-10" = hilConfig { + hostname = "worldcoin-hil-munich-10"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-munich-11" = hilConfig { + hostname = "worldcoin-hil-munich-11"; + timezone = "Europe/Berlin"; + }; + nixosConfigurations."worldcoin-hil-sf-0" = hilConfig { + hostname = "worldcoin-hil-sf-0"; + timezone = "America/Los_Angeles"; + }; nixosConfigurations."liveusb" = nixosConfig { system = "x86_64-linux"; hostname = "liveusb"; diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 8872e70cc..37e9947c4 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -76,12 +76,34 @@ in config = { # Install test-related packages - environment.systemPackages = [ + environment.systemPackages = with pkgs; [ orb-hil - pkgs.zsync - pkgs.casync - 
pkgs.goofys - pkgs.tio + zsync + casync + goofys + tio + bun + curl + dtc + gcc + zstd + libxml2 + lz4c + openssl + perl + udev + libguestfs-with-appliance + abootimg + (python312.withPackages ( + ps: with ps; [ + pyyaml + pyserial + pyftdi + pyocd + cmsis-pack-manager + cffi + ] + )) ]; networking.hostName = "${hostname}"; @@ -124,8 +146,7 @@ in KERNEL=="hidraw*", SUBSYSTEM=="hidraw", MODE="0664", GROUP="plugdev" ''; - # Set your time zone. - time.timeZone = "America/New_York"; + environment.variables.NIXPKGS_ALLOW_UNFREE = "1"; # Select internationalisation properties. i18n.defaultLocale = "en_US.UTF-8"; From 61bca3a4549b76ac02c4f465927b85e008f0e10d Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 25 Mar 2026 10:39:41 +0100 Subject: [PATCH 44/66] fix: also install the ssh key for the hil users (#1110) so the permissions are correct 0600 --- .github/workflows/deploy-hil.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/deploy-hil.yaml b/.github/workflows/deploy-hil.yaml index 8fe79d568..73f906b51 100644 --- a/.github/workflows/deploy-hil.yaml +++ b/.github/workflows/deploy-hil.yaml @@ -72,6 +72,29 @@ jobs: sudo chmod 640 "${HIL_SSH_SYSTEM_KEY_PATH}" fi + - name: Install HIL SSH key for users + env: + HIL_SSH_PRIVATE_KEY: ${{ secrets.HIL_PRIVATE_SSH_KEY }} + run: | + set -Eeuo pipefail + + tmp_key="$(mktemp)" + trap 'rm -f "${tmp_key}"' EXIT + printf '%s\n' "${HIL_SSH_PRIVATE_KEY}" > "${tmp_key}" + + for user in worldcoin gh-runner-user; do + home="$(getent passwd "${user}" | cut -d: -f6)" + ssh_dir="${home}/.ssh" + key_path="${ssh_dir}/id_ed25519" + sudo install -d -m 700 -o "${user}" -g users "${ssh_dir}" + if ! sudo test -f "${key_path}" || ! 
sudo cmp -s "${tmp_key}" "${key_path}"; then + sudo install -m 600 -o "${user}" -g users "${tmp_key}" "${key_path}" + else + sudo chown "${user}:users" "${key_path}" + sudo chmod 600 "${key_path}" + fi + done + - name: Summary if: always() run: | From 820d4f149eab1ed27b5b37a123ce3a79b4344323 Mon Sep 17 00:00:00 2001 From: vmenge Date: Wed, 25 Mar 2026 12:22:18 +0100 Subject: [PATCH 45/66] feat(mcu-util): add modem power-cycle command (#1094) ## context needed by `orb-connd` for modem self-healing when the modem is blacklisted by `ModemManager` ## todo - [x] test on an orb --- mcu-util/src/main.rs | 6 ++++++ mcu-util/src/orb/main_board.rs | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/mcu-util/src/main.rs b/mcu-util/src/main.rs index ebf576043..4a2af6194 100644 --- a/mcu-util/src/main.rs +++ b/mcu-util/src/main.rs @@ -311,6 +311,9 @@ enum PowerCycleComponent { /// [dev] Power-cycle the Wifi & BLE module #[clap(action)] Wifi, + /// Power-cycle the modem LTE power rail + #[clap(action)] + Modem, } async fn execute(args: Args) -> Result<()> { @@ -456,6 +459,9 @@ async fn execute(args: Args) -> Result<()> { PowerCycleComponent::Wifi => { orb.main_board_mut().wifi_power_cycle().await? } + PowerCycleComponent::Modem => { + orb.main_board_mut().modem_power_cycle().await? 
+ } }, SubCommand::Ui(opts) => match opts { UiOpts::Front(leds) => orb.main_board_mut().front_leds(leds).await?, diff --git a/mcu-util/src/orb/main_board.rs b/mcu-util/src/orb/main_board.rs index 1767fdac3..202f8b602 100644 --- a/mcu-util/src/orb/main_board.rs +++ b/mcu-util/src/orb/main_board.rs @@ -745,6 +745,29 @@ impl MainBoard { Ok(()) } + pub async fn modem_power_cycle(&mut self) -> Result<()> { + match self + .send(McuPayload::ToMain( + main_messaging::jetson_to_mcu::Payload::PowerCycle( + main_messaging::PowerCycle { + line: main_messaging::power_cycle::Line::Lte3v3 as i32, + duration_ms: 0, // use default + }, + ), + )) + .await + { + Ok(CommonAckError::Success) => { /* nothing */ } + Ok(a) => { + return Err(eyre!("error power cycling modem: ack {a:?}")); + } + Err(e) => { + return Err(eyre!("error power cycling modem: {e:?}")); + } + } + Ok(()) + } + pub async fn heat_camera_power_cycle(&mut self) -> Result<()> { match self .send(McuPayload::ToMain( From 33174c62d02e5330c5d47ae0e7e13c0c5a1c0c81 Mon Sep 17 00:00:00 2001 From: vmenge Date: Wed, 25 Mar 2026 12:39:45 +0100 Subject: [PATCH 46/66] refactor(connd): refactor of tasks; modem self-healing; new OES events (#1109) this PR refactors task usage in `connd` to rely on `speare` for easier task management, free restart / backoff logic and a free broker for named channels between tasks. 
it also publishes new data on the OES, and introduces modem self-healing ## new - modem self-healing (powercycle whenever it is blacklisted by modem manager) - `fw_revision` field on `CellularStatus` - `CellularStatus` now published on OES - `ConndReport` and `ActiveConnections` both simplified to publish based on `net-state` event published internally on `speare` broker - `NetStats` now on OES - Datadog reporter now reporting usage for eth / wwan / wlan instead of only wlan - logging of number of wifi profiles on startup to help debug potential issues ## tested on an orb yes ## do not merge [until this PR is merged](https://github.com/worldcoin/orb-software/pull/1094) (soon, i just need to unclankerfy it) --- Cargo.lock | 70 +++--- Cargo.toml | 1 + orb-backend-status/dbus/src/types.rs | 1 + orb-backend-status/src/backend/status.rs | 1 + orb-backend-status/src/backend/types.rs | 1 + orb-backend-status/tests/fixture.rs | 1 + orb-connd/Cargo.toml | 3 +- orb-connd/src/connectivity_daemon.rs | 147 ++++-------- orb-connd/src/lib.rs | 8 +- orb-connd/src/main.rs | 13 +- orb-connd/src/mcu_util/cli.rs | 23 ++ orb-connd/src/mcu_util/mod.rs | 13 ++ orb-connd/src/modem/mod.rs | 216 ++++++++++++++++++ orb-connd/src/modem_manager/cli.rs | 6 + orb-connd/src/modem_manager/mod.rs | 1 + ...ctions_report.rs => active_connections.rs} | 40 ++-- .../backend_status_cellular_reporter.rs | 58 ----- .../reporters/backend_status_wifi_reporter.rs | 143 ------------ orb-connd/src/reporters/cellular_status.rs | 63 +++++ orb-connd/src/reporters/connd_report.rs | 99 ++++++++ orb-connd/src/reporters/datadog.rs | 110 +++++++++ orb-connd/src/reporters/dd_modem_reporter.rs | 114 --------- orb-connd/src/reporters/mod.rs | 178 ++++++--------- orb-connd/src/reporters/modem_monitor.rs | 78 ------- orb-connd/src/reporters/modem_status.rs | 39 ---- .../src/reporters/net_changed_reporter.rs | 114 --------- orb-connd/src/reporters/net_state.rs | 80 +++++++ orb-connd/src/reporters/net_stats.rs | 109 +++++++-- 
orb-connd/src/service/mod.rs | 32 +-- orb-connd/src/statsd/dd.rs | 26 +-- orb-connd/src/statsd/mod.rs | 22 +- orb-connd/src/systemd.rs | 22 ++ orb-connd/src/utils.rs | 31 +-- orb-connd/tests/fixture.rs | 52 +++-- 34 files changed, 994 insertions(+), 921 deletions(-) create mode 100644 orb-connd/src/mcu_util/cli.rs create mode 100644 orb-connd/src/mcu_util/mod.rs create mode 100644 orb-connd/src/modem/mod.rs rename orb-connd/src/reporters/{active_connections_report.rs => active_connections.rs} (90%) delete mode 100644 orb-connd/src/reporters/backend_status_cellular_reporter.rs delete mode 100644 orb-connd/src/reporters/backend_status_wifi_reporter.rs create mode 100644 orb-connd/src/reporters/cellular_status.rs create mode 100644 orb-connd/src/reporters/connd_report.rs create mode 100644 orb-connd/src/reporters/datadog.rs delete mode 100644 orb-connd/src/reporters/dd_modem_reporter.rs delete mode 100644 orb-connd/src/reporters/modem_monitor.rs delete mode 100644 orb-connd/src/reporters/modem_status.rs delete mode 100644 orb-connd/src/reporters/net_changed_reporter.rs create mode 100644 orb-connd/src/reporters/net_state.rs create mode 100644 orb-connd/src/systemd.rs diff --git a/Cargo.lock b/Cargo.lock index 97b1214c1..63f6c8746 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3113,7 +3113,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3608,7 +3608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4070,9 +4070,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = 
"7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" @@ -4282,7 +4282,7 @@ dependencies = [ "gobject-sys", "libc", "system-deps 7.0.7", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5120,7 +5120,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.1", "tokio", "tower-service", "tracing", @@ -5598,7 +5598,7 @@ dependencies = [ "ring", "rustls 0.23.35", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "serde", "smallvec", "snafu 0.8.9", @@ -5933,7 +5933,7 @@ dependencies = [ "reqwest 0.12.24", "rustls 0.23.35", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "serde", "serde_bytes", "sha1", @@ -5992,7 +5992,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi 0.5.2", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6048,9 +6048,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jep106" @@ -7370,7 +7370,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -8206,6 +8206,7 @@ dependencies = [ "secrecy 0.8.0", "serde", "serde_json", + "speare", "test-utils", "test-with", "thiserror 2.0.17", @@ -9079,7 +9080,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.45.0", + "windows-sys 0.61.2", ] [[package]] @@ -10310,7 
+10311,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.35", - "socket2 0.5.10", + "socket2 0.6.1", "thiserror 2.0.17", "tokio", "tracing", @@ -10349,9 +10350,9 @@ dependencies = [ "cfg_aliases 0.2.1", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.1", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -11052,7 +11053,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -11077,7 +11078,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "subtle", "zeroize", ] @@ -11148,7 +11149,7 @@ dependencies = [ "rustls 0.23.35", "rustls-native-certs 0.8.2", "rustls-platform-verifier-android", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs 0.26.11", @@ -11169,11 +11170,11 @@ dependencies = [ "rustls 0.23.35", "rustls-native-certs 0.8.2", "rustls-platform-verifier-android", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs 1.0.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -11205,9 +11206,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ "ring", "rustls-pki-types", @@ -12088,6 +12089,17 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "speare" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db73309d2824b1a95d9e934fa2e2ed1686731926f51a077f6541cf1a3506f8f" +dependencies = [ + "flume", + "futures-core", + "tokio", +] + [[package]] name = "spin" version = "0.9.8" @@ 
-12752,7 +12764,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.2", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -14285,7 +14297,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -15433,7 +15445,7 @@ dependencies = [ "rustls 0.23.35", "rustls-pemfile 2.2.0", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "secrecy 0.8.0", "serde", "socket2 0.5.10", @@ -15464,7 +15476,7 @@ dependencies = [ "quinn", "rustls 0.23.35", "rustls-pemfile 2.2.0", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "secrecy 0.8.0", "time", "tokio", @@ -15488,7 +15500,7 @@ dependencies = [ "async-trait", "quinn", "rustls 0.23.35", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "time", "tokio", "tokio-util", @@ -15529,7 +15541,7 @@ dependencies = [ "rustls 0.23.35", "rustls-pemfile 2.2.0", "rustls-pki-types", - "rustls-webpki 0.103.10", + "rustls-webpki 0.103.8", "secrecy 0.8.0", "socket2 0.5.10", "time", diff --git a/Cargo.toml b/Cargo.toml index 9645190e9..959f20e82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -155,6 +155,7 @@ serde_yaml = "0.9" serial_test = "3.2.0" serialport = "4.6.1" sha2 = "0.10.8" +speare = "0.4.3" sqlx = { version = "0.8.5", default-features = false } tar = "0.4" tempfile = "3.10.1" diff --git a/orb-backend-status/dbus/src/types.rs b/orb-backend-status/dbus/src/types.rs index 230c288e4..093c1b0dc 100644 --- a/orb-backend-status/dbus/src/types.rs +++ b/orb-backend-status/dbus/src/types.rs @@ -74,6 +74,7 @@ pub struct NetIntf { #[derive(Debug, Clone, Type, Serialize, Deserialize, PartialEq)] pub struct CellularStatus { pub imei: String, + pub fw_revision: Option, pub iccid: Option, /// Radio Access Technology -- e.g.: gsm, lte pub rat: Option, diff --git a/orb-backend-status/src/backend/status.rs 
b/orb-backend-status/src/backend/status.rs index 57bab068c..47ef0579b 100644 --- a/orb-backend-status/src/backend/status.rs +++ b/orb-backend-status/src/backend/status.rs @@ -254,6 +254,7 @@ async fn build_status_request_v2( .and_then(|cs| cs.iccid.as_ref().map(|iccid| (cs, iccid))) .map(|(cs, iccid)| CellularStatusApiV2 { imei: cs.imei.clone(), + fw_revision: cs.fw_revision.clone(), iccid: iccid.to_owned(), rat: cs.rat.clone(), operator: cs.operator.clone(), diff --git a/orb-backend-status/src/backend/types.rs b/orb-backend-status/src/backend/types.rs index 7e1a670e3..b4268633e 100644 --- a/orb-backend-status/src/backend/types.rs +++ b/orb-backend-status/src/backend/types.rs @@ -213,6 +213,7 @@ pub struct NetIntfApiV2 { pub struct CellularStatusApiV2 { pub imei: String, pub iccid: String, + pub fw_revision: Option, /// Radio Access Technology -- e.g.: gsm, lte pub rat: Option, pub operator: Option, diff --git a/orb-backend-status/tests/fixture.rs b/orb-backend-status/tests/fixture.rs index 68e7df768..fdfbf5ab8 100644 --- a/orb-backend-status/tests/fixture.rs +++ b/orb-backend-status/tests/fixture.rs @@ -634,6 +634,7 @@ pub mod mocks { ) -> zbus::Result<()> { let status = ( imei.to_string(), // imei + Option::::None, // fw_revision Option::::None, // iccid Some("lte".to_string()), // rat operator.map(String::from), // operator diff --git a/orb-connd/Cargo.toml b/orb-connd/Cargo.toml index a50c8c708..ee5a3b33b 100644 --- a/orb-connd/Cargo.toml +++ b/orb-connd/Cargo.toml @@ -52,6 +52,7 @@ rkyv = { workspace = true, features = ["validation"] } secrecy.workspace = true serde.workspace = true serde_json.workspace = true +speare.workspace = true thiserror = { workspace = true, features = ["std"] } tokio = { workspace = true, features = ["full"] } tokio-serde = { workspace = true, features = ["cbor"] } @@ -61,7 +62,7 @@ tracing.workspace = true uuid = { workspace = true, features = ["v4"] } uzers = "0.12.0" zbus.workspace = true -zbus_systemd = { workspace = true, 
features = ["resolve1"] } +zbus_systemd = { workspace = true, features = ["resolve1", "systemd1"] } zenorb.workspace = true [dev-dependencies] diff --git a/orb-connd/src/connectivity_daemon.rs b/orb-connd/src/connectivity_daemon.rs index 82efb28ff..df6af5821 100644 --- a/orb-connd/src/connectivity_daemon.rs +++ b/orb-connd/src/connectivity_daemon.rs @@ -1,16 +1,18 @@ +use crate::mcu_util::McuUtil; use crate::modem_manager::ModemManager; use crate::network_manager::NetworkManager; use crate::resolved::Resolved; use crate::service::{ConndService, ProfileStorage}; use crate::statsd::StatsdClient; -use crate::{reporters, OrbCapabilities, Tasks}; -use color_eyre::eyre::{OptionExt, Result}; +use crate::systemd::Systemd; +use crate::{modem, reporters, OrbCapabilities}; +use color_eyre::eyre::{Context, Result}; use orb_info::orb_os_release::OrbOsRelease; -use std::time::{Duration, Instant}; +use speare::mini::{self, OnErr}; +use speare::Backoff; +use std::time::Duration; use std::{path::Path, sync::Arc}; -use tokio::{task, time}; use tracing::info; -use tracing::{error, warn}; use zenorb::zenoh::bytes::Encoding; use zenorb::Zenorb; @@ -19,17 +21,21 @@ pub async fn program( sysfs: impl AsRef, usr_persistent: impl AsRef, network_manager: NetworkManager, + systemd: Systemd, resolved: Resolved, session_bus: zbus::Connection, os_release: OrbOsRelease, statsd_client: impl StatsdClient, modem_manager: impl ModemManager, + mcu_util: impl McuUtil, connect_timeout: Duration, profile_storage: ProfileStorage, zenoh: &Zenorb, -) -> Result { +) -> Result> { let sysfs = sysfs.as_ref().to_path_buf(); let modem_manager: Arc = Arc::new(modem_manager); + let mcu_util: Arc = Arc::new(mcu_util); + let statsd_client: Arc = Arc::new(statsd_client); let cap = OrbCapabilities::from_sysfs(&sysfs).await; @@ -44,9 +50,15 @@ pub async fn program( .publisher_with("oes/active_connections", |p| { p.encoding(Encoding::APPLICATION_JSON) }) + .publisher_with("oes/cellular_status", |p| { + 
p.encoding(Encoding::APPLICATION_JSON) + }) + .publisher_with("oes/netstats", |p| p.encoding(Encoding::APPLICATION_JSON)) .build() .await?; + let speare = speare::mini::root(); + let connd = ConndService::new( session_bus.clone(), network_manager.clone(), @@ -58,100 +70,41 @@ pub async fn program( ) .await?; - let mut tasks = vec![connd.spawn()]; + speare.oneshot(async move |_| connd.spawn().await)?; + + reporters::spawn( + &speare, + network_manager, + resolved, + session_bus, + statsd_client, + sysfs, + zsender, + ) + .await?; if let OrbCapabilities::CellularAndWifi = cap { - setup_modem_bands_and_modes(&modem_manager); + speare + .task_with() + .args(modem::Args { + poll_interval: Duration::from_secs(30), + modem_manager, + mcu_util, + systemd, + }) + .on_err(OnErr::Restart { + max: 10.into(), + backoff: Backoff::Incremental { + min: Duration::from_secs(10), + max: Duration::from_secs(100), + step: Duration::from_secs(10), + }, + }) + .spawn(modem::supervisor) + .wrap_err("failed to spawn modem supervisor")?; } - tasks.extend( - reporters::spawn( - network_manager, - resolved, - session_bus, - modem_manager, - statsd_client, - sysfs, - cap, - zsender, - ) - .await, - ); - - Ok(tasks) -} - -fn setup_modem_bands_and_modes(mm: &Arc) { - let mm = Arc::clone(mm); - - task::spawn(async move { - info!("trying to setup modem bands, allowed and preferred modes"); - - let run = async || -> Result<()> { - let modem = mm - .list_modems() - .await? 
- .into_iter() - .next() - .ok_or_eyre("couldn't find a modem")?; - - let bands = [ - "egsm", - "dcs", - "pcs", - "g850", - "utran-1", - "utran-2", - "utran-4", - "utran-5", - "utran-6", - "utran-8", - "eutran-1", - "eutran-2", - "eutran-3", - "eutran-4", - "eutran-5", - "eutran-7", - "eutran-8", - "eutran-9", - "eutran-12", - "eutran-13", - "eutran-14", - "eutran-18", - "eutran-19", - "eutran-20", - "eutran-25", - "eutran-26", - "eutran-28", - ]; - - mm.set_current_bands(&modem.id, &bands).await?; - info!("modem bands set up successfully"); - - match mm - .set_allowed_and_preferred_modes(&modem.id, &["3g", "4g"], "4g") - .await - { - Err(e) => warn!("allowed and preferred could not be set up: {e}"), - Ok(_) => info!("allowed and preferred modes set up successfully"), - }; - - Ok(()) - }; - - let start = Instant::now(); - let timeout = Duration::from_secs(60); - while let Err(e) = run().await { - if start.elapsed() > timeout { - error!("timeout reached while setting up bands and preferred/allowed modes for modem: {e}"); - break; - } - - error!( - "failed to set up bands and preferred/allowed modes for modem: {e}. 
trying again in 10s" - ); + info!("finished connd startup"); - time::sleep(Duration::from_secs(10)).await; - } - }); + Ok(speare) } diff --git a/orb-connd/src/lib.rs b/orb-connd/src/lib.rs index f476e2c91..1b87349d8 100644 --- a/orb-connd/src/lib.rs +++ b/orb-connd/src/lib.rs @@ -1,9 +1,10 @@ -use color_eyre::Result; use derive_more::Display; use std::path::Path; -use tokio::{fs, task::JoinHandle}; +use tokio::fs; pub mod connectivity_daemon; +pub mod mcu_util; +pub mod modem; pub mod modem_manager; pub mod network_manager; pub mod reporters; @@ -11,12 +12,11 @@ pub mod resolved; pub mod secure_storage; pub mod service; pub mod statsd; +pub mod systemd; pub mod wpa_ctrl; mod utils; -pub(crate) type Tasks = Vec>>; - #[derive(Display, Debug, PartialEq, Copy, Clone)] pub enum OrbCapabilities { CellularAndWifi, diff --git a/orb-connd/src/main.rs b/orb-connd/src/main.rs index d65673b7a..6f8e37f2d 100644 --- a/orb-connd/src/main.rs +++ b/orb-connd/src/main.rs @@ -3,12 +3,14 @@ use color_eyre::eyre::{Context, Result}; use orb_build_info::{make_build_info, BuildInfo}; use orb_connd::{ connectivity_daemon, + mcu_util::cli::McuUtilCli, modem_manager::cli::ModemManagerCli, network_manager::NetworkManager, resolved::Resolved, secure_storage::{self, ConndStorageScopes, SecureStorage}, service::ProfileStorage, statsd::dd::DogstatsdClient, + systemd::Systemd, wpa_ctrl::cli::WpaCli, }; use orb_info::{ @@ -80,7 +82,8 @@ fn connectivity_daemon() -> Result<()> { system_bus.clone(), WpaCli::new(os_release.orb_os_platform_type), ); - let resolved = Resolved::new(system_bus); + let resolved = Resolved::new(system_bus.clone()); + let systemd = Systemd::new(system_bus); let cancel_token = CancellationToken::new(); let profile_storage = match os_release.orb_os_platform_type { @@ -102,7 +105,7 @@ fn connectivity_daemon() -> Result<()> { .with_name("connd") .await?; - let tasks = connectivity_daemon::program() + let speare = connectivity_daemon::program() .sysfs("/sys") 
.usr_persistent("/usr/persistent") .network_manager(nm) @@ -114,6 +117,8 @@ fn connectivity_daemon() -> Result<()> { .connect_timeout(Duration::from_secs(15)) .profile_storage(profile_storage) .zenoh(&zenoh) + .mcu_util(McuUtilCli) + .systemd(systemd) .run() .await?; @@ -128,9 +133,7 @@ fn connectivity_daemon() -> Result<()> { info!("aborting tasks and exiting gracefully"); cancel_token.cancel(); - for handle in tasks { - handle.abort(); - } + speare.abort_children()?; Ok(()) }) diff --git a/orb-connd/src/mcu_util/cli.rs b/orb-connd/src/mcu_util/cli.rs new file mode 100644 index 000000000..d63e1cf8c --- /dev/null +++ b/orb-connd/src/mcu_util/cli.rs @@ -0,0 +1,23 @@ +use crate::{ + mcu_util::{McuUtil, Module}, + utils::run_cmd, +}; +use async_trait::async_trait; +use color_eyre::eyre::Context; + +pub struct McuUtilCli; + +#[async_trait] +impl McuUtil for McuUtilCli { + async fn powercycle(&self, module: Module) -> color_eyre::eyre::Result<()> { + let module = match module { + Module::Modem => "modem", + }; + + let _ = run_cmd("orb-mcu-util", &["power-cycle", module]) + .await + .wrap_err_with(|| format!("failed to powercycle {module}"))?; + + Ok(()) + } +} diff --git a/orb-connd/src/mcu_util/mod.rs b/orb-connd/src/mcu_util/mod.rs new file mode 100644 index 000000000..a4bcea743 --- /dev/null +++ b/orb-connd/src/mcu_util/mod.rs @@ -0,0 +1,13 @@ +use async_trait::async_trait; +use color_eyre::Result; + +pub mod cli; + +pub enum Module { + Modem, +} + +#[async_trait] +pub trait McuUtil: 'static + Send + Sync { + async fn powercycle(&self, module: Module) -> Result<()>; +} diff --git a/orb-connd/src/modem/mod.rs b/orb-connd/src/modem/mod.rs new file mode 100644 index 000000000..59b2dc08d --- /dev/null +++ b/orb-connd/src/modem/mod.rs @@ -0,0 +1,216 @@ +use crate::{ + mcu_util::{McuUtil, Module}, + modem_manager::{ + connection_state::ConnectionState, Location, ModemId, ModemManager, Signal, + }, + systemd::Systemd, +}; +use color_eyre::{ + eyre::{eyre, Context, 
ContextCompat}, + Result, +}; +use speare::mini; +use std::{sync::Arc, time::Duration}; +use tokio::{ + fs, + time::{self, timeout}, +}; +use tracing::{error, info, warn}; + +#[derive(Debug, Clone)] +pub struct Snapshot { + pub id: ModemId, + pub fw_revision: Option, + pub iccid: Option, + pub imei: String, + pub rat: Option, + pub operator: Option, + pub state: ConnectionState, + pub signal: Signal, + pub location: Location, +} + +pub struct Args { + pub poll_interval: Duration, + pub modem_manager: Arc, + pub mcu_util: Arc, + pub systemd: Systemd, +} + +pub async fn supervisor(ctx: mini::Ctx) -> Result<()> { + info!("starting modem supervisor"); + + let mut snapshot: Option = None; + let mut refresh_snapshot = async || -> Result<()> { + let new_snapshot = take_snapshot(ctx.modem_manager.as_ref()).await?; + + let modem_id_changed_msg = match &snapshot { + None => Some(format!( + "modem detected with id {}", + new_snapshot.id.as_str() + )), + + Some(snap) if snap.id != new_snapshot.id => Some(format!( + "modem changed id from {} to {}", + snap.id.as_str(), + new_snapshot.id.as_str() + )), + + _ => None, + }; + + if let Some(msg) = modem_id_changed_msg { + warn!(msg); + + let _ = + setup_signal_and_bands(ctx.modem_manager.as_ref(), &new_snapshot.id) + .await + .inspect_err(|e| warn!("failed to setup signal and bands: {e:?}")); + } + + let _ = ctx.publish("modem-snapshot", new_snapshot.clone()); + + snapshot = Some(new_snapshot); + + Ok(()) + }; + + let mut update_interval = time::interval(ctx.poll_interval); + + loop { + if let Err(e) = refresh_snapshot().await { + error!("failed to refresh modem snapshot with err: {e}"); + error!("powercycling modem"); + + let _ = powercycle_modem(ctx.mcu_util.as_ref(), &ctx.systemd) + .await + .inspect_err(|e| { + error!("failed to to powercycle modem with err: {e:?}"); + }); + + return Err(e); + } + + update_interval.tick().await; + } +} + +async fn take_snapshot(mm: &dyn ModemManager) -> Result { + let modem = mm + 
.list_modems() + .await? + .into_iter() + .next() + .wrap_err("couldn't find a modem")?; + + let modem_info = mm.modem_info(&modem.id).await?; + + let iccid = match modem_info.sim { + None => None, + Some(sim_id) => { + let sim_info = mm.sim_info(&sim_id).await?; + + Some(sim_info.iccid) + } + }; + + let signal = mm + .signal_get(&modem.id) + .await + .inspect_err(|e| warn!("failed to retrieve modem signal info: {e}")) + .unwrap_or_default(); + + let location = mm + .location_get(&modem.id) + .await + .inspect_err(|e| warn!("failed to retrieve modem location info: {e}")) + .unwrap_or_default(); + + Ok(Snapshot { + id: modem.id, + fw_revision: modem_info.fw_revision, + iccid, + imei: modem_info.imei, + rat: modem_info.access_tech, + operator: modem_info.operator_name, + state: modem_info.state, + signal, + location, + }) +} + +async fn setup_signal_and_bands(mm: &dyn ModemManager, id: &ModemId) -> Result<()> { + mm.signal_setup(id, std::time::Duration::from_secs(10)) + .await + .map_err(|e| eyre!("could not update modem signal refresh rate: {e}"))?; + + mm.set_current_bands(id, &ALLOWED_BANDS) + .await + .map_err(|e| eyre!("could not set modem bands: {e}"))?; + + Ok(()) +} + +static ALLOWED_BANDS: [&str; 27] = [ + "egsm", + "dcs", + "pcs", + "g850", + "utran-1", + "utran-2", + "utran-4", + "utran-5", + "utran-6", + "utran-8", + "eutran-1", + "eutran-2", + "eutran-3", + "eutran-4", + "eutran-5", + "eutran-7", + "eutran-8", + "eutran-9", + "eutran-12", + "eutran-13", + "eutran-14", + "eutran-18", + "eutran-19", + "eutran-20", + "eutran-25", + "eutran-26", + "eutran-28", +]; + +async fn powercycle_modem(mcu_util: &dyn McuUtil, systemd: &Systemd) -> Result<()> { + mcu_util + .powercycle(Module::Modem) + .await + .wrap_err("mcu-util power-cycle")?; + + time::sleep(Duration::from_secs(5)).await; + + let device_exists = async { + loop { + if fs::try_exists("/dev/cdc-wdm0").await.is_ok_and(|x| x) { + break; + } + + time::sleep(Duration::from_secs(1)).await; + } + }; + + 
timeout(Duration::from_secs(30), device_exists) + .await + .wrap_err("timed out after 30s waiting for modem device to pop back up")?; + + info!("modem detected at /dev/cdc-wdm0"); + + systemd + .restart_service("ModemManager.service") + .await + .wrap_err("restart ModemManager systemd service")?; + + info!("ModemManager restarted!"); + + Ok(()) +} diff --git a/orb-connd/src/modem_manager/cli.rs b/orb-connd/src/modem_manager/cli.rs index a06d3a7e7..93492cec8 100644 --- a/orb-connd/src/modem_manager/cli.rs +++ b/orb-connd/src/modem_manager/cli.rs @@ -182,8 +182,13 @@ fn parse_modem_info(str: &str) -> Result { .as_str() .and_then(|s| s.split("/").last()?.parse().ok()); + let fw_revision = json["modem"]["generic"]["revision"] + .as_str() + .map(|oc| oc.to_string()); + Ok(ModemInfo { imei, + fw_revision, operator_code, operator_name, access_tech, @@ -299,6 +304,7 @@ mod tests { let expected = ModemInfo { imei: "353338976168895".to_string(), + fw_revision: Some("25.30.608 1 [Nov 14 2023 07:00:00]".to_string()), operator_code: Some("26202".to_string()), operator_name: Some("vodafone.de".to_string()), access_tech: Some("lte".to_string()), diff --git a/orb-connd/src/modem_manager/mod.rs b/orb-connd/src/modem_manager/mod.rs index 08bb712cb..4aa1dfce6 100644 --- a/orb-connd/src/modem_manager/mod.rs +++ b/orb-connd/src/modem_manager/mod.rs @@ -38,6 +38,7 @@ pub trait ModemManager: 'static + Send + Sync { #[derive(Debug, Clone, PartialEq)] pub struct ModemInfo { pub imei: String, + pub fw_revision: Option, pub operator_code: Option, pub operator_name: Option, pub access_tech: Option, diff --git a/orb-connd/src/reporters/active_connections_report.rs b/orb-connd/src/reporters/active_connections.rs similarity index 90% rename from orb-connd/src/reporters/active_connections_report.rs rename to orb-connd/src/reporters/active_connections.rs index 0e275e78b..4676451a2 100644 --- a/orb-connd/src/reporters/active_connections_report.rs +++ b/orb-connd/src/reporters/active_connections.rs 
@@ -4,30 +4,34 @@ use color_eyre::eyre::bail; use color_eyre::Result; use oes::NetworkInterface; use serde::Serializer; +use speare::mini; use std::time::{Duration, Instant}; -use tokio::task::{self, JoinHandle}; use tracing::{error, info}; -pub fn spawn( - nm: NetworkManager, - resolved: Resolved, - rx: flume::Receiver, - zsender: zenorb::Sender, -) -> JoinHandle> { - info!("starting active_connections_report"); - - task::spawn(async move { - while let Ok(conn_event) = rx.recv_async().await { - if let Err(error) = report(&nm, &resolved, conn_event, &zsender).await { - error!(?error, "network health report failed: {error}"); - } - } +pub struct Args { + pub nm: NetworkManager, + pub resolved: Resolved, + pub zsender: zenorb::Sender, +} - Ok(()) - }) +pub async fn report(ctx: mini::Ctx) -> Result<()> { + info!("starting active connections reporter"); + let net_state_rx = ctx + .subscribe("net-state") + .inspect_err(|e| error!("failed to subscribe to net-state {e}"))?; + + while let Ok(net_state) = net_state_rx.recv_async().await { + let _ = build_and_send_report(&ctx.nm, &ctx.resolved, net_state, &ctx.zsender) + .await + .inspect_err(|error| { + error!(?error, "active connections report failed: {error}") + }); + } + + Ok(()) } -async fn report( +async fn build_and_send_report( nm: &NetworkManager, resolved: &Resolved, primary_connection: orb_connd_events::Connection, diff --git a/orb-connd/src/reporters/backend_status_cellular_reporter.rs b/orb-connd/src/reporters/backend_status_cellular_reporter.rs deleted file mode 100644 index 128ae1ce3..000000000 --- a/orb-connd/src/reporters/backend_status_cellular_reporter.rs +++ /dev/null @@ -1,58 +0,0 @@ -use crate::{reporters::modem_status::ModemStatus, utils::State}; -use color_eyre::{ - eyre::{eyre, Context}, - Result, -}; -use orb_backend_status_dbus::{types::CellularStatus, BackendStatusProxy}; -use std::time::Duration; -use tokio::{ - task::{self, JoinHandle}, - time, -}; -use tracing::{error, info}; - -pub fn spawn( - 
conn: zbus::Connection, - modem: State, - report_interval: Duration, -) -> JoinHandle> { - info!("starting backend status cellular reporter"); - task::spawn(async move { - loop { - if let Err(e) = report(&conn, &modem).await { - error!("failed to report to backend status: {e}"); - } - - time::sleep(report_interval).await; - } - }) -} - -async fn report(conn: &zbus::Connection, modem: &State) -> Result<()> { - let be_status = BackendStatusProxy::new(conn) - .await - .wrap_err("Failed to create Backend Status dbus Proxy")?; - - let cellular_status: CellularStatus = modem - .read(|m| { - let signal = &m.signal; - - CellularStatus { - imei: m.imei.clone(), - iccid: m.iccid.clone(), - rat: m.rat.clone(), - operator: m.operator.clone(), - rsrp: signal.rsrp, - rsrq: signal.rsrq, - rssi: signal.rssi, - snr: signal.snr, - } - }) - .map_err(|e| { - eyre!("failed to read ConnectionState from State: {e:?}") - })?; - - be_status.provide_cellular_status(cellular_status).await?; - - Ok(()) -} diff --git a/orb-connd/src/reporters/backend_status_wifi_reporter.rs b/orb-connd/src/reporters/backend_status_wifi_reporter.rs deleted file mode 100644 index 671396729..000000000 --- a/orb-connd/src/reporters/backend_status_wifi_reporter.rs +++ /dev/null @@ -1,143 +0,0 @@ -use crate::network_manager::{Connection, NetworkManager}; -use color_eyre::{eyre::Context, Result}; -use futures::StreamExt; -use orb_backend_status_dbus::{ - types::{ConndReport, WifiNetwork, WifiProfile}, - BackendStatusProxy, -}; -use std::time::Duration; -use tokio::{ - task::{self, JoinHandle}, - time, -}; -use tracing::{error, info, warn}; - -pub fn spawn( - nm: NetworkManager, - session_bus: zbus::Connection, - report_interval: Duration, -) -> JoinHandle> { - info!("starting backend status wifi reporter"); - task::spawn(async move { - if let Err(e) = run_reporter(nm, session_bus, report_interval).await { - error!("wifi reporter task failed: {e}"); - } - - Ok(()) - }) -} - -async fn run_reporter( - nm: 
NetworkManager, - session_bus: zbus::Connection, - report_interval: Duration, -) -> Result<()> { - let stream_backoff = Duration::from_secs(3); - - let (mut state_stream, mut primary_conn_stream) = loop { - match nm.state_stream().await { - Ok(stream) => match nm.primary_connection_stream().await { - Ok(pcs) => { - info!("Successfully subscribed to NetworkManager streams"); - break (stream, pcs); - } - Err(e) => { - error!("Failed to get primary connection stream: {e}"); - time::sleep(stream_backoff).await; - continue; - } - }, - Err(e) => { - error!("Failed to get state stream: {e}"); - time::sleep(stream_backoff).await; - continue; - } - } - }; - - let mut interval = time::interval(report_interval); - interval.set_missed_tick_behavior(time::MissedTickBehavior::Skip); - - loop { - tokio::select! { - _ = state_stream.next() => { - info!("NetworkManager state changed - sending immediate WiFi status"); - } - - _ = primary_conn_stream.next() => { - info!("Primary connection changed - sending immediate WiFi status"); - } - - _ = interval.tick() => {} - }; - - if let Err(e) = report(&nm, &session_bus).await { - error!("failed to report to backend status: {e}"); - } - } -} - -async fn report(nm: &NetworkManager, session_bus: &zbus::Connection) -> Result<()> { - let be_status = BackendStatusProxy::new(session_bus) - .await - .wrap_err("Failed to create Backend Status dbus Proxy")?; - - let primary_conn = nm - .primary_connection() - .await - .inspect_err(|e| warn!("failed to get primary connection: {e}")) - .unwrap_or_default(); - - let (egress_iface, active_wifi_profile) = match primary_conn { - Some(Connection::Cellular { .. 
}) => (Some("wwan0".into()), None), - Some(Connection::Wifi { ssid }) => (Some("wlan0".into()), Some(ssid)), - Some(Connection::Ethernet) => (Some("eth0".into()), None), - None => (None, None), - }; - - let saved_wifi_profiles = nm - .list_wifi_profiles() - .await - .inspect_err(|e| warn!("failed to list wifi profiles: {e}")) - .unwrap_or_default() - .into_iter() - .map(|profile| WifiProfile { - ssid: profile.ssid, - sec: profile.sec.to_string(), - }) - .collect(); - - let scanned_networks: Vec = nm - .wifi_scan() - .await - .inspect_err(|e| warn!("failed to scan wifi: {e}")) - .unwrap_or_default() - .into_iter() - .map(|ap| WifiNetwork { - bssid: ap.bssid, - ssid: ap.ssid, - frequency: ap.freq_mhz, - signal_level: ap.rssi.unwrap_or_default(), - }) - .collect(); - - let _ = async { - be_status - .provide_connd_report(ConndReport { - egress_iface, - wifi_enabled: nm.wifi_enabled().await?, - smart_switching: nm.smart_switching_enabled().await?, - airplane_mode: false, // not implemented yet - active_wifi_profile, - saved_wifi_profiles, - scanned_networks, - }) - .await?; - - Ok::<(), color_eyre::Report>(()) - } - .await - .inspect_err(|e| warn!("failed to provide connd report to backend status: {e}")); - - Ok(()) -} diff --git a/orb-connd/src/reporters/cellular_status.rs b/orb-connd/src/reporters/cellular_status.rs new file mode 100644 index 000000000..d5d391ef8 --- /dev/null +++ b/orb-connd/src/reporters/cellular_status.rs @@ -0,0 +1,63 @@ +use crate::modem; +use color_eyre::{ + eyre::{eyre, Context}, + Result, +}; +use orb_backend_status_dbus::{types::CellularStatus, BackendStatusProxy}; +use speare::mini; +use tracing::{error, info}; + +pub struct Args { + pub dbus: zbus::Connection, + pub zsender: zenorb::Sender, +} + +pub async fn report(ctx: mini::Ctx) -> Result<()> { + info!("starting cellular status reporter"); + let run = async || -> Result<()> { + let snapshot_rx = ctx.subscribe::("modem-snapshot")?; + + loop { + let modem = snapshot_rx.recv_async().await?; 
+ let signal = modem.signal; + + let be_status = BackendStatusProxy::new(&ctx.dbus) + .await + .wrap_err("Failed to create Backend Status dbus Proxy")?; + + // TODO: move this to oes crate once we deprecate keeping this in backend-status state + let cell_status = CellularStatus { + imei: modem.imei, + fw_revision: modem.fw_revision, + iccid: modem.iccid, + rat: modem.rat, + operator: modem.operator, + rsrp: signal.rsrp, + rsrq: signal.rsrq, + rssi: signal.rssi, + snr: signal.snr, + }; + + let payload = serde_json::to_string(&cell_status) + .wrap_err("failed to serialize CellularStatus")?; + + let zenoh_err = ctx + .zsender + .publisher("oes/cellular_status")? + .put(payload) + .await + .map_err(|e| { + eyre!("failed to send oes/cellular_status zenoh payload, err: {e}") + }); + + let dbus_err = be_status.provide_cellular_status(cell_status).await; + + zenoh_err?; + dbus_err?; + } + }; + + run() + .await + .inspect_err(|e| error!("backend status cellular reporter failed with: {e:?}")) +} diff --git a/orb-connd/src/reporters/connd_report.rs b/orb-connd/src/reporters/connd_report.rs new file mode 100644 index 000000000..b9a4a96c5 --- /dev/null +++ b/orb-connd/src/reporters/connd_report.rs @@ -0,0 +1,99 @@ +use crate::network_manager::{Connection, NetworkManager}; +use color_eyre::{eyre::Context, Result}; +use flume::Receiver; +use orb_backend_status_dbus::{ + types::{ConndReport, WifiNetwork, WifiProfile}, + BackendStatusProxy, +}; +use speare::mini; +use std::time::Duration; +use tokio::time; +use tracing::{info, warn}; + +pub struct Args { + pub nm: NetworkManager, + pub session_bus: zbus::Connection, + pub report_interval: Duration, +} + +pub async fn report(ctx: mini::Ctx) -> Result<()> { + info!("starting connd report reporter"); + + async { + let net_state_rx: Receiver = + ctx.subscribe("net-state")?; + + let mut interval = time::interval(ctx.report_interval); + interval.set_missed_tick_behavior(time::MissedTickBehavior::Skip); + + loop { + tokio::select! 
{ + Ok(_) = net_state_rx.recv_async() => {} + _ = interval.tick() => {} + }; + + let be_status = BackendStatusProxy::new(&ctx.session_bus) + .await + .wrap_err("Failed to create Backend Status dbus Proxy")?; + + let primary_conn = ctx + .nm + .primary_connection() + .await + .inspect_err(|e| warn!("failed to get primary connection: {e}")) + .unwrap_or_default(); + + let (egress_iface, active_wifi_profile) = match primary_conn { + Some(Connection::Cellular { .. }) => (Some("wwan0".into()), None), + Some(Connection::Wifi { ssid }) => (Some("wlan0".into()), Some(ssid)), + Some(Connection::Ethernet) => (Some("eth0".into()), None), + None => (None, None), + }; + + let saved_wifi_profiles = ctx + .nm + .list_wifi_profiles() + .await + .inspect_err(|e| warn!("failed to list wifi profiles: {e}")) + .unwrap_or_default() + .into_iter() + .map(|profile| WifiProfile { + ssid: profile.ssid, + sec: profile.sec.to_string(), + }) + .collect(); + + let scanned_networks: Vec = ctx + .nm + .wifi_scan() + .await + .inspect_err(|e| warn!("failed to scan wifi: {e}")) + .unwrap_or_default() + .into_iter() + .map(|ap| WifiNetwork { + bssid: ap.bssid, + ssid: ap.ssid, + frequency: ap.freq_mhz, + signal_level: ap.rssi.unwrap_or_default(), + }) + .collect(); + + be_status + .provide_connd_report(ConndReport { + egress_iface, + wifi_enabled: ctx.nm.wifi_enabled().await?, + smart_switching: ctx.nm.smart_switching_enabled().await?, + airplane_mode: false, // not implemented yet + active_wifi_profile, + saved_wifi_profiles, + scanned_networks, + }) + .await?; + } + + #[allow(unreachable_code)] + Ok::<(), color_eyre::Report>(()) + } + .await + .inspect_err(|e| warn!("failed to provide connd report to backend status: {e}")) +} diff --git a/orb-connd/src/reporters/datadog.rs b/orb-connd/src/reporters/datadog.rs new file mode 100644 index 000000000..6e315643e --- /dev/null +++ b/orb-connd/src/reporters/datadog.rs @@ -0,0 +1,110 @@ +use crate::{modem, reporters::net_stats::NetStats, 
statsd::StatsdClient}; +use color_eyre::Result; +use flume::Receiver; +use speare::mini; +use std::{collections::HashMap, sync::Arc}; +use tracing::{info, warn}; + +pub struct Args { + pub statsd: Arc, +} + +pub async fn report(ctx: mini::Ctx) -> Result<()> { + info!("starting datadog reporter"); + + async { + let modem_snapshot_rx: Receiver = + ctx.subscribe("modem-snapshot")?; + + let netstats_rx: Receiver> = ctx.subscribe("netstats")?; + let mut netstats_map: HashMap = HashMap::new(); + + loop { + tokio::select! { + Ok(snapshot) = modem_snapshot_rx.recv_async() => { + report_modem(ctx.statsd.as_ref(), snapshot).await?; + } + + Ok(all_netstats) = netstats_rx.recv_async() => { + for new_netstats in all_netstats { + let old_netstats = netstats_map.remove(&new_netstats.iface) + .unwrap_or_else(|| new_netstats.clone()); + + report_netstats(ctx.statsd.as_ref(), &old_netstats, &new_netstats).await?; + + netstats_map.insert(new_netstats.iface.clone(), new_netstats); + } + + } + } + } + + #[allow(unreachable_code)] + Ok(()) + } + .await + .inspect_err(|e| warn!("failure reporting to datadog {e:?}")) +} + +async fn report_modem(statsd: &dyn StatsdClient, m: modem::Snapshot) -> Result<()> { + let sig = m.signal; + + let gauges = vec![ + ("orb.lte.signal.rsrp", sig.rsrp), + ("orb.lte.signal.rsrq", sig.rsrq), + ("orb.lte.signal.rssi", sig.rssi), + ("orb.lte.signal.snr", sig.snr), + ]; + + if m.state.is_online() { + let heartbeat_tags: Vec = [ + m.rat.map(|r| format!("rat:{r}")), + m.operator.map(|o| format!("operator:{o}")), + ] + .into_iter() + .flatten() + .collect(); + + statsd.count("orb.lte.heartbeat", 1, heartbeat_tags).await?; + } + + for (name, v) in gauges + .into_iter() + .filter_map(|(name, opt)| opt.map(|v| (name, v))) + { + statsd.gauge(name, &v.to_string(), Vec::new()).await?; + } + + Ok(()) +} + +async fn report_netstats( + statsd: &dyn StatsdClient, + old_netstats: &NetStats, + new_netstats: &NetStats, +) -> Result<()> { + let rx_bytes = new_netstats.rx_bytes 
- old_netstats.rx_bytes; + let tx_bytes = new_netstats.tx_bytes - old_netstats.tx_bytes; + + if rx_bytes == 0 && tx_bytes == 0 { + return Ok(()); + } + + statsd + .incr_by_value( + &format!("orb.{}.net.rx_bytes_delta", new_netstats.iface), + rx_bytes as i64, + Vec::new(), + ) + .await?; + + statsd + .incr_by_value( + &format!("orb.{}.net.tx_bytes_delta", new_netstats.iface), + tx_bytes as i64, + Vec::new(), + ) + .await?; + + Ok(()) +} diff --git a/orb-connd/src/reporters/dd_modem_reporter.rs b/orb-connd/src/reporters/dd_modem_reporter.rs deleted file mode 100644 index 3fece6096..000000000 --- a/orb-connd/src/reporters/dd_modem_reporter.rs +++ /dev/null @@ -1,114 +0,0 @@ -use crate::{ - reporters::{modem_status::ModemStatus, net_stats::NetStats}, - statsd::StatsdClient, - utils::State, -}; -use color_eyre::{eyre::eyre, Result}; -use std::time::Duration; -use tokio::{ - task::{self, JoinHandle}, - time, -}; -use tracing::{error, info}; - -const NO_TAGS: &[&str] = &[]; - -pub fn spawn( - modem_status: State, - statsd_client: impl StatsdClient, - report_interval: Duration, -) -> JoinHandle> { - info!("starting dd reporter"); - task::spawn(async move { - info!("successfully created dogstatd::Client"); - - let mut prev_net_stats = modem_status - .read(|m| m.net_stats.clone()) - .map_err(|e| eyre!("dd_repoter::start, modem.read: {e}"))?; - - loop { - if let Err(e) = - report(modem_status.clone(), &statsd_client, &mut prev_net_stats).await - { - error!("failed to report to backend status: {e}"); - } - - time::sleep(report_interval).await; - } - }) -} - -async fn report( - modem_status: State, - statsd_client: &impl StatsdClient, - prev_net_stats: &mut NetStats, -) -> Result<()> { - let (state, rat, operator, gauges, new_net_stats) = modem_status - .read(|m| { - let sig = &m.signal; - - let gauges = vec![ - ("orb.lte.signal.rsrp", sig.rsrp), - ("orb.lte.signal.rsrq", sig.rsrq), - ("orb.lte.signal.rssi", sig.rssi), - ("orb.lte.signal.snr", sig.snr), - ]; - - ( - 
m.state.clone(), - m.rat.clone(), - m.operator.clone(), - gauges, - m.net_stats.clone(), - ) - }) - .map_err(|e| { - eyre!("failed to read ConnectionState from State: {e:?}") - })?; - - let net_stats_delta = NetStats { - rx_bytes: new_net_stats.rx_bytes - prev_net_stats.rx_bytes, - tx_bytes: new_net_stats.tx_bytes - prev_net_stats.tx_bytes, - }; - - *prev_net_stats = new_net_stats; - - if state.is_online() { - let heartbeat_tags: Vec = [ - rat.map(|r| format!("rat:{r}")), - operator.map(|o| format!("operator:{o}")), - ] - .into_iter() - .flatten() - .collect(); - - statsd_client - .count("orb.lte.heartbeat", 1, heartbeat_tags.as_ref()) - .await?; - } - - for (name, v) in gauges - .into_iter() - .filter_map(|(name, opt)| opt.map(|v| (name, v))) - { - statsd_client.gauge(name, &v.to_string(), NO_TAGS).await?; - } - - statsd_client - .incr_by_value( - "orb.lte.net.rx_bytes_delta", - net_stats_delta.rx_bytes as i64, - NO_TAGS, - ) - .await?; - - statsd_client - .incr_by_value( - "orb.lte.net.tx_bytes_delta", - net_stats_delta.tx_bytes as i64, - NO_TAGS, - ) - .await?; - - Ok(()) -} diff --git a/orb-connd/src/reporters/mod.rs b/orb-connd/src/reporters/mod.rs index f1a1fab57..7f1512abf 100644 --- a/orb-connd/src/reporters/mod.rs +++ b/orb-connd/src/reporters/mod.rs @@ -1,134 +1,90 @@ use crate::{ - modem_manager::ModemManager, - network_manager::NetworkManager, - reporters::modem_status::ModemStatus, - resolved::Resolved, - statsd::StatsdClient, - utils::{retry_for, State}, - OrbCapabilities, Tasks, + network_manager::NetworkManager, resolved::Resolved, statsd::StatsdClient, }; -use color_eyre::{eyre::ContextCompat, Result}; -use net_stats::NetStats; -use std::{ - path::{Path, PathBuf}, - sync::Arc, - time::Duration, -}; -use tracing::{error, info, warn}; +use color_eyre::Result; +use speare::{mini::OnErr, Backoff, Limit}; +use std::{path::PathBuf, sync::Arc, time::Duration}; +use tracing::info; -pub mod active_connections_report; -pub mod 
backend_status_cellular_reporter; -pub mod backend_status_wifi_reporter; -pub mod dd_modem_reporter; -pub mod modem_monitor; -pub mod modem_status; -pub mod net_changed_reporter; +pub mod active_connections; +pub mod cellular_status; +pub mod connd_report; +pub mod datadog; +pub mod net_state; pub mod net_stats; #[allow(clippy::too_many_arguments)] pub async fn spawn( + speare: &speare::mini::Ctx<()>, nm: NetworkManager, resolved: Resolved, session_bus: zbus::Connection, - modem_manager: Arc, - statsd_client: impl StatsdClient, + statsd: Arc, sysfs: PathBuf, - cap: OrbCapabilities, zsender: zenorb::Sender, -) -> Tasks { +) -> Result<()> { info!("starting reporter tasks"); - let (health_tx, health_rx) = flume::unbounded(); - - let mut tasks = vec![ - backend_status_wifi_reporter::spawn( - nm.clone(), - session_bus.clone(), - Duration::from_secs(30), - ), - net_changed_reporter::spawn(nm.clone(), zsender.clone(), health_tx), - active_connections_report::spawn(nm, resolved, health_rx, zsender), - ]; + speare + .task_with() + .args(net_state::Args { + nm: nm.clone(), + zsender: zsender.clone(), + }) + .on_err(static_backoff(15)) + .spawn(net_state::report)?; - if let OrbCapabilities::CellularAndWifi = cap { - info!("reporter getting initial modem information"); - let modem_status_timeout = Duration::from_secs(120); - let modem_status = match retry_for( - modem_status_timeout, - Duration::from_secs(10), - || make_modem_status(&modem_manager, &sysfs), - ) - .await - { - Ok(ms) => ms, - Err(error) => { - error!(?error, "could not retrieve modem_status after {}s. 
modem reporting will be disabled", modem_status_timeout.as_secs()); - return tasks; - } - }; + speare + .task_with() + .args(cellular_status::Args { + dbus: session_bus.clone(), + zsender: zsender.clone(), + }) + .on_err(static_backoff(15)) + .spawn(cellular_status::report)?; - tasks.extend([ - modem_monitor::spawn( - modem_manager, - modem_status.clone(), - sysfs, - Duration::from_secs(20), - ), - backend_status_cellular_reporter::spawn( - session_bus, - modem_status.clone(), - Duration::from_secs(30), - ), - dd_modem_reporter::spawn( - modem_status, - statsd_client, - Duration::from_secs(20), - ), - ]); - } - - tasks -} + speare + .task_with() + .args(net_stats::Args { + poll_interval: Duration::from_secs(30), + sysfs, + zsender: zsender.clone(), + }) + .on_err(static_backoff(15)) + .spawn(net_stats::report)?; -async fn make_modem_status( - mm: &Arc, - sysfs: impl AsRef, -) -> Result> { - let modem_status: Result = async { - let modem = mm - .list_modems() - .await? - .into_iter() - .next() - .wrap_err("couldn't find a modem")?; + speare + .task_with() + .args(datadog::Args { statsd }) + .on_err(static_backoff(15)) + .spawn(datadog::report)?; - let modem_info = mm.modem_info(&modem.id).await?; + speare + .task_with() + .args(connd_report::Args { + nm: nm.clone(), + session_bus, + report_interval: Duration::from_secs(30), + }) + .on_err(static_backoff(15)) + .spawn(connd_report::report)?; - let iccid = match modem_info.sim { - None => None, - Some(sim_id) => { - let sim_info = mm.sim_info(&sim_id).await?; + speare + .task_with() + .args(active_connections::Args { + nm, + resolved, + zsender, + }) + .on_err(static_backoff(15)) + .spawn(active_connections::report)?; - Some(sim_info.iccid) - } - }; - - if let Err(e) = mm.signal_setup(&modem.id, Duration::from_secs(10)).await { - warn!("could not update modem signal refresh rate: {e}"); - } - - let net_stats = NetStats::collect(sysfs, "wwan0").await?; + Ok(()) +} - Ok(ModemStatus::new( - modem.id, - iccid, - 
modem_info.imei, - modem_info.state, - net_stats, - )) +fn static_backoff(seconds: u64) -> OnErr { + OnErr::Restart { + max: Limit::None, + backoff: Backoff::Static(Duration::from_secs(seconds)), } - .await - .inspect_err(|e| error!("make_modem_status: {e}")); - - Ok(State::new(modem_status?)) } diff --git a/orb-connd/src/reporters/modem_monitor.rs b/orb-connd/src/reporters/modem_monitor.rs deleted file mode 100644 index 1cc472fff..000000000 --- a/orb-connd/src/reporters/modem_monitor.rs +++ /dev/null @@ -1,78 +0,0 @@ -use super::modem_status::ModemStatus; -use crate::modem_manager::ModemManager; -use crate::reporters::net_stats::NetStats; -use crate::utils::State; -use color_eyre::eyre::{eyre, ContextCompat}; -use color_eyre::Result; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::Duration; -use tokio::task::{self, JoinHandle}; -use tokio::time::{self}; -use tracing::{error, info}; - -pub fn spawn( - mm: Arc, - modem: State, - sysfs: PathBuf, - poll_interval: Duration, -) -> JoinHandle> { - info!("starting modem monitor"); - - task::spawn(async move { - loop { - if let Err(e) = update_modem(&mm, &modem, &sysfs).await { - error!("failed to update modem: {e}"); - } - - time::sleep(poll_interval).await; - } - }) -} - -async fn update_modem( - mm: &Arc, - modem_status: &State, - sysfs: impl AsRef, -) -> Result<()> { - let current_modem_id = modem_status.read(|ms| ms.id.clone()).map_err(|e| { - eyre!("failed to read ConnectionState from State: {e:?}") - })?; - - let modem = mm - .list_modems() - .await? 
- .into_iter() - .next() - .wrap_err("could not find a modem")?; - - // modem has most likely power cycled, enable signals refresh again - if modem.id != current_modem_id { - mm.signal_setup(&modem.id, Duration::from_secs(10)).await?; - } - - let modem_info = mm.modem_info(&modem.id).await?; - let signal = mm.signal_get(&modem.id).await?; - let location = mm.location_get(&modem.id).await?; - - let net_stats = NetStats::collect(sysfs, "wwan0") - .await - .inspect_err(|e| error!("NetStats from wwan0: err {e}")); - - modem_status - .write(move |ms| { - ms.id = modem.id; - ms.state = modem_info.state; - ms.rat = modem_info.access_tech; - ms.operator = modem_info.operator_name; - ms.signal = signal; - ms.location = location; - - if let Ok(stats) = net_stats { - ms.net_stats = stats; - } - }) - .map_err(|e| eyre!("failed to write to State: {e:?}"))?; - - Ok(()) -} diff --git a/orb-connd/src/reporters/modem_status.rs b/orb-connd/src/reporters/modem_status.rs deleted file mode 100644 index a75a74db0..000000000 --- a/orb-connd/src/reporters/modem_status.rs +++ /dev/null @@ -1,39 +0,0 @@ -use super::net_stats::NetStats; -use crate::modem_manager::{ - connection_state::ConnectionState, Location, ModemId, Signal, -}; - -pub struct ModemStatus { - pub id: ModemId, - pub iccid: Option, - pub imei: String, - /// Radio Access Technology -- e.g.: gsm, lte - pub rat: Option, - pub operator: Option, - pub state: ConnectionState, - pub signal: Signal, - pub location: Location, - pub net_stats: NetStats, -} - -impl ModemStatus { - pub fn new( - id: ModemId, - iccid: Option, - imei: String, - state: ConnectionState, - net_stats: NetStats, - ) -> Self { - Self { - id, - iccid, - imei, - rat: None, - operator: None, - state, - signal: Signal::default(), - location: Location::default(), - net_stats, - } - } -} diff --git a/orb-connd/src/reporters/net_changed_reporter.rs b/orb-connd/src/reporters/net_changed_reporter.rs deleted file mode 100644 index 66fec8fb4..000000000 --- 
a/orb-connd/src/reporters/net_changed_reporter.rs +++ /dev/null @@ -1,114 +0,0 @@ -use crate::network_manager::{Connection, NetworkManager}; -use color_eyre::{eyre::eyre, Result}; -use futures::StreamExt; -use orb_connd_events::ConnectionKind; -use rusty_network_manager::dbus_interface_types::NMState; -use std::time::Duration; -use tokio::{ - task::{self, JoinHandle}, - time, -}; -use tracing::{error, info, warn}; - -static BACKOFF: Duration = Duration::from_secs(5); - -pub fn spawn( - nm: NetworkManager, - zsender: zenorb::Sender, - health_tx: flume::Sender, -) -> JoinHandle> { - info!("starting net_changed reporter"); - - task::spawn(async move { - loop { - if let Err(e) = report_loop(&nm, &zsender, &health_tx).await { - error!(error = ?e, "net changed loop error, retrying in {}s. error: {e}", BACKOFF.as_secs()); - } - - time::sleep(BACKOFF).await; - } - }) -} - -async fn report_loop( - nm: &NetworkManager, - zsender: &zenorb::Sender, - health_tx: &flume::Sender, -) -> Result<()> { - let publisher = zsender.publisher("net/changed")?; - let mut state_stream = nm.state_stream().await?; - let mut primary_conn_stream = nm.primary_connection_stream().await?; - - let nm_state = nm.state().await?; - let mut conn_event = connection_event(nm_state, nm.primary_connection().await?); - - let bytes = rkyv::to_bytes::<_, 64>(&conn_event)?; - publisher - .put(bytes.into_vec()) - .await - .map_err(|e| eyre!("{e}"))?; - - if is_connected(&conn_event) - && let Err(e) = health_tx.send(conn_event.clone()) - { - warn!(error = ?e, "failed to send health report event"); - } - - loop { - tokio::select! 
{ - _ = state_stream.next() => {} - _ = primary_conn_stream.next() => {} - }; - - let new_conn_event = - connection_event(nm.state().await?, nm.primary_connection().await?); - - let changed = conn_event != new_conn_event; - conn_event = new_conn_event; - - if changed { - let bytes = rkyv::to_bytes::<_, 64>(&conn_event)?; - publisher - .put(bytes.into_vec()) - .await - .map_err(|e| eyre!("{e}"))?; - - if is_connected(&conn_event) - && let Err(e) = health_tx.send(conn_event.clone()) - { - warn!(error = ?e, "failed to send health report event"); - } - } - } -} - -fn connection_event( - state: NMState, - active_conn: Option, -) -> orb_connd_events::Connection { - use orb_connd_events::Connection::*; - let kind = active_conn.map(|c| match c { - Connection::Cellular { apn } => ConnectionKind::Cellular { apn }, - Connection::Wifi { ssid } => ConnectionKind::Wifi { ssid }, - Connection::Ethernet => ConnectionKind::Ethernet, - }); - - match (state, kind) { - (NMState::CONNECTED_GLOBAL, Some(kind)) => ConnectedGlobal(kind), - (NMState::CONNECTED_SITE, Some(kind)) => ConnectedSite(kind), - (NMState::CONNECTED_LOCAL, Some(kind)) => ConnectedLocal(kind), - (NMState::CONNECTING, _) => Connecting, - (NMState::DISCONNECTING, _) => Disconnecting, - (NMState::UNKNOWN | NMState::ASLEEP | NMState::DISCONNECTED, _) => Disconnected, - _ => Disconnected, - } -} - -fn is_connected(conn_event: &orb_connd_events::Connection) -> bool { - matches!( - conn_event, - orb_connd_events::Connection::ConnectedGlobal(_) - | orb_connd_events::Connection::ConnectedSite(_) - | orb_connd_events::Connection::ConnectedLocal(_) - ) -} diff --git a/orb-connd/src/reporters/net_state.rs b/orb-connd/src/reporters/net_state.rs new file mode 100644 index 000000000..94462519e --- /dev/null +++ b/orb-connd/src/reporters/net_state.rs @@ -0,0 +1,80 @@ +use crate::network_manager::{Connection, NetworkManager}; +use color_eyre::{eyre::eyre, Result}; +use futures::StreamExt; +use orb_connd_events::ConnectionKind; +use 
rusty_network_manager::dbus_interface_types::NMState; +use speare::mini; +use tracing::{info, warn}; + +pub struct Args { + pub nm: NetworkManager, + pub zsender: zenorb::Sender, +} + +pub async fn report(ctx: mini::Ctx) -> Result<()> { + info!("starting netstate reporter"); + + let publisher = ctx.zsender.publisher("net/changed")?; + let mut state_stream = ctx.nm.state_stream().await?; + let mut primary_conn_stream = ctx.nm.primary_connection_stream().await?; + + let nm_state = ctx.nm.state().await?; + let mut conn_event = connection_event(nm_state, ctx.nm.primary_connection().await?); + + let bytes = rkyv::to_bytes::<_, 64>(&conn_event)?; + publisher + .put(bytes.into_vec()) + .await + .map_err(|e| eyre!("{e}"))?; + + let _ = ctx + .publish("net-state", conn_event.clone()) + .inspect_err(|e| warn!(error = ?e, "failed to send net state event")); + + loop { + tokio::select! { + _ = state_stream.next() => {} + _ = primary_conn_stream.next() => {} + }; + + let new_conn_event = + connection_event(ctx.nm.state().await?, ctx.nm.primary_connection().await?); + + let changed = conn_event != new_conn_event; + conn_event = new_conn_event; + + if changed { + let bytes = rkyv::to_bytes::<_, 64>(&conn_event)?; + publisher + .put(bytes.into_vec()) + .await + .map_err(|e| eyre!("{e}"))?; + + let _ = ctx + .publish("net-state", conn_event.clone()) + .inspect_err(|e| warn!(error = ?e, "failed to send net state event")); + } + } +} + +fn connection_event( + state: NMState, + active_conn: Option, +) -> orb_connd_events::Connection { + use orb_connd_events::Connection::*; + let kind = active_conn.map(|c| match c { + Connection::Cellular { apn } => ConnectionKind::Cellular { apn }, + Connection::Wifi { ssid } => ConnectionKind::Wifi { ssid }, + Connection::Ethernet => ConnectionKind::Ethernet, + }); + + match (state, kind) { + (NMState::CONNECTED_GLOBAL, Some(kind)) => ConnectedGlobal(kind), + (NMState::CONNECTED_SITE, Some(kind)) => ConnectedSite(kind), + (NMState::CONNECTED_LOCAL, 
Some(kind)) => ConnectedLocal(kind), + (NMState::CONNECTING, _) => Connecting, + (NMState::DISCONNECTING, _) => Disconnecting, + (NMState::UNKNOWN | NMState::ASLEEP | NMState::DISCONNECTED, _) => Disconnected, + _ => Disconnected, + } +} diff --git a/orb-connd/src/reporters/net_stats.rs b/orb-connd/src/reporters/net_stats.rs index 9a11b4508..4a0a2ccdd 100644 --- a/orb-connd/src/reporters/net_stats.rs +++ b/orb-connd/src/reporters/net_stats.rs @@ -1,23 +1,102 @@ -use color_eyre::Result; -use serde::Serialize; -use std::path::Path; -use tokio::fs; +use color_eyre::{ + eyre::{Context, ContextCompat}, + Result, +}; +use serde::{Deserialize, Serialize}; +use speare::mini; +use std::{ + path::{Path, PathBuf}, + time::Duration, +}; +use tokio::{fs, time}; +use tracing::{info, warn}; -#[derive(Debug, Serialize, Clone)] +pub struct Args { + pub poll_interval: Duration, + pub sysfs: PathBuf, + pub zsender: zenorb::Sender, +} + +pub async fn report(ctx: mini::Ctx) -> Result<()> { + info!("starting netstats reporter"); + let mut update_interval = time::interval(ctx.poll_interval); + + loop { + update_interval.tick().await; + + let ifaces = iface_paths(&ctx.sysfs) + .await + .inspect_err(|e| warn!("failed reading network ifaces from sysfs: {e}"))?; + + let mut all_stats = Vec::new(); + + for iface_path in ifaces { + match NetStats::collect(&iface_path).await { + Err(e) => { + warn!("faield to collectn netstats on {iface_path:?}, err: {e:?}") + } + + Ok(stats) => { + all_stats.push(stats); + } + } + } + + let payload = serde_json::to_string(&all_stats) + .wrap_err("failed to serialze netstats")?; + + let _ = ctx.publish("netstats", all_stats.clone()); + + let _ = ctx + .zsender + .publisher("oes/netstats")? 
+ .put(payload) + .await + .inspect_err(|e| { + warn!("failed to publish oes/netstats on zenoh, err: {e:?}",) + }); + } +} + +async fn iface_paths(sysfs: &Path) -> Result> { + let ifaces_dir = sysfs.join("class").join("net"); + let mut dir = fs::read_dir(ifaces_dir).await?; + + let mut paths = Vec::new(); + while let Ok(Some(entry)) = dir.next_entry().await { + let path = entry.path(); + + let file = path + .file_name() + .and_then(|x| x.to_str()) + .unwrap_or_default(); + + if file.starts_with("eth") + || file.starts_with("wwan") + || file.starts_with("wlan") + { + paths.push(path) + } + } + + Ok(paths) +} + +#[derive(Debug, Serialize, Deserialize, Clone)] pub struct NetStats { + pub iface: String, pub tx_bytes: u64, pub rx_bytes: u64, } impl NetStats { - pub async fn collect(sysfs: impl AsRef, iface: &str) -> Result { - let stats_path = sysfs - .as_ref() - .join("class") - .join("net") - .join(iface) - .join("statistics"); + pub async fn collect(iface_path: &PathBuf) -> Result { + let iface = iface_path + .file_name() + .and_then(|f| f.to_str()) + .wrap_err_with(|| format!("err reading iface name from {iface_path:?}"))?; + let stats_path = iface_path.join("statistics"); let tx_path = stats_path.join("tx_bytes"); let rx_path = stats_path.join("rx_bytes"); @@ -29,6 +108,10 @@ impl NetStats { .trim() .parse()?; - Ok(NetStats { tx_bytes, rx_bytes }) + Ok(NetStats { + iface: iface.into(), + tx_bytes, + rx_bytes, + }) } } diff --git a/orb-connd/src/service/mod.rs b/orb-connd/src/service/mod.rs index 203e55138..60361c6c7 100644 --- a/orb-connd/src/service/mod.rs +++ b/orb-connd/src/service/mod.rs @@ -16,7 +16,6 @@ use std::path::Path; use std::time::Duration; use tokio::fs::{self, File}; use tokio::io::{self}; -use tokio::task::{self, JoinHandle}; use tracing::{error, info, warn}; use wifi::Auth; use wpa_conf::LegacyWpaConfig; @@ -121,28 +120,31 @@ impl ConndService { warn!(?error, "non fatal startup failure") } + match connd.nm.list_wifi_profiles().await { + 
Ok(profiles) => info!("saved wifi profiles: {}", profiles.len()), + Err(e) => warn!("failed to read wifi profiles on startup with err {e:?}"), + }; + Ok(connd) } - pub fn spawn(self) -> JoinHandle> { - info!("spawning dbus service {SERVICE} at path {OBJ_PATH}!"); + pub async fn spawn(self) -> Result<()> { let conn = self.session_dbus.clone(); + info!("spawning dbus service {SERVICE} at path {OBJ_PATH}!"); - task::spawn(async move { - conn.request_name(SERVICE) - .await - .inspect_err(|e| error!("failed to request name on dbus {e}"))?; + conn.request_name(SERVICE) + .await + .inspect_err(|e| error!("failed to request name on dbus {e}"))?; - conn.object_server() - .at(OBJ_PATH, Connd::from(self)) - .await - .inspect_err(|e| error!("failed to serve obj on dbus {e}"))?; + conn.object_server() + .at(OBJ_PATH, Connd::from(self)) + .await + .inspect_err(|e| error!("failed to serve obj on dbus {e}"))?; - info!("dbus service spawned successfully!"); - futures::future::pending::<()>().await; + info!("dbus service spawned successfully!"); + futures::future::pending::<()>().await; - Ok(()) - }) + Ok(()) } async fn wifi_profile_add( diff --git a/orb-connd/src/statsd/dd.rs b/orb-connd/src/statsd/dd.rs index 30198c390..d6004f058 100644 --- a/orb-connd/src/statsd/dd.rs +++ b/orb-connd/src/statsd/dd.rs @@ -1,3 +1,4 @@ +use async_trait::async_trait; use color_eyre::Result; use dogstatsd::DogstatsdResult; use flume::Sender; @@ -68,55 +69,46 @@ impl DogstatsdClient { } } +#[async_trait] impl StatsdClient for DogstatsdClient { - async fn count + Sync + Send>( - &self, - stat: &str, - count: i64, - tags: &[S], - ) -> Result<()> { + async fn count(&self, stat: &str, count: i64, tags: Vec) -> Result<()> { let (reply, rx) = oneshot::channel(); self.tx.send(Msg::Count { stat: stat.to_string(), count, - tags: tags.iter().map(|x| x.as_ref().to_string()).collect(), + tags, reply, })?; Ok(rx.await??) 
} - async fn incr_by_value + Sync + Send>( + async fn incr_by_value( &self, stat: &str, value: i64, - tags: &[S], + tags: Vec, ) -> Result<()> { let (reply, rx) = oneshot::channel(); self.tx.send(Msg::IncrByValue { stat: stat.to_string(), value, - tags: tags.iter().map(|x| x.as_ref().to_string()).collect(), + tags, reply, })?; Ok(rx.await??) } - async fn gauge + Sync + Send>( - &self, - stat: &str, - val: &str, - tags: &[S], - ) -> Result<()> { + async fn gauge(&self, stat: &str, val: &str, tags: Vec) -> Result<()> { let (reply, rx) = oneshot::channel(); self.tx.send(Msg::Gauge { stat: stat.to_string(), val: val.to_string(), - tags: tags.iter().map(|x| x.as_ref().to_string()).collect(), + tags, reply, })?; diff --git a/orb-connd/src/statsd/mod.rs b/orb-connd/src/statsd/mod.rs index 4e7782a0e..7f058488d 100644 --- a/orb-connd/src/statsd/mod.rs +++ b/orb-connd/src/statsd/mod.rs @@ -1,26 +1,18 @@ +use async_trait::async_trait; use color_eyre::Result; pub mod dd; +#[async_trait] pub trait StatsdClient: 'static + Send + Sync { - fn count + Sync + Send>( - &self, - stat: &str, - count: i64, - tags: &[S], - ) -> impl Future> + Send + Sync; + async fn count(&self, stat: &str, count: i64, tags: Vec) -> Result<()>; - fn incr_by_value + Sync + Send>( + async fn incr_by_value( &self, stat: &str, value: i64, - tags: &[S], - ) -> impl Future> + Send + Sync; + tags: Vec, + ) -> Result<()>; - fn gauge + Sync + Send>( - &self, - stat: &str, - val: &str, - tags: &[S], - ) -> impl Future> + Send + Sync; + async fn gauge(&self, stat: &str, val: &str, tags: Vec) -> Result<()>; } diff --git a/orb-connd/src/systemd.rs b/orb-connd/src/systemd.rs new file mode 100644 index 000000000..f96128826 --- /dev/null +++ b/orb-connd/src/systemd.rs @@ -0,0 +1,22 @@ +use color_eyre::Result; +use zbus_systemd::systemd1::ManagerProxy; + +#[derive(Clone)] +pub struct Systemd { + system_bus: zbus::Connection, +} + +impl Systemd { + pub fn new(system_bus: zbus::Connection) -> Self { + Self { system_bus } + 
} + + pub async fn restart_service(&self, unit: &str) -> Result<()> { + let manager = ManagerProxy::new(&self.system_bus).await?; + let _ = manager + .restart_unit(unit.to_string(), "replace".to_string()) + .await?; + + Ok(()) + } +} diff --git a/orb-connd/src/utils.rs b/orb-connd/src/utils.rs index 9f6a3296b..94877492f 100644 --- a/orb-connd/src/utils.rs +++ b/orb-connd/src/utils.rs @@ -1,12 +1,6 @@ use color_eyre::{eyre::eyre, Result}; -use std::{ - sync::{Arc, PoisonError, RwLock, RwLockReadGuard, RwLockWriteGuard}, - time::Duration, -}; -use tokio::{ - process::Command, - time::{self, Instant}, -}; +use std::sync::{Arc, PoisonError, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use tokio::process::Command; use zbus::fdo; pub async fn run_cmd(cmd: &str, args: &[&str]) -> Result { @@ -58,27 +52,6 @@ impl Clone for State { } } -pub async fn retry_for(timeout: Duration, backoff: Duration, f: F) -> Result -where - F: AsyncFn() -> Result, -{ - let start = Instant::now(); - - loop { - match f().await { - Err(e) => { - if start.elapsed() >= timeout { - return Err(e); - } - - time::sleep(backoff).await; - } - - Ok(m) => return Ok(m), - } - } -} - pub trait IntoZResult { fn into_z(self) -> fdo::Result; } diff --git a/orb-connd/tests/fixture.rs b/orb-connd/tests/fixture.rs index 7a31867b0..3a8bee41d 100644 --- a/orb-connd/tests/fixture.rs +++ b/orb-connd/tests/fixture.rs @@ -7,6 +7,7 @@ use mockall::mock; use nix::libc; use orb_connd::{ connectivity_daemon::program, + mcu_util::McuUtil, modem_manager::{ connection_state::ConnectionState, Location, Modem, ModemId, ModemInfo, ModemManager, Signal, SimId, SimInfo, @@ -16,6 +17,7 @@ use orb_connd::{ secure_storage::{ConndStorageScopes, SecureStorage}, service::ProfileStorage, statsd::StatsdClient, + systemd::Systemd, wpa_ctrl::WpaCtrl, OrbCapabilities, }; @@ -25,9 +27,10 @@ use orb_info::{ OrbId, }; use prelude::future::Callback; +use speare::mini; use std::{env, path::PathBuf, str::FromStr, time::Duration}; use 
test_utils::docker::{self, Container}; -use tokio::{fs, task::JoinHandle, time}; +use tokio::{fs, time}; use tokio_util::sync::CancellationToken; use zbus::Address; use zenorb::{zenoh, Zenorb}; @@ -37,7 +40,7 @@ pub struct Fixture { pub nm: NetworkManager, pub container: Container, conn: zbus::Connection, - program_handles: Vec>>, + speare: mini::Ctx<()>, pub sysfs: PathBuf, pub usr_persistent: PathBuf, pub secure_storage: SecureStorage, @@ -51,9 +54,7 @@ impl Drop for Fixture { fn drop(&mut self) { self.secure_storage_cancel_token.cancel(); - for handle in &self.program_handles { - handle.abort(); - } + self.speare.abort_children().unwrap(); } } @@ -75,6 +76,7 @@ impl Fixture { statsd: Option, wpa_ctrl: Option, arrange: Option>, + mcu_util: Option, #[builder(default = false)] log: bool, ) -> Self { let _ = color_eyre::install(); @@ -162,7 +164,7 @@ impl Fixture { .await .unwrap(); - let program_handles = program() + let speare = program() .os_release(OrbOsRelease { release_type: release, orb_os_platform_type: platform, @@ -173,6 +175,7 @@ impl Fixture { .modem_manager(modem_manager.unwrap_or_else(default_mockmmcli)) .network_manager(nm.clone()) .resolved(Resolved::new(conn.clone())) + .systemd(Systemd::new(conn.clone())) .statsd_client(statsd.unwrap_or(MockStatsd)) .sysfs(sysfs.clone()) .usr_persistent(usr_persistent.clone()) @@ -180,6 +183,7 @@ impl Fixture { .connect_timeout(Duration::from_secs(1)) .profile_storage(profile_storage) .zenoh(&zsession) + .mcu_util(mcu_util.unwrap_or_else(default_mock_mcu_util_cli)) .run() .await .unwrap(); @@ -195,7 +199,7 @@ impl Fixture { Self { nm, conn, - program_handles, + speare, container, sysfs, usr_persistent, @@ -268,6 +272,7 @@ fn default_mockmmcli() -> MockMMCli { mm.expect_modem_info().returning(|_| { let mi = ModemInfo { imei: String::new(), + fw_revision: None, operator_code: None, operator_name: None, access_tech: None, @@ -324,31 +329,22 @@ mock! 
{ pub struct MockStatsd; +#[async_trait] impl StatsdClient for MockStatsd { - async fn count + Sync + Send>( - &self, - _stat: &str, - _count: i64, - _tags: &[S], - ) -> Result<()> { + async fn count(&self, _stat: &str, _count: i64, _tags: Vec) -> Result<()> { Ok(()) } - async fn incr_by_value + Sync + Send>( + async fn incr_by_value( &self, _stat: &str, _value: i64, - _tags: &[S], + _tags: Vec, ) -> Result<()> { Ok(()) } - async fn gauge + Sync + Send>( - &self, - _stat: &str, - _val: &str, - _tags: &[S], - ) -> Result<()> { + async fn gauge(&self, _stat: &str, _val: &str, _tags: Vec) -> Result<()> { Ok(()) } } @@ -367,3 +363,17 @@ mock! { async fn scan_results(&self) -> Result>; } } + +fn default_mock_mcu_util_cli() -> MockMcuUtilCli { + let mut mcu_util = MockMcuUtilCli::new(); + mcu_util.expect_powercycle().returning(|_| Ok(())); + mcu_util +} + +mock! { + pub McuUtilCli {} + #[async_trait] + impl McuUtil for McuUtilCli { + async fn powercycle(&self, module: orb_connd::mcu_util::Module) -> Result<()>; + } +} From 9e6dde89803061aeeed527b8b5d3cda643fc3580 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 25 Mar 2026 13:21:11 +0100 Subject: [PATCH 47/66] chore(hil): bump orb-hil to beta 21 (#1113) bump --- nix/packages/orb-hil.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nix/packages/orb-hil.nix b/nix/packages/orb-hil.nix index 0ebf0d2d4..6b45c5940 100644 --- a/nix/packages/orb-hil.nix +++ b/nix/packages/orb-hil.nix @@ -2,11 +2,11 @@ { pkgs }: pkgs.stdenv.mkDerivation rec { pname = "orb-hil"; - version = "0.0.2-beta.20"; + version = "0.0.2-beta.21"; src = pkgs.fetchurl { url = "https://github.com/worldcoin/orb-software/releases/download/orb-hil%2Fv${version}/orb-hil_x86_64"; - sha256 = "sha256-daaE7oRIVduHYsxBNf+RDCSJ9byo7J/ibye0OsX0dPg="; + sha256 = "sha256-6WmSjaWnsgLy1GYOpzNGv80mpCYB3sWMZ/ycKIwJPwU="; }; dontUnpack = true; From c90f1b3e94f56f462acb3e5af94f0b864948427b Mon Sep 17 00:00:00 2001 From: Ryan Butler Date: Wed, 25 
Mar 2026 16:38:34 -0400 Subject: [PATCH 48/66] feat(se050-reprovision): config builder, challenge endpoints (#1106) scaffolding around submitting the requests. --- Cargo.lock | 2 + se050-reprovision/Cargo.toml | 4 +- se050-reprovision/src/cli.rs | 4 +- se050-reprovision/src/lib.rs | 1 - se050-reprovision/src/main.rs | 13 +-- se050-reprovision/src/remote_api.rs | 148 ++++++++++++++++++++++++++-- 6 files changed, 150 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63f6c8746..89c76713f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8592,11 +8592,13 @@ name = "orb-se050-reprovision" version = "0.0.0" dependencies = [ "base64 0.22.1", + "bon", "clap", "color-eyre", "orb-build-info", "orb-const-concat", "orb-endpoints", + "orb-info", "orb-security-utils", "orb-telemetry", "rand 0.8.5", diff --git a/se050-reprovision/Cargo.toml b/se050-reprovision/Cargo.toml index 822f10b65..bd7c354eb 100644 --- a/se050-reprovision/Cargo.toml +++ b/se050-reprovision/Cargo.toml @@ -12,15 +12,17 @@ rust-version.workspace = true [dependencies] base64.workspace = true +bon.workspace = true clap.workspace = true color-eyre.workspace = true orb-build-info.workspace = true orb-const-concat.workspace = true orb-endpoints.workspace = true +orb-info = { workspace = true, features = ["serde"] } orb-security-utils = { workspace = true, features = ["reqwest"] } orb-telemetry.workspace = true rand.workspace = true -reqwest.workspace = true +reqwest = { workspace = true, features = ["json"] } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/se050-reprovision/src/cli.rs b/se050-reprovision/src/cli.rs index 18ee34910..4fb9065be 100644 --- a/se050-reprovision/src/cli.rs +++ b/se050-reprovision/src/cli.rs @@ -13,7 +13,7 @@ pub struct CliOutput { iris_code_key: KeyInfo, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct KeyInfo { /// PEM format key: String, @@ -21,7 +21,7 @@ pub 
struct KeyInfo { signature: Vec, #[serde(with = "crate::base64_serde")] extra_data: Vec, - active: bool, + // active: bool, } pub async fn call(cfg: &Config, nonce: u128) -> Result { diff --git a/se050-reprovision/src/lib.rs b/se050-reprovision/src/lib.rs index f3fc87f6b..37daa51a5 100644 --- a/se050-reprovision/src/lib.rs +++ b/se050-reprovision/src/lib.rs @@ -16,7 +16,6 @@ pub const BUILD_INFO: BuildInfo = make_build_info!(); #[derive(Debug, Clone)] pub struct Config { pub rng: StdRng, - pub base_url: String, pub client: crate::remote_api::Client, /// Path to the CA that performs the re-enrollment pub ca_path: PathBuf, diff --git a/se050-reprovision/src/main.rs b/se050-reprovision/src/main.rs index e42ab06e1..898386e20 100644 --- a/se050-reprovision/src/main.rs +++ b/se050-reprovision/src/main.rs @@ -12,16 +12,11 @@ pub struct Args {} impl Args { fn make_config(self, backend: Backend) -> Result { - let subdomain = match backend { - Backend::Prod => "orb", - Backend::Staging => "stage.orb", - Backend::Analysis => "analysis.ml", - Backend::Local => unreachable!(), - }; - Ok(Config { - base_url: format!("https://auth.{subdomain}.worldcoin.org"), - client: orb_se050_reprovision::remote_api::Client::new()?, + client: orb_se050_reprovision::remote_api::Client::builder() + .default_reqwest_client()? + .from_backend(backend) + .build(), ca_path: PathBuf::from("/usr/local/bin/orb-se050-reprovision-ca"), rng: StdRng::from_entropy(), }) diff --git a/se050-reprovision/src/remote_api.rs b/se050-reprovision/src/remote_api.rs index 0287656fd..c6a85d97b 100644 --- a/se050-reprovision/src/remote_api.rs +++ b/se050-reprovision/src/remote_api.rs @@ -1,6 +1,14 @@ +//! This module is the lower level remote api. 
+ +use bon::Builder; use color_eyre::eyre::{Result, WrapErr as _}; use orb_const_concat::const_concat; +use orb_endpoints::Backend; +use orb_info::OrbId; +use tracing::warn; +use self::client_builder::{IsUnset, SetBaseUrl, SetClient, State}; +use crate::cli::KeyInfo; use crate::BUILD_INFO; const USER_AGENT: &str = const_concat!( @@ -10,16 +18,138 @@ const USER_AGENT: &str = const_concat!( BUILD_INFO.git.describe, ); -#[derive(Debug, Clone)] -pub struct Client(pub reqwest::Client); +#[derive(Debug, Clone, Builder)] +pub struct Client { + // Make the default generated setter private, and rename + // it so it doesn't collide with our custom method name + #[builder(setters(vis = "", name = client_internal))] + client: reqwest::Client, + #[builder(setters(vis = "", name = base_url_internal))] + base_url: String, +} + +impl ClientBuilder +where + S::Client: IsUnset, +{ + pub fn custom_reqwest_client( + self, + client: reqwest::Client, + ) -> ClientBuilder> { + self.client_internal(client) + } + + pub fn default_reqwest_client(self) -> Result>> { + let client = orb_security_utils::reqwest::http_client_builder() + .user_agent(USER_AGENT) + .build() + .wrap_err("failed to create http client")?; + + Ok(self.client_internal(client)) + } +} + +impl ClientBuilder +where + S::BaseUrl: IsUnset, +{ + pub fn from_backend(self, backend: Backend) -> ClientBuilder> { + let subdomain = match backend { + Backend::Prod => "orb", + Backend::Staging => "stage.orb", + Backend::Analysis => "analysis.ml", + Backend::Local => unreachable!(), + }; + let base_url = format!("https://auth.{subdomain}.worldcoin.org"); + self.base_url_internal(base_url) + } + + pub fn local_backend(self, port: u16) -> ClientBuilder> { + self.base_url_internal(format!("http://localhost:{port}")) + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Challenge { + orb_id: OrbId, + /// Combined with orb_nonce for freshness + server_nonce: u128, +} + +#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize, 
serde::Deserialize)] +pub struct PubkeyPem(String); + +#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)] +pub struct Proof { + #[serde(skip)] + orb_id: OrbId, + server_nonce: u128, + /// Combined with server_nonce for freshness + orb_nonce: u128, + jetson_authkey: KeyInfo, + attestation_key: KeyInfo, + iris_code_key: KeyInfo, +} impl Client { - pub fn new() -> Result { - Ok(Self( - orb_security_utils::reqwest::http_client_builder() - .user_agent(USER_AGENT) - .build() - .wrap_err("failed to create http client")?, - )) + pub async fn start_challenge( + &self, + orb_id: &OrbId, + legacy_bearer: Option, + ) -> Result { + // TODO: Align with FM team on endpoints + let url = format!("{}/api/v1/start_challenge/{}", self.base_url, orb_id); + let request = self.client.put(&url); + let request = if let Some(bearer) = legacy_bearer { + request.bearer_auth(bearer) + } else { + warn!("no legacy bearer token provided, omitting it"); + request + }; + let response = request + .send() + .await + .wrap_err_with(|| format!("failed to transmit request to PUT {url}"))? + .error_for_status() + .wrap_err_with(|| format!("HTTP Error for PUT {url}"))? 
+ .bytes() + .await + .wrap_err_with(|| format!("failed to receive payload for PUT {url}"))?; + + let bytes = response.as_ref(); + let bytes: &[u8; 16] = bytes.try_into().wrap_err_with(|| { + format!("bytes were wrong length, expected 16, got {}", bytes.len()) + })?; + let server_nonce = u128::from_be_bytes(*bytes); + + Ok(Challenge { + orb_id: orb_id.clone(), + server_nonce, + }) + } + + pub async fn finish_challenge( + &self, + proof: Proof, + legacy_bearer: Option, + ) -> Result<()> { + // TODO: Align with FM team on endpoints + let url = format!("{}/api/v1/finish_challenge/{}", self.base_url, proof.orb_id); + let request = self.client.put(&url); + let request = if let Some(bearer) = legacy_bearer { + request.bearer_auth(bearer) + } else { + warn!("no legacy bearer token provided, omitting it"); + request + }; + request + .json(&proof) + .send() + .await + .wrap_err_with(|| format!("failed to transmit request to PUT {url}"))? + .error_for_status() + .wrap_err_with(|| format!("HTTP Error for PUT {url}"))?; + + Ok(()) } } From 1553d1e1ccd51f8f6c24efe7078dbb17b143b8d6 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 25 Mar 2026 21:53:26 +0100 Subject: [PATCH 49/66] feat: override hostname in orb-hil cmd (#1114) useful for nfsboot Co-authored-by: Ryan Butler --- hil/src/commands/cmd.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hil/src/commands/cmd.rs b/hil/src/commands/cmd.rs index d70c3c13f..19ad1a9bb 100644 --- a/hil/src/commands/cmd.rs +++ b/hil/src/commands/cmd.rs @@ -52,6 +52,10 @@ pub struct Cmd { #[arg(long, value_enum, default_value_t = CommandTransport::Serial)] transport: CommandTransport, + /// Override the SSH hostname (takes precedence over --orb-id derived hostname) + #[arg(long)] + hostname: Option, + /// Username for SSH/Teleport #[arg(long)] username: Option, @@ -114,7 +118,9 @@ impl Cmd { hostname: match transport { // teleport needs to resolve the hostname, so we ignore it RemoteTransport::Teleport => None, - 
RemoteTransport::Ssh => orb_config.get_hostname(), + RemoteTransport::Ssh => { + self.hostname.clone().or_else(|| orb_config.get_hostname()) + } }, orb_id: orb_config.orb_id.clone(), username: self.username, @@ -300,6 +306,7 @@ mod test { Cmd { cmd: "pwd".to_owned(), transport: CommandTransport::Ssh, + hostname: None, username: None, port: 22, password: None, From eee8820e6e5c61dae457241208aa1a61f028ef33 Mon Sep 17 00:00:00 2001 From: vmenge Date: Thu, 26 Mar 2026 15:19:18 +0100 Subject: [PATCH 50/66] fix(connd): wifi profile persistence after bumping priority (#1117) ## problem when using the `connect_to_wifi` functionality in `orb-connd`, we update the profile's priority to be the highest (so on further restarts the orb always connects to it first). we do so by deleting, then re-creating the profile. when re-creating the profile i forgot to set `.persist()` to make sure we save it to disk. ## fix the actual fix is a single line fix in `orb-connd/src/service/dbus.rs`. everything else were changes required to have a proper regression test for this. a lot of plumbing for restarting docker and keeping the same temporary directory. 
this could probably be cleaned up in the future but that will be a task for another day --- Cargo.lock | 2 +- orb-connd/src/service/dbus.rs | 1 + orb-connd/tests/docker/docker-entry.sh | 8 ++ orb-connd/tests/fixture.rs | 119 ++++++++++++++++++++++--- orb-connd/tests/profile_management.rs | 37 ++++++++ test-utils/Cargo.toml | 2 +- test-utils/src/docker.rs | 37 +++++++- 7 files changed, 191 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 89c76713f..6d1a14d49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12821,8 +12821,8 @@ dependencies = [ name = "test-utils" version = "0.1.0" dependencies = [ + "async-tempfile", "nix 0.28.0", - "tempfile", "testcontainers", "tokio", ] diff --git a/orb-connd/src/service/dbus.rs b/orb-connd/src/service/dbus.rs index b10840e3d..4b4182e88 100644 --- a/orb-connd/src/service/dbus.rs +++ b/orb-connd/src/service/dbus.rs @@ -295,6 +295,7 @@ impl ConndT for ConndService { .psk(&profile.psk) .priority(next_priority) .hidden(profile.hidden) + .persist(self.profile_storage.should_persist()) .add() .await .into_z()?; diff --git a/orb-connd/tests/docker/docker-entry.sh b/orb-connd/tests/docker/docker-entry.sh index d2644fe79..63a1b808d 100644 --- a/orb-connd/tests/docker/docker-entry.sh +++ b/orb-connd/tests/docker/docker-entry.sh @@ -23,5 +23,13 @@ chmod 660 /run/integration-tests/socket echo "starting zenoh" zenohd --config=/etc/zenohd.json5 & +mkdir -p /etc/NetworkManager/system-connections +chown root:root /etc/NetworkManager/system-connections +chmod 700 /etc/NetworkManager/system-connections + +find /etc/NetworkManager/system-connections -type f -name '*.nmconnection' \ + -exec chown root:root {} \; \ + -exec chmod 600 {} \; + echo "starting network-manager" exec /usr/sbin/NetworkManager --no-daemon diff --git a/orb-connd/tests/fixture.rs b/orb-connd/tests/fixture.rs index 3a8bee41d..98e75284f 100644 --- a/orb-connd/tests/fixture.rs +++ b/orb-connd/tests/fixture.rs @@ -1,4 +1,5 @@ #![allow(dead_code)] +use 
async_tempfile::TempDir; use async_trait::async_trait; use bon::bon; use color_eyre::Result; @@ -45,9 +46,11 @@ pub struct Fixture { pub usr_persistent: PathBuf, pub secure_storage: SecureStorage, pub secure_storage_cancel_token: CancellationToken, + pub platform: OrbOsPlatform, zsession: Zenorb, router_port: u16, pub orb_id: String, + pub connd_path: PathBuf, } impl Drop for Fixture { @@ -85,9 +88,11 @@ impl Fixture { let _ = orb_telemetry::TelemetryConfig::new().init(); } - let (container, router_port) = setup_container().await; - let sysfs = container.tempdir.path().join("sysfs"); - let usr_persistent = container.tempdir.path().join("usr_persistent"); + let (container, router_port) = + setup_container(TempDir::new().await.unwrap()).await; + + let sysfs = container.tempdir.dir_path().join("sysfs"); + let usr_persistent = container.tempdir.dir_path().join("usr_persistent"); let network_manager_folder = usr_persistent.join("network-manager"); fs::create_dir_all(&sysfs).await.unwrap(); fs::create_dir_all(&usr_persistent).await.unwrap(); @@ -110,7 +115,7 @@ impl Fixture { time::sleep(Duration::from_secs(1)).await; - let dbus_socket = container.tempdir.path().join("socket"); + let dbus_socket = container.tempdir.dir_path().join("socket"); let dbus_socket = format!("unix:path={}", dbus_socket.display()); let addr: Address = dbus_socket.parse().unwrap(); @@ -208,6 +213,8 @@ impl Fixture { router_port, zsession, orb_id: orb_id.to_string(), + platform, + connd_path: built_connd.path().into(), } } @@ -220,9 +227,88 @@ impl Fixture { .await .unwrap() } + + pub async fn restart(&mut self) { + self.speare.abort_children().unwrap(); + self.container.rm().await; + + time::sleep(Duration::from_secs(1)).await; + + let (container, zenohport) = + setup_container(self.container.tempdir.try_clone().await.unwrap()).await; + + time::sleep(Duration::from_secs(1)).await; + + self.router_port = zenohport; + self.container = container; + + let dbus_socket = 
self.container.tempdir.dir_path().join("socket"); + let dbus_socket = format!("unix:path={}", dbus_socket.display()); + let addr: Address = dbus_socket.parse().unwrap(); + + self.conn = zbus::ConnectionBuilder::address(addr) + .unwrap() + .build() + .await + .unwrap(); + + let nm = NetworkManager::new(self.conn.clone(), default_mock_wpa_cli()); + nm.wait_for_nm_ready().await.unwrap(); + + let cancel_token = CancellationToken::new(); + let secure_storage = SecureStorage::new( + self.connd_path.clone(), + true, + cancel_token.clone(), + ConndStorageScopes::NmProfiles, + ); + + let profile_storage = match self.platform { + OrbOsPlatform::Pearl => ProfileStorage::NetworkManager, + OrbOsPlatform::Diamond => { + ProfileStorage::SecureStorage(secure_storage.clone()) + } + }; + + let speare = program() + .os_release(OrbOsRelease { + release_type: OrbRelease::Dev, + orb_os_platform_type: self.platform, + orb_os_version: String::new(), + expected_main_mcu_version: String::new(), + expected_sec_mcu_version: String::new(), + }) + .modem_manager(default_mockmmcli()) + .network_manager(nm.clone()) + .resolved(Resolved::new(self.conn.clone())) + .systemd(Systemd::new(self.conn.clone())) + .statsd_client(MockStatsd) + .sysfs(self.sysfs.clone()) + .usr_persistent(self.usr_persistent.clone()) + .session_bus(self.conn.clone()) + .connect_timeout(Duration::from_secs(1)) + .profile_storage(profile_storage) + .zenoh(&self.zsession) + .mcu_util(default_mock_mcu_util_cli()) + .run() + .await + .unwrap(); + + self.nm = nm; + self.speare = speare; + self.secure_storage_cancel_token = cancel_token; + + let millisecs = if env::var("GITHUB_ACTIONS").is_ok() { + 4_000 + } else { + 500 + }; + + time::sleep(Duration::from_millis(millisecs)).await; + } } -async fn setup_container() -> (Container, u16) { +async fn setup_container(tempdir: TempDir) -> (Container, u16) { let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let docker_ctx = crate_dir.join("tests").join("docker"); let dockerfile 
= crate_dir.join("tests").join("docker").join("Dockerfile"); @@ -233,18 +319,31 @@ async fn setup_container() -> (Container, u16) { let gid = unsafe { libc::getegid() }; let zenohport = portpicker::pick_unused_port().expect("No ports free"); - - let container = docker::run( + let nm_profiles_dir = tempdir.dir_path().join("system-connections"); + fs::create_dir_all(&nm_profiles_dir).await.unwrap(); + + let target_uid = format!("TARGET_UID={uid}"); + let target_gid = format!("TARGET_GID={gid}"); + let zenoh_mapping = format!("-p={zenohport}:7447"); + let nm_profiles_volume = format!( + "{}:/etc/NetworkManager/system-connections", + nm_profiles_dir.display() + ); + + let container = docker::run_with( tag, [ "--pid=host", "--userns=host", "-e", - &format!("TARGET_UID={uid}"), + &target_uid, "-e", - &format!("TARGET_GID={gid}"), - &format!("-p={zenohport}:7447"), + &target_gid, + "-v", + &nm_profiles_volume, + &zenoh_mapping, ], + tempdir, ) .await; diff --git a/orb-connd/tests/profile_management.rs b/orb-connd/tests/profile_management.rs index a02ff8194..8c6bc0574 100644 --- a/orb-connd/tests/profile_management.rs +++ b/orb-connd/tests/profile_management.rs @@ -336,3 +336,40 @@ async fn it_bumps_priority_of_wifi_profile_on_manual_connection_attempt() { let new_bla = profiles.iter().find(|p| p.ssid == "bla").unwrap(); assert!(bla.priority == new_bla.priority); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn profile_is_persisted_after_bumping_priority() { + // Arrange + let mut fx = Fixture::platform(OrbOsPlatform::Pearl) + .cap(OrbCapabilities::CellularAndWifi) + .release(OrbRelease::Dev) + .run() + .await; + + let connd = fx.connd().await; + + // Act: create profile + connd + .add_wifi_profile("bla".into(), "wpa2".into(), "12345678".into(), false) + .await + .unwrap(); + + // Act: create second profile with higher priority + connd + .add_wifi_profile("bla2".into(), "wpa2".into(), "12345678".into(), false) + .await + .unwrap(); + + // Act: 
force connect, should rewrite profile to raise priority + // will fail due to ssid "bla" not existing + let _ = connd.connect_to_wifi("bla".into()).await; + + // Act: restart connd and environment -- profile should be reloaded + drop(connd); + fx.restart().await; + + // Assert: both profiles are still persisted + let profiles = fx.nm.list_wifi_profiles().await.unwrap(); + assert!(profiles.iter().any(|p| p.ssid == "bla2")); + assert!(profiles.iter().any(|p| p.ssid == "bla")); +} diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 4ab391c5a..bf67994ff 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -10,7 +10,7 @@ rust-version.workspace = true publish = false [dependencies] +async-tempfile.workspace = true nix = { workspace = true, features = ["socket"] } -tempfile.workspace = true testcontainers.workspace = true tokio = { workspace = true, features = ["full"] } diff --git a/test-utils/src/docker.rs b/test-utils/src/docker.rs index eb5d070b9..ef9507844 100644 --- a/test-utils/src/docker.rs +++ b/test-utils/src/docker.rs @@ -1,7 +1,7 @@ +use async_tempfile::TempDir; use std::ffi::OsStr; use std::path::Path; use std::process::Output; -use tempfile::TempDir; use tokio::process::Command; use tokio::task; @@ -38,8 +38,22 @@ where I: IntoIterator, S: AsRef, { - let tempdir = TempDir::new_in("/tmp").unwrap(); - let tempdir_path = tempdir.path().canonicalize().unwrap(); + let tempdir = TempDir::new().await.unwrap(); + + run_with(img, args, tempdir).await +} + +/// Starts a container with a temporary directory mounted to /run/integration-tests +pub async fn run_with( + img: impl AsRef, + args: I, + tempdir: TempDir, +) -> Container +where + I: IntoIterator, + S: AsRef, +{ + let tempdir_path = tempdir.dir_path().canonicalize().unwrap(); let out = Command::new("docker") .args(["run", "-d", "--rm"]) @@ -93,4 +107,21 @@ impl Container { .await .unwrap() } + + pub async fn restart(&self) -> Output { + Command::new("docker") + .arg("restart") + 
.arg(&self.id) + .output() + .await + .unwrap() + } + + pub async fn rm(&self) { + Command::new("docker") + .args(["rm", "-f", &self.id]) // force stop + remove + .output() + .await + .unwrap(); + } } From c03e3c019880334975b2b0d5c54d544cf0e4ddc9 Mon Sep 17 00:00:00 2001 From: Ryan Butler Date: Thu, 26 Mar 2026 15:26:50 -0400 Subject: [PATCH 51/66] build: added devcontainers (#1115) Ports over my implementation of devcontainers in orb-rustzone to orb-sotware. Replaces the previous nonfunctional and unmaintained devcontainer. --------- Co-authored-by: AlexKaravaev --- .devcontainer/Dockerfile | 77 +++++++++++++----- .../Seek_Thermal_SDK_4.1.0.0.zip.sha256sum | 1 - .../Seek_Thermal_SDK_4.4.2.20.zip.sha256sum | 1 - .devcontainer/devcontainer.json | 36 +++++++-- .devcontainer/postCreateCommand.sh | 17 ++++ .devcontainer/run.sh | 12 +-- .devcontainer/starship.toml | 81 +++++++++++++++++++ nix/shells/development.nix | 2 +- 8 files changed, 190 insertions(+), 37 deletions(-) delete mode 100644 .devcontainer/Seek_Thermal_SDK_4.1.0.0.zip.sha256sum delete mode 100644 .devcontainer/Seek_Thermal_SDK_4.4.2.20.zip.sha256sum create mode 100755 .devcontainer/postCreateCommand.sh create mode 100644 .devcontainer/starship.toml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index be529a15c..16af4cbf9 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,21 +1,58 @@ -# syntax=docker/dockerfile:1.4 -FROM mcr.microsoft.com/devcontainers/rust:latest - -RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ - && apt-get -y install libudev-dev libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libasound2-dev protobuf-compiler libsquashfs-dev libclang-dev - -ADD Seek_Thermal_SDK_4.1.0.0.zip /tmp/ -ADD Seek_Thermal_SDK_4.1.0.0.zip.sha256sum /tmp/ - -# Check that we got the same version of the Seek Thermal SDK as expected -RUN cd /tmp/ && sha256sum -c /tmp/Seek_Thermal_SDK_4.1.0.0.zip.sha256sum - -RUN < /etc/sudoers.d/${USER} \ + && chmod 0440 
/etc/sudoers.d/${USER} + +USER ${USER} + +# Install Nix files, but no init integration. +# The Nix daemon won't work as systemd is disabled, thus we +# install Nix in single user mode +RUN /bin/bash -c "sh <(curl -L https://nixos.org/nix/install) --no-daemon" + +# Unfortunately docker doesn't load the user's env variables automatically. +# Similar to: +# https://github.com/NixOS/nix/blob/0eb9946e1d3621cfc2fcffc9378dba334b25fb26/docker.nix#L259-L264 +ENV PATH="${PATH}:/home/${USER}/.nix-profile/bin" + +# As we have a local Nix installation, we need to edit the local config +RUN mkdir -p ${HOME}/.config/nix \ + && cat >> "${HOME}/.config/nix/nix.conf" <<'EOF' +experimental-features = nix-command flakes +sandbox = false +max-jobs = auto +EOF + +# Install packages +RUN nix profile add \ + nixpkgs#awscli2 \ + nixpkgs#direnv \ + nixpkgs#neovim \ + nixpkgs#starship \ + nixpkgs#vim + +# Configure shell +RUN cat >> ${HOME}/.bashrc <<'EOF' +eval "$(direnv hook bash)" +eval "$(starship init bash)" EOF +RUN cat >> ${HOME}/.zshrc <<'EOF' +eval "$(direnv hook zsh)" +eval "$(starship init zsh)" +EOF +COPY ./starship.toml ${HOME}/.config/starship.toml + +WORKDIR /home/${USER} + +CMD ["/bin/bash"] diff --git a/.devcontainer/Seek_Thermal_SDK_4.1.0.0.zip.sha256sum b/.devcontainer/Seek_Thermal_SDK_4.1.0.0.zip.sha256sum deleted file mode 100644 index 076ebe30c..000000000 --- a/.devcontainer/Seek_Thermal_SDK_4.1.0.0.zip.sha256sum +++ /dev/null @@ -1 +0,0 @@ -564eae560e97e6114c3538d2889841eef668524a53860090ddda9320b2968606 Seek_Thermal_SDK_4.1.0.0.zip diff --git a/.devcontainer/Seek_Thermal_SDK_4.4.2.20.zip.sha256sum b/.devcontainer/Seek_Thermal_SDK_4.4.2.20.zip.sha256sum deleted file mode 100644 index e5d0dd025..000000000 --- a/.devcontainer/Seek_Thermal_SDK_4.4.2.20.zip.sha256sum +++ /dev/null @@ -1 +0,0 @@ -825f324020b44f4b6326687ffff17c9b305ccb44dce6616e43e7392a591e6e80 Seek_Thermal_SDK_4.4.2.20.zip diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 
8d5a8382c..973df6849 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,10 +1,30 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, { - "build": { - "dockerfile": "Dockerfile" - }, - "workspaceMount": "", - "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - "runArgs": [ - "--volume=${localWorkspaceFolder}:/workspaces/${localWorkspaceFolderBasename}", - ] + "name": "Ubuntu", + "build": { + "dockerfile": "Dockerfile", + "context": "." + }, + "remoteUser": "ubuntu", + "containerEnv": { + "CACHIX_AUTH_TOKEN": "${localEnv:CACHIX_AUTH_TOKEN}" + }, + "customizations": { + "vscode": { + "extensions": [ + "rust-lang.rust-analyzer", + "tamasfe.even-better-toml", + "mkhl.direnv" + ] + } + }, + "initializeCommand": "mkdir -p ${localEnv:HOME}/.config/nix ${localEnv:HOME}/.aws", + "mounts": [ + // Keep the rust target directory in a persistent docker volume instead of bind mount + "source=${localWorkspaceFolderBasename}-rust-target-vol,target=${containerWorkspaceFolder}/target,type=volume", + "source=${localEnv:HOME}/.aws,target=/home/ubuntu/.aws,type=bind", + "source=${localEnv:HOME}/.config/nix,target=${localEnv:HOME}/.config/nix,type=bind" + ], + // "overrideCommand": false, // Broken in devcontainer cli qwq https://github.com/devcontainers/cli/issues/816 + "postCreateCommand": ".devcontainer/postCreateCommand.sh" } diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh new file mode 100755 index 000000000..a675734d8 --- /dev/null +++ b/.devcontainer/postCreateCommand.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -Eeuxo pipefail + +sudo chown ubuntu target + +git config --global --add safe.directory /workspaces/orb-software + +# Get direnv to work in the bash scripts +if [ ! 
-e .envrc ]; then + cp .envrc.example .envrc # Bootstrap for the user + direnv allow +fi + +if [ -e .devcontainer/postCreateCommand.user.sh ]; then + .devcontainer/postCreateCommand.user.sh +fi diff --git a/.devcontainer/run.sh b/.devcontainer/run.sh index f9a854a3b..19621ce56 100755 --- a/.devcontainer/run.sh +++ b/.devcontainer/run.sh @@ -1,24 +1,24 @@ -#!/bin/bash +#!/usr/bin/env bash # This script runs a command (or $SHELL if none is provided) in the dev container. # If the dev container isn't running yet, it will start it up. set -Eeuxo pipefail # We do this to be agnostic to cwd when we invoke the script. -ORB_SW_DIR="$(realpath "$(dirname "${0}")/../")" +TOPLEVEL_DIR=$(dirname "$0")/../ # Figure out what the command we will execute will be, store it in CMD. if [ "$#" -eq "0" ]; then - CMD=$SHELL + CMD="/usr/bin/env "$(basename $SHELL)"" else - CMD="$@" + CMD="$@" fi # `devcontainer up` creates and starts the container or reuses the existing one. # Sed extracts the container ID from the stdout of the prior command. -CONTAINER_ID=$(devcontainer up --workspace-folder "$ORB_SW_DIR" | sed 's/.*"containerId":"\([^"]*\)".*/\1/') +CONTAINER_ID=$(devcontainer up --workspace-folder "$TOPLEVEL_DIR" | sed 's/.*"containerId":"\([^"]*\)".*/\1/') # Actually execute CMD. We do this instead of devcontainer exec because the # latter caused issues with TUIs like neovim, whereas docker exec does not seem # to have these issues. 
-docker exec -it -w /workspaces/"$(basename "${ORB_SW_DIR}")" -e SHELL=${SHELL} "$CONTAINER_ID" $CMD +docker exec -it -w /workspaces/orb-software "$CONTAINER_ID" $CMD diff --git a/.devcontainer/starship.toml b/.devcontainer/starship.toml new file mode 100644 index 000000000..ffed980ce --- /dev/null +++ b/.devcontainer/starship.toml @@ -0,0 +1,81 @@ +# Get editor completions based on the config schema +"$schema" = 'https://starship.rs/config-schema.json' + +format = """ +$os\ +$hostname\ +[](bg:#769ff0 fg:#a3aed2)\ +$directory\ +[](fg:#769ff0 bg:#394260)\ +$git_branch\ +$git_status\ +[](fg:#394260 bg:#212736)\ +$nodejs\ +$rust\ +$golang\ +$php\ +[](fg:#212736 bg:#1d2230)\ +$time\ +[ ](fg:#1d2230)\ +\n$character""" + +[directory] +format = "[ $path ]($style)" +style = "fg:#e3e5e5 bg:#769ff0" +truncation_length = 3 +truncation_symbol = "…/" + +[directory.substitutions] +"Documents" = " " +"Downloads" = " " +"Music" = " " +"Pictures" = " " + +[git_branch] +format = '[[ $symbol $branch ](fg:#769ff0 bg:#394260)]($style)' +style = "bg:#394260" +symbol = "" + +[git_status] +format = '[[($all_status$ahead_behind )](fg:#769ff0 bg:#394260)]($style)' +style = "bg:#394260" + +[nodejs] +format = '[[ $symbol ($version) ](fg:#769ff0 bg:#212736)]($style)' +style = "bg:#212736" +symbol = "" + +[rust] +format = '[[ $symbol ($version) ](fg:#769ff0 bg:#212736)]($style)' +style = "bg:#212736" +symbol = "" + +[golang] +format = '[[ $symbol ($version) ](fg:#769ff0 bg:#212736)]($style)' +style = "bg:#212736" +symbol = "ﳑ" + +[php] +format = '[[ $symbol ($version) ](fg:#769ff0 bg:#212736)]($style)' +style = "bg:#212736" +symbol = "" + +[time] +disabled = false +format = '[[  $time ](fg:#a0a9cb bg:#1d2230)]($style)' +style = "bg:#1d2230" +time_format = "%R" # Hour:Minute Format + +[os] +disabled = false +format = '[ $symbol ]($style)' +style = "bg:#a3aed2 fg:#090c0c" + +[os.symbols] +Macos = " " +Ubuntu = " " + +[hostname] +format = '[$hostname]($style)' +ssh_only = true 
+style = "bg:#a3aed2 fg:#090c0c" diff --git a/nix/shells/development.nix b/nix/shells/development.nix index 7c22ad150..5e286e809 100644 --- a/nix/shells/development.nix +++ b/nix/shells/development.nix @@ -141,9 +141,9 @@ in nixfmt-tree # Nix autoformatter nushell # Cross platform shell for scripts protobuf # Needed for orb-messages and other protobuf dependencies - sshpass # Non-interactive ssh password auth squashfsTools # mksquashfs sshpass # Needed for orb-software/scripts + sshpass # Non-interactive ssh password auth taplo # toml autoformatter unstable.cargo-deny # Checks licenses and security advisories zbus-xmlgen # Used by `orb-zbus-proxies` From b2bdcdf42508f6d7c1bd0c74ea37f69cc05f5ac2 Mon Sep 17 00:00:00 2001 From: vmenge Date: Fri, 27 Mar 2026 13:01:59 +0100 Subject: [PATCH 52/66] feat(connd): enrich active connections (#1118) ## changes - this PR adds extra information to the active connections report: link status and routes for every interface - also removed `net_changed` zenoh event emitted by `orb-connd`, instead reusing `oes::ActiveConnections` in its place to consolidate things ## bugfixes - this also fixes a bug where magic qr was not resetting the connection in `jobs-agent` ## todo - [ ] test on an orb --- Cargo.lock | 14 +- Cargo.toml | 1 - orb-backend-status/Cargo.toml | 3 +- .../src/collectors/connectivity.rs | 36 +- orb-backend-status/src/lib.rs | 2 +- .../tests/backend_status_service.rs | 2 +- orb-backend-status/tests/fixture.rs | 80 ++-- orb-connd/Cargo.toml | 1 - orb-connd/events/Cargo.toml | 14 - orb-connd/events/src/lib.rs | 36 -- orb-connd/src/connectivity_daemon.rs | 5 +- orb-connd/src/main.rs | 1 + orb-connd/src/network_manager/mod.rs | 30 +- orb-connd/src/reporters/active_connections.rs | 394 +++++++++++++++--- orb-connd/src/reporters/connd_report.rs | 6 +- orb-connd/src/reporters/mod.rs | 17 +- orb-connd/src/reporters/net_state.rs | 80 ---- orb-connd/src/service/dbus.rs | 34 +- orb-connd/tests/fixture.rs | 47 ++- 
orb-connd/tests/reporter.rs | 46 -- orb-jobs-agent/Cargo.toml | 2 +- orb-jobs-agent/src/conn_change.rs | 37 +- zorb/Cargo.toml | 1 - zorb/src/main.rs | 70 +--- 24 files changed, 523 insertions(+), 436 deletions(-) delete mode 100644 orb-connd/events/Cargo.toml delete mode 100644 orb-connd/events/src/lib.rs delete mode 100644 orb-connd/src/reporters/net_state.rs delete mode 100644 orb-connd/tests/reporter.rs diff --git a/Cargo.lock b/Cargo.lock index 6d1a14d49..c4f3d834d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7937,10 +7937,10 @@ dependencies = [ "eyre", "flume", "futures-util", + "oes", "orb-backend-status-dbus", "orb-build-info", "orb-connd-dbus", - "orb-connd-events", "orb-endpoints", "orb-info", "orb-messages 0.0.0 (git+https://github.com/worldcoin/orb-messages?rev=af472fadb57ce55ac63f8f94bd2a0608e62405c7)", @@ -8189,7 +8189,6 @@ dependencies = [ "orb-backend-status-dbus", "orb-build-info", "orb-connd-dbus", - "orb-connd-events", "orb-info", "orb-secure-storage-ca", "orb-telemetry", @@ -8233,14 +8232,6 @@ dependencies = [ "zbus", ] -[[package]] -name = "orb-connd-events" -version = "0.1.0" -dependencies = [ - "rkyv", - "serde", -] - [[package]] name = "orb-const-concat" version = "0.0.0" @@ -8358,10 +8349,10 @@ dependencies = [ "libc", "listenfd", "mockall", + "oes", "once_cell", "orb-build-info", "orb-connd-dbus", - "orb-connd-events", "orb-endpoints", "orb-info", "orb-relay-client", @@ -15922,7 +15913,6 @@ dependencies = [ "clap", "color-eyre", "orb-build-info", - "orb-connd-events", "orb-info", "rkyv", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 959f20e82..a0674a73d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -201,7 +201,6 @@ orb-blob-p2p.path = "experiments/orb-blob/p2p" orb-build-info.path = "build-info" orb-connd.path = "orb-connd" orb-connd-dbus.path = "orb-connd/dbus" -orb-connd-events.path = "orb-connd/events" orb-const-concat.path = "const-concat" orb-endpoints.path = "endpoints" orb-header-parsing.path = "header-parsing" diff --git 
a/orb-backend-status/Cargo.toml b/orb-backend-status/Cargo.toml index 1cb286dbe..1a12b997a 100644 --- a/orb-backend-status/Cargo.toml +++ b/orb-backend-status/Cargo.toml @@ -17,10 +17,10 @@ color-eyre.workspace = true flume.workspace = true futures-util = { version = "0.3.31" } eyre.workspace = true +oes.workspace = true orb-backend-status-dbus.workspace = true orb-build-info.workspace = true orb-connd-dbus.workspace = true -orb-connd-events.workspace = true orb-endpoints.workspace = true orb-messages.workspace = true orb-info = { workspace = true, features = [ @@ -57,7 +57,6 @@ zenorb.workspace = true [dev-dependencies] dbus-launch.workspace = true eyre.workspace = true -orb-connd-events.workspace = true portpicker = "0.1.1" proptest.workspace = true serial_test.workspace = true diff --git a/orb-backend-status/src/collectors/connectivity.rs b/orb-backend-status/src/collectors/connectivity.rs index f348ba8f3..a9f80fbee 100644 --- a/orb-backend-status/src/collectors/connectivity.rs +++ b/orb-backend-status/src/collectors/connectivity.rs @@ -1,9 +1,7 @@ use super::ZenorbCtx; use color_eyre::Result; -use orb_connd_events::Connection; -use rkyv::AlignedVec; use tracing::debug; -use zenorb::zenoh; +use zenorb::zenoh::sample::Sample; #[derive(Debug, Clone, PartialEq, Eq)] pub enum GlobalConnectivity { @@ -26,27 +24,21 @@ impl GlobalConnectivity { pub(crate) async fn handle_connection_event( ctx: ZenorbCtx, - sample: zenoh::sample::Sample, + sample: Sample, ) -> Result<()> { let payload = sample.payload().to_bytes(); - let mut bytes = AlignedVec::with_capacity(payload.len()); - bytes.extend_from_slice(&payload); - - let archived = rkyv::check_archived_root::(&bytes) - .map_err(|e| color_eyre::eyre::eyre!("{e}"))?; - - let connectivity = match archived { - orb_connd_events::ArchivedConnection::ConnectedGlobal(kind) => { - let ssid = match kind { - orb_connd_events::ArchivedConnectionKind::Wifi { ssid } => { - Some(ssid.to_string()) - } - 
orb_connd_events::ArchivedConnectionKind::Ethernet - | orb_connd_events::ArchivedConnectionKind::Cellular { .. } => None, - }; - GlobalConnectivity::Connected { ssid } - } - _ => GlobalConnectivity::NotConnected, + let active_conns: oes::ActiveConnections = serde_json::from_slice(&payload)?; + let connected = active_conns.connections.iter().any(|c| c.has_internet); + let ssid = active_conns + .connections + .into_iter() + .find(|c| c.iface == oes::NetworkInterface::WiFi && c.has_internet) + .map(|c| c.name); + + let connectivity = if connected { + GlobalConnectivity::Connected { ssid } + } else { + GlobalConnectivity::NotConnected }; let prev = ctx.connectivity_tx.borrow().clone(); diff --git a/orb-backend-status/src/lib.rs b/orb-backend-status/src/lib.rs index 353545a65..c992c610c 100644 --- a/orb-backend-status/src/lib.rs +++ b/orb-backend-status/src/lib.rs @@ -117,7 +117,7 @@ pub async fn program( let zenorb_tasks = zsession .receiver(zenorb_ctx) .querying_subscriber( - "connd/net/changed", + "connd/oes/active_connections", Duration::from_millis(15), connectivity::handle_connection_event, ) diff --git a/orb-backend-status/tests/backend_status_service.rs b/orb-backend-status/tests/backend_status_service.rs index 1384384a3..073632488 100644 --- a/orb-backend-status/tests/backend_status_service.rs +++ b/orb-backend-status/tests/backend_status_service.rs @@ -38,7 +38,7 @@ async fn it_flushes_oes_events_to_backend() { let requests = fx.mock_server.received_requests().await.unwrap_or_default(); let oes_request = requests.iter().find(|r| { let body = String::from_utf8_lossy(&r.body); - body.contains("\"oes\"") + body.contains("\"oes\"") && body.contains("test_event") }); assert!( oes_request.is_some(), diff --git a/orb-backend-status/tests/fixture.rs b/orb-backend-status/tests/fixture.rs index fdfbf5ab8..9773be46b 100644 --- a/orb-backend-status/tests/fixture.rs +++ b/orb-backend-status/tests/fixture.rs @@ -1,6 +1,7 @@ use async_tempfile::TempDir; use 
color_eyre::Result; use dbus_launch::BusType; +use oes::{ActiveConnections, NetworkInterface}; use orb_info::{OrbId, OrbJabilId, OrbName}; use reqwest::Url; use std::{env, path::PathBuf, str::FromStr, time::Duration}; @@ -11,7 +12,7 @@ use tokio::{ }; use tokio_util::sync::CancellationToken; use wiremock::MockServer; -use zenorb::zenoh; +use zenorb::zenoh::{self, bytes::Encoding}; /// Sample /proc/net/dev content for tests const SAMPLE_NET_DEV: &str = r#"Inter-| Receive | Transmit @@ -259,32 +260,52 @@ impl Fixture { &self, state: mocks::ConnectionState, ) -> Result<()> { - use orb_connd_events::{Connection, ConnectionKind}; - let conn_event = match state { - mocks::ConnectionState::Connected => { - Connection::ConnectedGlobal(ConnectionKind::Wifi { - ssid: "TestNetwork".to_string(), - }) - } - mocks::ConnectionState::Disconnected => Connection::Disconnected, - mocks::ConnectionState::Disconnecting => Connection::Disconnecting, - mocks::ConnectionState::Connecting => Connection::Connecting, - mocks::ConnectionState::PartiallyConnected => { - Connection::ConnectedLocal(ConnectionKind::Wifi { - ssid: "TestNetwork".to_string(), - }) - } + mocks::ConnectionState::Connected => ActiveConnections { + connectivity_uri: "fakeurl.com".into(), + connections: vec![oes::Connection { + name: "TestNetwork".into(), + iface: NetworkInterface::WiFi, + primary: true, + has_internet: true, + }], + }, + + mocks::ConnectionState::Disconnected => ActiveConnections { + connectivity_uri: "fakeurl.com".into(), + connections: vec![], + }, + + mocks::ConnectionState::Disconnecting => ActiveConnections { + connectivity_uri: "fakeurl.com".into(), + connections: vec![], + }, + + mocks::ConnectionState::Connecting => ActiveConnections { + connectivity_uri: "fakeurl.com".into(), + connections: vec![], + }, + + mocks::ConnectionState::PartiallyConnected => ActiveConnections { + connectivity_uri: "fakeurl.com".into(), + connections: vec![oes::Connection { + name: "TestNetwork".into(), + iface: 
NetworkInterface::WiFi, + primary: true, + has_internet: false, + }], + }, }; - let bytes = rkyv::to_bytes::<_, 256>(&conn_event)?; + let payload = serde_json::to_string(&conn_event).unwrap(); - let keyexpr = format!("{}/connd/net/changed", self.orb_id); + let keyexpr = format!("{}/connd/oes/active_connections", self.orb_id); let zraw = zenoh::open(zenorb::client_cfg(self.zenoh_port)) .await .map_err(|e| color_eyre::eyre::eyre!("{e}"))?; - zraw.put(keyexpr, bytes.into_vec()) + zraw.put(keyexpr, payload) + .encoding(Encoding::APPLICATION_JSON) .await .map_err(|e| color_eyre::eyre::eyre!("{e}"))?; @@ -301,20 +322,25 @@ impl Fixture { /// Helper to set connected state with a specific SSID pub async fn set_connected_with_ssid(&self, ssid: &str) -> Result<()> { - use orb_connd_events::{Connection, ConnectionKind}; - - let conn_event = Connection::ConnectedGlobal(ConnectionKind::Wifi { - ssid: ssid.to_string(), - }); + let conn_event = ActiveConnections { + connectivity_uri: "fakeurl.com".into(), + connections: vec![oes::Connection { + name: ssid.into(), + iface: NetworkInterface::WiFi, + primary: true, + has_internet: true, + }], + }; - let bytes = rkyv::to_bytes::<_, 256>(&conn_event)?; + let payload = serde_json::to_string(&conn_event).unwrap(); - let keyexpr = format!("{}/connd/net/changed", self.orb_id); + let keyexpr = format!("{}/connd/oes/active_connections", self.orb_id); let zraw = zenoh::open(zenorb::client_cfg(self.zenoh_port)) .await .map_err(|e| color_eyre::eyre::eyre!("{e}"))?; - zraw.put(keyexpr, bytes.into_vec()) + zraw.put(keyexpr, payload) + .encoding(Encoding::APPLICATION_JSON) .await .map_err(|e| color_eyre::eyre::eyre!("{e}"))?; diff --git a/orb-connd/Cargo.toml b/orb-connd/Cargo.toml index ee5a3b33b..f573548af 100644 --- a/orb-connd/Cargo.toml +++ b/orb-connd/Cargo.toml @@ -33,7 +33,6 @@ oes.workspace = true orb-backend-status-dbus.workspace = true orb-build-info.workspace = true orb-connd-dbus.workspace = true -orb-connd-events.workspace = true 
orb-info = { workspace = true, features = ["orb-os-release", "async"] } orb-secure-storage-ca = { workspace = true, default-features = false, features = [ "backend-in-memory", diff --git a/orb-connd/events/Cargo.toml b/orb-connd/events/Cargo.toml deleted file mode 100644 index d2c69f55a..000000000 --- a/orb-connd/events/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "orb-connd-events" -version = "0.1.0" -authors = ["Victor Ferreira Menge "] -publish = false - -edition.workspace = true -license.workspace = true -repository.workspace = true -rust-version.workspace = true - -[dependencies] -rkyv = { workspace = true, features = ["bytecheck"] } -serde = { workspace = true, features = ["derive"] } diff --git a/orb-connd/events/src/lib.rs b/orb-connd/events/src/lib.rs deleted file mode 100644 index 6eeb16409..000000000 --- a/orb-connd/events/src/lib.rs +++ /dev/null @@ -1,36 +0,0 @@ -use rkyv::bytecheck; -use rkyv::{Archive, CheckBytes, Deserialize, Serialize}; - -#[derive( - Archive, Deserialize, Serialize, Clone, Debug, PartialEq, serde::Serialize, -)] -#[archive_attr(derive(CheckBytes, Debug, PartialEq))] -pub enum Connection { - /// There is no active network connection. - Disconnected, - /// Network connections are being cleaned up. - Disconnecting, - /// A network connection is being started. - Connecting, - /// There is only local IPv4 and/or IPv6 connectivity, - /// but no default route to access the Internet. - ConnectedLocal(ConnectionKind), - /// There is only site-wide IPv4 and/or IPv6 connectivity. - /// This means a default route is available, but the Internet connectivity check - /// (see "Connectivity" property) did not succeed. - ConnectedSite(ConnectionKind), - /// There is global IPv4 and/or IPv6 Internet connectivity. - /// This means the Internet connectivity check succeeded and we have - /// full network connectivity. 
- ConnectedGlobal(ConnectionKind), -} - -#[derive( - Archive, Deserialize, Serialize, Clone, Debug, PartialEq, serde::Serialize, -)] -#[archive_attr(derive(CheckBytes, Debug, PartialEq))] -pub enum ConnectionKind { - Wifi { ssid: String }, - Cellular { apn: String }, - Ethernet, -} diff --git a/orb-connd/src/connectivity_daemon.rs b/orb-connd/src/connectivity_daemon.rs index df6af5821..e0e8d067a 100644 --- a/orb-connd/src/connectivity_daemon.rs +++ b/orb-connd/src/connectivity_daemon.rs @@ -19,6 +19,7 @@ use zenorb::Zenorb; #[bon::builder(finish_fn = run)] pub async fn program( sysfs: impl AsRef, + procfs: impl AsRef, usr_persistent: impl AsRef, network_manager: NetworkManager, systemd: Systemd, @@ -33,6 +34,7 @@ pub async fn program( zenoh: &Zenorb, ) -> Result> { let sysfs = sysfs.as_ref().to_path_buf(); + let procfs = procfs.as_ref().to_path_buf(); let modem_manager: Arc = Arc::new(modem_manager); let mcu_util: Arc = Arc::new(mcu_util); let statsd_client: Arc = Arc::new(statsd_client); @@ -78,8 +80,9 @@ pub async fn program( resolved, session_bus, statsd_client, - sysfs, zsender, + sysfs, + procfs, ) .await?; diff --git a/orb-connd/src/main.rs b/orb-connd/src/main.rs index 6f8e37f2d..5925338d3 100644 --- a/orb-connd/src/main.rs +++ b/orb-connd/src/main.rs @@ -107,6 +107,7 @@ fn connectivity_daemon() -> Result<()> { let speare = connectivity_daemon::program() .sysfs("/sys") + .procfs("/proc") .usr_persistent("/usr/persistent") .network_manager(nm) .resolved(resolved) diff --git a/orb-connd/src/network_manager/mod.rs b/orb-connd/src/network_manager/mod.rs index 0c890628a..da2e2c35e 100644 --- a/orb-connd/src/network_manager/mod.rs +++ b/orb-connd/src/network_manager/mod.rs @@ -645,10 +645,10 @@ impl NetworkManager { Ok(uri) } - pub async fn state(&self) -> Result { + pub async fn state(&self) -> Result { let nm = NetworkManagerProxy::new(&self.conn).await?; let state = NMState::try_from(nm.state().await?)?; - Ok(state) + Ok(state.into()) } pub async fn 
state_stream(&self) -> Result { @@ -1062,6 +1062,32 @@ pub struct ActiveConn { pub ipv6_addresses: Vec, } +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +pub enum ConnectionState { + Disconnected, + Disconnecting, + Connecting, + PartiallyConnected, + Connected, +} + +impl From for ConnectionState { + fn from(value: NMState) -> Self { + use ConnectionState::*; + match value { + NMState::UNKNOWN | NMState::ASLEEP | NMState::DISCONNECTED => Disconnected, + + NMState::DISCONNECTING => Disconnecting, + + NMState::CONNECTING => Connecting, + + NMState::CONNECTED_LOCAL | NMState::CONNECTED_SITE => PartiallyConnected, + + NMState::CONNECTED_GLOBAL => Connected, + } + } +} + #[cfg(test)] mod tests { use super::{AccessPoint, ApCap, WifiProfile, WifiSec}; diff --git a/orb-connd/src/reporters/active_connections.rs b/orb-connd/src/reporters/active_connections.rs index 4676451a2..e435cc027 100644 --- a/orb-connd/src/reporters/active_connections.rs +++ b/orb-connd/src/reporters/active_connections.rs @@ -1,64 +1,130 @@ -use crate::network_manager::NetworkManager; +use crate::network_manager::{self, ConnectionState, NetworkManager}; use crate::resolved::{HostnameResolution, LinkDnsStatus, Resolved}; -use color_eyre::eyre::bail; +use color_eyre::eyre::{bail, Context, ContextCompat}; use color_eyre::Result; +use futures::StreamExt; use oes::NetworkInterface; use serde::Serializer; use speare::mini; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; use std::time::{Duration, Instant}; -use tracing::{error, info}; +use tokio::fs; +use tracing::{error, info, warn}; pub struct Args { pub nm: NetworkManager, pub resolved: Resolved, pub zsender: zenorb::Sender, + pub procfs: PathBuf, + pub sysfs: PathBuf, } pub async fn report(ctx: mini::Ctx) -> Result<()> { info!("starting active connections reporter"); - let net_state_rx = ctx - .subscribe("net-state") - .inspect_err(|e| error!("failed to subscribe to net-state 
{e}"))?; - while let Ok(net_state) = net_state_rx.recv_async().await { - let _ = build_and_send_report(&ctx.nm, &ctx.resolved, net_state, &ctx.zsender) + async { + let mut state_stream = ctx + .nm + .state_stream() .await - .inspect_err(|error| { - error!(?error, "active connections report failed: {error}") - }); - } + .wrap_err("failed to subscribe to NetworkManager state stream")?; - Ok(()) + let mut primary_conn_stream = + ctx.nm.primary_connection_stream().await.wrap_err( + "faield to subscribe to NetworkManager primary connection stream", + )?; + + let mut state = ctx.nm.state().await.wrap_err("failed to get nm state")?; + let mut primary_conn = ctx + .nm + .primary_connection() + .await + .inspect_err(|e| warn!("failed to get primary connection: {e}")) + .ok() + .flatten(); + + let report = build_report(&primary_conn, state, &ctx) + .await + .wrap_err("building active connections report")?; + + publish_report(&ctx, report) + .await + .wrap_err("publishing active connections report")?; + + loop { + tokio::select! 
{ + Some(_) = state_stream.next() => (), + Some(_) = primary_conn_stream.next() => (), + }; + + let new_state = ctx.nm.state().await.wrap_err("failed to get nm state")?; + + let new_primary_conn = ctx + .nm + .primary_connection() + .await + .inspect_err(|e| warn!("failed to get primary connection: {e}")) + .ok() + .flatten(); + + let changed = (new_state != state) || (new_primary_conn != primary_conn); + state = new_state; + primary_conn = new_primary_conn; + + if changed { + let report = build_report(&primary_conn, state, &ctx) + .await + .wrap_err("building active connections report")?; + + publish_report(&ctx, report) + .await + .wrap_err("publishing active connections report")?; + } + } + + #[allow(unreachable_code)] + Ok(()) + } + .await + .inspect_err(|err| error!("active connections report failed with: {err:?}")) } -async fn build_and_send_report( - nm: &NetworkManager, - resolved: &Resolved, - primary_connection: orb_connd_events::Connection, - zsender: &zenorb::Sender, -) -> Result<()> { - let active_conns = nm.active_connections().await?; - let connectivity_uri = nm.connectivity_check_uri().await?; +/// build report based on NM inputs and system inspection +async fn build_report( + primary: &Option, + connection_state: ConnectionState, + ctx: &mini::Ctx, +) -> Result { + let active_conns = ctx.nm.active_connections().await?; + let connectivity_uri = ctx.nm.connectivity_check_uri().await?; let hostname = hostname_from_uri(&connectivity_uri).map(str::to_string); let mut report = ActiveConnections { - primary_connection, + connection_state, connectivity_uri, hostname, connections: Vec::new(), + iface_routes: InterfaceRoutes::from_fs(&ctx.sysfs, &ctx.procfs).await?, }; for conn in &active_conns { for iface in &conn.devices { - let dns_status = - resolved.link_status(iface).await.map_err(|e| e.to_string()); + let dns_status = ctx + .resolved + .link_status(iface) + .await + .map_err(|e| e.to_string()); let dns_resolution = match &report.hostname { - 
Some(hostname) => resolved + Some(hostname) => ctx + .resolved .resolve_hostname(iface, hostname) .await .map(Some) .map_err(|e| e.to_string()), + None => Ok(None), }; @@ -78,12 +144,12 @@ async fn build_and_send_report( .map_err(|e: color_eyre::Report| format!("{e:#}")); report.connections.push(Connection { - primary: is_primary(&report.primary_connection, &conn.id), - name: &conn.id, - iface, + primary: is_primary(primary, &conn.id), + name: conn.id.clone(), + iface: iface.to_owned(), has_internet: http_check.as_ref().is_ok_and(|x| x.status.is_success()), - ipv4_addresses: &conn.ipv4_addresses, - ipv6_addresses: &conn.ipv6_addresses, + ipv4_addresses: conn.ipv4_addresses.clone(), + ipv6_addresses: conn.ipv6_addresses.clone(), dns_status, dns_resolution, http_check, @@ -91,31 +157,26 @@ async fn build_and_send_report( } } - info!("{report:#?}"); - - if let Err(e) = publish_report(report.try_into()?, zsender).await { - error!("failed to publish active connections report: {e}"); - } - - Ok(()) + Ok(report) } #[derive(Debug, serde::Serialize)] -struct ActiveConnections<'a> { - primary_connection: orb_connd_events::Connection, +struct ActiveConnections { + connection_state: ConnectionState, connectivity_uri: String, hostname: Option, - connections: Vec>, + connections: Vec, + iface_routes: Vec, } #[derive(Debug, serde::Serialize)] -struct Connection<'a> { - name: &'a str, - iface: &'a str, +struct Connection { + name: String, + iface: String, primary: bool, has_internet: bool, - ipv4_addresses: &'a [String], - ipv6_addresses: &'a [String], + ipv4_addresses: Vec, + ipv6_addresses: Vec, dns_status: Result, dns_resolution: Result, String>, http_check: Result, @@ -132,11 +193,17 @@ struct HttpCheck { } async fn publish_report( - report: oes::ActiveConnections, - zsender: &zenorb::Sender, + ctx: &mini::Ctx, + report: ActiveConnections, ) -> Result<()> { + info!("{report:#?}"); + + let report: oes::ActiveConnections = report.try_into()?; + + let _ = 
ctx.publish("active_connections", report.clone()); + let bytes = serde_json::to_vec(&report)?; - zsender + ctx.zsender .publisher("oes/active_connections")? .put(&bytes) .await @@ -152,22 +219,20 @@ fn serialize_status_code( serializer.serialize_u16(status.as_u16()) } -fn is_primary(primary: &orb_connd_events::Connection, conn_name: &str) -> bool { - use orb_connd_events::{Connection::*, ConnectionKind}; - let kind = match primary { - ConnectedGlobal(k) | ConnectedSite(k) | ConnectedLocal(k) => k, - _ => return false, - }; - match kind { - ConnectionKind::Wifi { ssid } => conn_name == ssid, - ConnectionKind::Cellular { .. } => { - let name = conn_name.to_lowercase(); - name.contains("cellular") +fn is_primary(primary: &Option, conn_name: &str) -> bool { + let Some(primary) = primary else { return false }; + + match primary { + network_manager::Connection::Cellular { .. } => { + conn_name.to_lowercase().contains("cellular") } - ConnectionKind::Ethernet => { - let name = conn_name.to_lowercase(); - name.contains("wired") || name.contains("ethernet") + + network_manager::Connection::Ethernet => { + let conn_name = conn_name.to_lowercase(); + conn_name.contains("wired") || conn_name.contains("ethernet") } + + network_manager::Connection::Wifi { ssid } => ssid == conn_name, } } @@ -206,10 +271,10 @@ impl HttpCheck { } } -impl<'a> TryFrom> for oes::ActiveConnections { +impl TryFrom for oes::ActiveConnections { type Error = color_eyre::Report; - fn try_from(val: ActiveConnections<'a>) -> Result { + fn try_from(val: ActiveConnections) -> Result { let connections = val .connections .into_iter() @@ -222,7 +287,7 @@ impl<'a> TryFrom> for oes::ActiveConnections { }; Ok(oes::Connection { - name: c.name.into(), + name: c.name, iface, primary: c.primary, has_internet: c.has_internet, @@ -236,3 +301,202 @@ impl<'a> TryFrom> for oes::ActiveConnections { }) } } + +#[derive(Debug, serde::Serialize)] +struct InterfaceRoutes { + ifname: String, + operstate: String, + routes: Vec, +} + 
+#[derive(Debug, serde::Serialize)] +struct Route { + destination: String, + metric: u64, +} + +type Iface = String; +type Operstate = String; + +impl InterfaceRoutes { + async fn from_fs( + sysfs: impl AsRef, + procfs: impl AsRef, + ) -> Result> { + let mut ifaces = get_interfaces_operstate(sysfs).await?; + let routes = get_routes(procfs).await?; + + let mut ifaceroutes: HashMap)> = + HashMap::with_capacity(ifaces.len()); + + for (iface, route) in routes.into_iter() { + match ifaceroutes.entry(iface) { + Entry::Occupied(mut entry) => { + entry.get_mut().1.push(route); + } + + Entry::Vacant(entry) => { + let Some(operstate) = ifaces.remove(entry.key()) else { + continue; + }; + + entry.insert((operstate, vec![route])); + } + } + } + + Ok(ifaceroutes + .into_iter() + .map(|(ifname, (operstate, routes))| InterfaceRoutes { + ifname, + operstate, + routes, + }) + .collect()) + } +} + +async fn get_interfaces_operstate( + sysfs: impl AsRef, +) -> Result> { + let ifaces_dir = sysfs.as_ref().join("class").join("net"); + let mut dir = fs::read_dir(ifaces_dir).await?; + let mut interfaces = HashMap::new(); + + while let Some(entry) = dir.next_entry().await? 
{ + let path = entry.path(); + let Some(iface) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + + let operstate = fs::read_to_string(path.join("operstate")).await?; + interfaces.insert(iface.to_string(), operstate.trim().to_string()); + } + + Ok(interfaces) +} + +async fn get_routes(procfs: impl AsRef) -> Result> { + let path = procfs.as_ref().join("net").join("route"); + let routes = fs::read_to_string(path).await?; + + routes + .lines() + .skip(1) // header, see test for context + .filter(|line| !line.trim().is_empty()) + .map(|line| { + let mut cols = line.split_whitespace(); + + let iface = cols + .next() + .wrap_err_with(|| format!("invalid /proc/net/route line: {line}"))?; + + let destination = cols + .next() + .wrap_err_with(|| format!("invalid /proc/net/route line: {line}"))?; + + cols.next(); // gateway + cols.next(); // flags + cols.next(); // refcnt + cols.next(); // use + + let metric = cols + .next() + .wrap_err_with(|| format!("invalid /proc/net/route line: {line}"))?; + + Ok(( + iface.to_owned(), + Route { + destination: destination.to_owned(), + metric: metric.parse()?, + }, + )) + }) + .collect() +} + +#[cfg(test)] +mod test { + use super::*; + use async_tempfile::TempDir; + + #[tokio::test] + async fn get_interfaces_reads_operstate_for_each_interface() { + // Arrange + let sysfs = TempDir::new().await.unwrap(); + let sysfs_path = sysfs.to_path_buf(); + let net_dir = sysfs_path.join("class").join("net"); + + fs::create_dir_all(net_dir.join("eth0")).await.unwrap(); + fs::create_dir_all(net_dir.join("wlan0")).await.unwrap(); + fs::create_dir_all(net_dir.join("wwan0")).await.unwrap(); + + fs::write(net_dir.join("eth0").join("operstate"), "down\n") + .await + .unwrap(); + fs::write(net_dir.join("wlan0").join("operstate"), "up\n") + .await + .unwrap(); + fs::write(net_dir.join("wwan0").join("operstate"), "unknown\n") + .await + .unwrap(); + + // Act + let interfaces = get_interfaces_operstate(&sysfs_path).await.unwrap(); + + 
// Assert + assert_eq!( + interfaces, + HashMap::from([ + ("eth0".to_string(), "down".to_string()), + ("wlan0".to_string(), "up".to_string()), + ("wwan0".to_string(), "unknown".to_string()), + ]) + ); + } + + #[tokio::test] + async fn get_routes_reads_routes_from_procfs() { + // Arrange + let procfs = TempDir::new().await.unwrap(); + let procfs_path = procfs.to_path_buf(); + let route_dir = procfs_path.join("net"); + let route_path = route_dir.join("route"); + + fs::create_dir_all(&route_dir).await.unwrap(); + fs::write( + &route_path, + concat!( + "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT\n", + "eth0\t0010A8C0\t00000000\t0001\t0\t0\t100\t00FFFFFF\t0\t0\t0\n", + "wlan0\t00000000\t01006C0A\t0003\t0\t0\t400\t00000000\t0\t0\t0\n", + "wwan0\t00000000\t39A54664\t0003\t0\t0\t500\t00000000\t0\t0\t0\n", + "wlan0\t00006C0A\t00000000\t0001\t0\t0\t400\t0000FFFF\t0\t0\t0\n", + "wwan0\t30A54664\t00000000\t0001\t0\t0\t500\tF0FFFFFF\t0\t0\t0\n", + ), + ) + .await + .unwrap(); + + // Act + let routes = get_routes(&procfs_path).await.unwrap(); + + // Assert + assert_eq!(routes.len(), 5); + assert_eq!(routes[0].0, "eth0"); + assert_eq!(routes[0].1.destination, "0010A8C0"); + assert_eq!(routes[0].1.metric, 100); + assert_eq!(routes[1].0, "wlan0"); + assert_eq!(routes[1].1.destination, "00000000"); + assert_eq!(routes[1].1.metric, 400); + assert_eq!(routes[2].0, "wwan0"); + assert_eq!(routes[2].1.destination, "00000000"); + assert_eq!(routes[2].1.metric, 500); + assert_eq!(routes[3].0, "wlan0"); + assert_eq!(routes[3].1.destination, "00006C0A"); + assert_eq!(routes[3].1.metric, 400); + assert_eq!(routes[4].0, "wwan0"); + assert_eq!(routes[4].1.destination, "30A54664"); + assert_eq!(routes[4].1.metric, 500); + } +} diff --git a/orb-connd/src/reporters/connd_report.rs b/orb-connd/src/reporters/connd_report.rs index b9a4a96c5..f2be630b0 100644 --- a/orb-connd/src/reporters/connd_report.rs +++ b/orb-connd/src/reporters/connd_report.rs @@ -20,15 
+20,15 @@ pub async fn report(ctx: mini::Ctx) -> Result<()> { info!("starting connd report reporter"); async { - let net_state_rx: Receiver = - ctx.subscribe("net-state")?; + let active_conns_rx: Receiver = + ctx.subscribe("active_connections")?; let mut interval = time::interval(ctx.report_interval); interval.set_missed_tick_behavior(time::MissedTickBehavior::Skip); loop { tokio::select! { - Ok(_) = net_state_rx.recv_async() => {} + Ok(_) = active_conns_rx.recv_async() => {} _ = interval.tick() => {} }; diff --git a/orb-connd/src/reporters/mod.rs b/orb-connd/src/reporters/mod.rs index 7f1512abf..dfc207410 100644 --- a/orb-connd/src/reporters/mod.rs +++ b/orb-connd/src/reporters/mod.rs @@ -10,7 +10,6 @@ pub mod active_connections; pub mod cellular_status; pub mod connd_report; pub mod datadog; -pub mod net_state; pub mod net_stats; #[allow(clippy::too_many_arguments)] @@ -20,20 +19,12 @@ pub async fn spawn( resolved: Resolved, session_bus: zbus::Connection, statsd: Arc, - sysfs: PathBuf, zsender: zenorb::Sender, + sysfs: PathBuf, + procfs: PathBuf, ) -> Result<()> { info!("starting reporter tasks"); - speare - .task_with() - .args(net_state::Args { - nm: nm.clone(), - zsender: zsender.clone(), - }) - .on_err(static_backoff(15)) - .spawn(net_state::report)?; - speare .task_with() .args(cellular_status::Args { @@ -47,7 +38,7 @@ pub async fn spawn( .task_with() .args(net_stats::Args { poll_interval: Duration::from_secs(30), - sysfs, + sysfs: sysfs.clone(), zsender: zsender.clone(), }) .on_err(static_backoff(15)) @@ -75,6 +66,8 @@ pub async fn spawn( nm, resolved, zsender, + sysfs, + procfs, }) .on_err(static_backoff(15)) .spawn(active_connections::report)?; diff --git a/orb-connd/src/reporters/net_state.rs b/orb-connd/src/reporters/net_state.rs deleted file mode 100644 index 94462519e..000000000 --- a/orb-connd/src/reporters/net_state.rs +++ /dev/null @@ -1,80 +0,0 @@ -use crate::network_manager::{Connection, NetworkManager}; -use color_eyre::{eyre::eyre, Result}; 
-use futures::StreamExt; -use orb_connd_events::ConnectionKind; -use rusty_network_manager::dbus_interface_types::NMState; -use speare::mini; -use tracing::{info, warn}; - -pub struct Args { - pub nm: NetworkManager, - pub zsender: zenorb::Sender, -} - -pub async fn report(ctx: mini::Ctx) -> Result<()> { - info!("starting netstate reporter"); - - let publisher = ctx.zsender.publisher("net/changed")?; - let mut state_stream = ctx.nm.state_stream().await?; - let mut primary_conn_stream = ctx.nm.primary_connection_stream().await?; - - let nm_state = ctx.nm.state().await?; - let mut conn_event = connection_event(nm_state, ctx.nm.primary_connection().await?); - - let bytes = rkyv::to_bytes::<_, 64>(&conn_event)?; - publisher - .put(bytes.into_vec()) - .await - .map_err(|e| eyre!("{e}"))?; - - let _ = ctx - .publish("net-state", conn_event.clone()) - .inspect_err(|e| warn!(error = ?e, "failed to send net state event")); - - loop { - tokio::select! { - _ = state_stream.next() => {} - _ = primary_conn_stream.next() => {} - }; - - let new_conn_event = - connection_event(ctx.nm.state().await?, ctx.nm.primary_connection().await?); - - let changed = conn_event != new_conn_event; - conn_event = new_conn_event; - - if changed { - let bytes = rkyv::to_bytes::<_, 64>(&conn_event)?; - publisher - .put(bytes.into_vec()) - .await - .map_err(|e| eyre!("{e}"))?; - - let _ = ctx - .publish("net-state", conn_event.clone()) - .inspect_err(|e| warn!(error = ?e, "failed to send net state event")); - } - } -} - -fn connection_event( - state: NMState, - active_conn: Option, -) -> orb_connd_events::Connection { - use orb_connd_events::Connection::*; - let kind = active_conn.map(|c| match c { - Connection::Cellular { apn } => ConnectionKind::Cellular { apn }, - Connection::Wifi { ssid } => ConnectionKind::Wifi { ssid }, - Connection::Ethernet => ConnectionKind::Ethernet, - }); - - match (state, kind) { - (NMState::CONNECTED_GLOBAL, Some(kind)) => ConnectedGlobal(kind), - 
(NMState::CONNECTED_SITE, Some(kind)) => ConnectedSite(kind), - (NMState::CONNECTED_LOCAL, Some(kind)) => ConnectedLocal(kind), - (NMState::CONNECTING, _) => Connecting, - (NMState::DISCONNECTING, _) => Disconnecting, - (NMState::UNKNOWN | NMState::ASLEEP | NMState::DISCONNECTED, _) => Disconnected, - _ => Disconnected, - } -} diff --git a/orb-connd/src/service/dbus.rs b/orb-connd/src/service/dbus.rs index 4b4182e88..2ffc51d4b 100644 --- a/orb-connd/src/service/dbus.rs +++ b/orb-connd/src/service/dbus.rs @@ -1,7 +1,7 @@ use std::time::{Duration, Instant}; use crate::{ - network_manager::{AccessPoint, ActiveConnState, WifiProfile, WifiSec}, + network_manager::{self, AccessPoint, ActiveConnState, WifiProfile, WifiSec}, service::{netconfig::NetConfig, wifi, ConndService}, utils::IntoZResult, OrbCapabilities, @@ -11,9 +11,7 @@ use chrono::Utc; use color_eyre::eyre::{eyre, ContextCompat}; use orb_connd_dbus::{ConndT, ConnectionState}; use orb_info::orb_os_release::OrbRelease; -use rusty_network_manager::dbus_interface_types::{ - NM80211Mode, NMConnectivityState, NMState, -}; +use rusty_network_manager::dbus_interface_types::{NM80211Mode, NMConnectivityState}; use tokio::time; use tracing::{error, info, warn}; use zbus::fdo::{Error as ZErr, Result as ZResult}; @@ -502,22 +500,9 @@ impl ConndT for ConndService { self.nm.check_connectivity().await.into_z()?; let value = self.nm.state().await.into_z()?; - use ConnectionState::*; - let state = match value { - NMState::UNKNOWN | NMState::ASLEEP | NMState::DISCONNECTED => Disconnected, - - NMState::DISCONNECTING => Disconnecting, - - NMState::CONNECTING => Connecting, - - NMState::CONNECTED_LOCAL | NMState::CONNECTED_SITE => PartiallyConnected, - - NMState::CONNECTED_GLOBAL => Connected, - }; - // info!("connection state: {state:?}"); - Ok(state) + Ok(ConnectionState::from(value)) } } @@ -574,3 +559,16 @@ impl AccessPoint { } } } + +impl From for ConnectionState { + fn from(value: network_manager::ConnectionState) -> Self { + 
use network_manager::ConnectionState::*; + match value { + Disconnected => ConnectionState::Disconnected, + Disconnecting => ConnectionState::Disconnecting, + Connecting => ConnectionState::Connecting, + PartiallyConnected => ConnectionState::PartiallyConnected, + Connected => ConnectionState::Connected, + } + } +} diff --git a/orb-connd/tests/fixture.rs b/orb-connd/tests/fixture.rs index 98e75284f..1a142a715 100644 --- a/orb-connd/tests/fixture.rs +++ b/orb-connd/tests/fixture.rs @@ -43,6 +43,7 @@ pub struct Fixture { conn: zbus::Connection, speare: mini::Ctx<()>, pub sysfs: PathBuf, + pub procfs: PathBuf, pub usr_persistent: PathBuf, pub secure_storage: SecureStorage, pub secure_storage_cancel_token: CancellationToken, @@ -92,27 +93,58 @@ impl Fixture { setup_container(TempDir::new().await.unwrap()).await; let sysfs = container.tempdir.dir_path().join("sysfs"); + let procfs = container.tempdir.dir_path().join("procfs"); let usr_persistent = container.tempdir.dir_path().join("usr_persistent"); + let network_manager_folder = usr_persistent.join("network-manager"); fs::create_dir_all(&sysfs).await.unwrap(); + fs::create_dir_all(&procfs).await.unwrap(); fs::create_dir_all(&usr_persistent).await.unwrap(); fs::create_dir_all(&network_manager_folder).await.unwrap(); - if cap == OrbCapabilities::CellularAndWifi { - let stats = sysfs - .join("class") - .join("net") - .join("wwan0") - .join("statistics"); + let net_dir = sysfs.join("class").join("net"); + fs::create_dir_all(net_dir.join("eth0")).await.unwrap(); + fs::create_dir_all(net_dir.join("wlan0")).await.unwrap(); + + fs::write(net_dir.join("eth0").join("operstate"), "down\n") + .await + .unwrap(); + fs::write(net_dir.join("wlan0").join("operstate"), "up\n") + .await + .unwrap(); + if cap == OrbCapabilities::CellularAndWifi { + let stats = net_dir.join("wwan0").join("statistics"); let tx = stats.join("tx_bytes"); let rx = stats.join("rx_bytes"); fs::create_dir_all(stats).await.unwrap(); fs::write(tx, 
"0").await.unwrap(); fs::write(rx, "0").await.unwrap(); + + fs::write(net_dir.join("wwan0").join("operstate"), "unknown\n") + .await + .unwrap(); } + let procnet = procfs.join("net"); + let route_path = procnet.join("route"); + + fs::create_dir_all(&procnet).await.unwrap(); + fs::write( + &route_path, + concat!( + "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT\n", + "eth0\t0010A8C0\t00000000\t0001\t0\t0\t100\t00FFFFFF\t0\t0\t0\n", + "wlan0\t00000000\t01006C0A\t0003\t0\t0\t400\t00000000\t0\t0\t0\n", + "wwan0\t00000000\t39A54664\t0003\t0\t0\t500\t00000000\t0\t0\t0\n", + "wlan0\t00006C0A\t00000000\t0001\t0\t0\t400\t0000FFFF\t0\t0\t0\n", + "wwan0\t30A54664\t00000000\t0001\t0\t0\t500\tF0FFFFFF\t0\t0\t0\n", + ), + ) + .await + .unwrap(); + time::sleep(Duration::from_secs(1)).await; let dbus_socket = container.tempdir.dir_path().join("socket"); @@ -183,6 +215,7 @@ impl Fixture { .systemd(Systemd::new(conn.clone())) .statsd_client(statsd.unwrap_or(MockStatsd)) .sysfs(sysfs.clone()) + .procfs(procfs.clone()) .usr_persistent(usr_persistent.clone()) .session_bus(conn.clone()) .connect_timeout(Duration::from_secs(1)) @@ -207,6 +240,7 @@ impl Fixture { speare, container, sysfs, + procfs, usr_persistent, secure_storage, secure_storage_cancel_token: cancel_token, @@ -284,6 +318,7 @@ impl Fixture { .systemd(Systemd::new(self.conn.clone())) .statsd_client(MockStatsd) .sysfs(self.sysfs.clone()) + .procfs(self.procfs.clone()) .usr_persistent(self.usr_persistent.clone()) .session_bus(self.conn.clone()) .connect_timeout(Duration::from_secs(1)) diff --git a/orb-connd/tests/reporter.rs b/orb-connd/tests/reporter.rs deleted file mode 100644 index 14a17abe3..000000000 --- a/orb-connd/tests/reporter.rs +++ /dev/null @@ -1,46 +0,0 @@ -use crate::fixture::Fixture; -use orb_info::orb_os_release::{OrbOsPlatform, OrbRelease}; -use rkyv::AlignedVec; -use std::time::Duration; -use tokio::time; - -mod fixture; - -#[tokio::test(flavor = "multi_thread", 
worker_threads = 1)] -async fn it_publishes_net_changed() { - // Arrange - let fx = Fixture::platform(OrbOsPlatform::Diamond) - .release(OrbRelease::Dev) - .run() - .await; - - let zenoh = fx.zenoh().await; - - // Act - time::sleep(Duration::from_secs(2)).await; - - let get = zenoh - .get(format!("{}/connd/net/changed", fx.orb_id)) - .await - .unwrap(); - - let msg = time::timeout(Duration::from_secs(2), get.recv_async()) - .await - .unwrap() - .unwrap() - .into_result() - .unwrap(); - - let mut bytes = AlignedVec::with_capacity(msg.payload().len()); - bytes.extend_from_slice(&msg.payload().to_bytes()); - let archived = - rkyv::check_archived_root::(&bytes).unwrap(); - - // Assert - // this is Disconnected, because there is no primary connection (we are using host internet - // and not a connection from network manager), and the event depends on having a primary connection - assert_eq!( - archived, - &orb_connd_events::ArchivedConnection::Disconnected - ); -} diff --git a/orb-jobs-agent/Cargo.toml b/orb-jobs-agent/Cargo.toml index e64e0a584..babd08054 100644 --- a/orb-jobs-agent/Cargo.toml +++ b/orb-jobs-agent/Cargo.toml @@ -22,6 +22,7 @@ figment = { version = "0.10.8", features = ["env", "toml"] } futures.workspace = true libc.workspace = true listenfd = "1.0.0" +oes.workspace = true once_cell = "1.15.0" orb-build-info.workspace = true orb-endpoints.workspace = true @@ -52,7 +53,6 @@ tracing = { workspace = true, features = ["attributes"] } zbus = { workspace = true, default-features = false, features = ["tokio"] } zbus_systemd = { workspace = true, features = ["systemd1", "login1"] } orb-connd-dbus.workspace = true -orb-connd-events.workspace = true zenorb.workspace = true rkyv = { workspace = true, features = ["validation"] } diff --git a/orb-jobs-agent/src/conn_change.rs b/orb-jobs-agent/src/conn_change.rs index 3cc43430e..9e92b3a51 100644 --- a/orb-jobs-agent/src/conn_change.rs +++ b/orb-jobs-agent/src/conn_change.rs @@ -1,8 +1,6 @@ use 
crate::job_system::client::JobClient; -use color_eyre::{eyre::eyre, Result}; -use orb_connd_events::ArchivedConnection; +use color_eyre::Result; use orb_info::OrbId; -use rkyv::AlignedVec; use tracing::info; use zenorb::Zenorb; @@ -12,6 +10,7 @@ pub async fn spawn_watcher( client: JobClient, zenoh_port: u16, ) -> Result { + info!("setting up zenoh subscribers"); let session = Zenorb::from_cfg(zenorb::client_cfg(zenoh_port)) .orb_id(orb_id) .with_name("jobs-agent") @@ -19,30 +18,18 @@ pub async fn spawn_watcher( session .receiver(client) - .subscriber("connd/net/changed", async |client, sample| { - let mut bytes = AlignedVec::with_capacity(sample.payload().len()); - bytes.extend_from_slice(&sample.payload().to_bytes()); - let archived = - rkyv::check_archived_root::(&bytes) - .map_err(|e| eyre!("failed to deserialize Connection evt {e}"))?; + .subscriber("connd/oes/active_connections", async |client, sample| { + let active_conns: oes::ActiveConnections = serde_json::from_slice(&sample.payload().to_bytes())?; + let is_online = active_conns.connections.iter().any(|c|c.has_internet); + let primary = active_conns.connections.iter().find(|c|c.primary).map(|c|&c.name); - match archived { - ArchivedConnection::ConnectedGlobal(kind) => { - info!( - ?kind, - "detected changed in connectivity, force relay reconnection" - ); + if !is_online { + info!("detected changed in connectivity, but we have no global connectivity. doing nothing"); + return Ok(()) + } - client.force_relay_reconnect().await?; - } - - conn => { - info!( - ?conn, - "detected changed in connectivity, but we have no global connectivity. 
doing nothing" - ); - } - }; + info!("new primary connection: {primary:?}, forcing relay reconnection"); + client.force_relay_reconnect().await?; Ok(()) }) diff --git a/zorb/Cargo.toml b/zorb/Cargo.toml index 896ab0ba1..4d6b089b2 100644 --- a/zorb/Cargo.toml +++ b/zorb/Cargo.toml @@ -12,7 +12,6 @@ authors = ["Victor Ferreira Menge "] clap = { workspace = true, features = ["derive"] } color-eyre.workspace = true orb-build-info.workspace = true -orb-connd-events.workspace = true orb-info = { workspace = true, features = ["orb-id"] } rkyv = { workspace = true, features = ["bytecheck", "validation"] } serde_json.workspace = true diff --git a/zorb/src/main.rs b/zorb/src/main.rs index eae2acce0..a073c1546 100644 --- a/zorb/src/main.rs +++ b/zorb/src/main.rs @@ -5,10 +5,10 @@ use color_eyre::{ }; use orb_build_info::{make_build_info, BuildInfo}; use orb_info::OrbId; -use std::{borrow::Cow, net::IpAddr, process::Stdio, str::FromStr}; +use std::{net::IpAddr, process::Stdio, str::FromStr}; use tokio::process::Command; use zenorb::{zenoh::bytes::Encoding, Zenorb}; -use zorb::{color, register_rkyv_types, Example}; +use zorb::{color, Example}; const BUILD_INFO: BuildInfo = make_build_info!(); @@ -45,9 +45,6 @@ enum Cmd { Sub { /// The key expression to subscribe to keyexpr: String, - /// Fully qualified name of the type to deserialize - #[arg(short = 't', long = "type")] - type_name: Option, }, /// Execute a command when a message is received @@ -55,9 +52,6 @@ enum Cmd { When { /// The key expression to subscribe to keyexpr: String, - /// Fully qualified name of the type to deserialize - #[arg(short = 't', long = "type")] - type_name: Option, /// The command to execute #[arg(trailing_var_arg = true, allow_hyphen_values = true)] command: Vec, @@ -68,9 +62,6 @@ enum Cmd { async fn main() -> Result<()> { color_eyre::install()?; - let rkyv_registry = - register_rkyv_types!(zorb::Example, orb_connd_events::Connection); - let cli = Cli::parse(); let orb_id = match cli.orb_id { @@ 
-128,7 +119,7 @@ async fn main() -> Result<()> { println!("published to {keyexpr} successfully"); } - Cmd::Sub { keyexpr, type_name } => { + Cmd::Sub { keyexpr } => { println!("Subscribing to {keyexpr}"); let rx = zenorb @@ -158,27 +149,11 @@ async fn main() -> Result<()> { } &Encoding::ZENOH_BYTES => { - let rkyv_deser = type_name - .as_ref() - .and_then(|t| rkyv_registry.get(t.as_str())); - - match rkyv_deser { - None => println!( - "{} {} :: could not deserialize", - color::timestamp(), - color::key_expr(sample.key_expr()) - ), - - Some(deser_fn) => { - let contents = deser_fn(&sample.payload().to_bytes())?; - println!( - "{} {} :: {contents}", - color::timestamp(), - color::key_expr(sample.key_expr()) - ); - } - } - println!("bytes!"); + println!( + "{} {} :: could not deserialize", + color::timestamp(), + color::key_expr(sample.key_expr()) + ); } other => { @@ -188,11 +163,7 @@ async fn main() -> Result<()> { } } - Cmd::When { - keyexpr, - type_name, - command, - } => { + Cmd::When { keyexpr, command } => { let command = command.join(" "); println!("Subscribing to {keyexpr}"); @@ -218,31 +189,12 @@ async fn main() -> Result<()> { } &Encoding::ZENOH_BYTES => { - let rkyv_deser = type_name - .as_ref() - .and_then(|t| rkyv_registry.get(t.as_str())); - - let cmd = match rkyv_deser { - None => { - println!( - "{} {} :: could not deserialize, will execute command without substitution", - color::timestamp(), - color::key_expr(sample.key_expr()) - ); - - Cow::Borrowed(&command) - } - - Some(deser_fn) => { - let contents = deser_fn(&sample.payload().to_bytes())?; - Cow::Owned(command.replace("%s%", &contents)) - } - }; + println!("{} {} :: could not deserialize, will execute command without substitution", color::timestamp(), color::key_expr(sample.key_expr())); Command::new("/usr/bin/env") .arg("bash") .arg("-c") - .arg(cmd.as_str()) + .arg(&command) .stdout(Stdio::inherit()) .stderr(Stdio::inherit()) .status() From 863d51b0be82f8428231a0d40483ab8c4b16edcd Mon Sep 17 
00:00:00 2001 From: vmenge Date: Fri, 27 Mar 2026 18:31:44 +0100 Subject: [PATCH 53/66] feat(backend-status): oes cache (#1120) ## changes - shared client for backend status endpoint, handling lack of token or internet - oes cacheing (hacky version) for `connd/active_connections` - no silent handling of uptime errors - use `connd/oes/active_connections` for connectivity state instead of `connd/net_changed` ## fixes - bug where backend status client would be stuck with old connection pool when switching primary connections - [x] tested on an orb --- Cargo.lock | 1 + orb-backend-status/Cargo.toml | 1 + orb-backend-status/dbus/src/lib.rs | 14 + orb-backend-status/src/backend/client.rs | 161 +++++++ orb-backend-status/src/backend/mod.rs | 3 +- orb-backend-status/src/backend/status.rs | 433 ------------------ .../src/backend/status_req_builder.rs | 300 ++++++++++++ orb-backend-status/src/backend/types.rs | 1 + orb-backend-status/src/backend/uptime.rs | 44 +- .../src/collectors/connectivity.rs | 5 +- orb-backend-status/src/collectors/mod.rs | 3 +- orb-backend-status/src/collectors/oes.rs | 6 + orb-backend-status/src/dbus/intf_impl.rs | 8 +- orb-backend-status/src/lib.rs | 80 ++-- orb-backend-status/src/main.rs | 2 - orb-backend-status/src/oes_cache.rs | 31 ++ orb-backend-status/src/oes_flusher.rs | 121 ++--- orb-backend-status/src/oes_reroute.rs | 3 +- orb-backend-status/src/sender.rs | 121 ++--- .../tests/backend_status_service.rs | 57 --- orb-backend-status/tests/fixture.rs | 10 - 21 files changed, 659 insertions(+), 746 deletions(-) create mode 100644 orb-backend-status/src/backend/client.rs delete mode 100644 orb-backend-status/src/backend/status.rs create mode 100644 orb-backend-status/src/backend/status_req_builder.rs create mode 100644 orb-backend-status/src/oes_cache.rs diff --git a/Cargo.lock b/Cargo.lock index c4f3d834d..24f1931f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7934,6 +7934,7 @@ dependencies = [ "chrono", "color-eyre", "dbus-launch", + 
"derive_more 2.1.0", "eyre", "flume", "futures-util", diff --git a/orb-backend-status/Cargo.toml b/orb-backend-status/Cargo.toml index 1a12b997a..796e16dbe 100644 --- a/orb-backend-status/Cargo.toml +++ b/orb-backend-status/Cargo.toml @@ -14,6 +14,7 @@ rust-version.workspace = true bon.workspace = true chrono = { workspace = true, features = ["serde"] } color-eyre.workspace = true +derive_more = { workspace = true, default-features = false, features = ["from"] } flume.workspace = true futures-util = { version = "0.3.31" } eyre.workspace = true diff --git a/orb-backend-status/dbus/src/lib.rs b/orb-backend-status/dbus/src/lib.rs index 0f48eeef1..61277fa68 100644 --- a/orb-backend-status/dbus/src/lib.rs +++ b/orb-backend-status/dbus/src/lib.rs @@ -12,26 +12,40 @@ pub mod constants { pub const INTERFACE_NAME: &str = "org.worldcoin.BackendStatus1"; } +/// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE +/// If you need to send new data types to the backend, use the OES pub trait BackendStatusT: Send + Sync + 'static { + /// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE + /// If you need to send new data types to the backend, use the OES fn provide_update_progress( &self, update_progress: UpdateProgress, trace_ctx: TraceCtx, ) -> Result<()>; + /// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE + /// If you need to send new data types to the backend, use the OES fn provide_net_stats(&self, net_stats: NetStats, trace_ctx: TraceCtx) -> Result<()>; + /// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE + /// If you need to send new data types to the backend, use the OES fn provide_cellular_status(&self, status: CellularStatus) -> Result<()>; + /// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE + /// If you need to send new data types to the backend, use the OES fn provide_connd_report(&self, report: ConndReport) -> Result<()>; + /// THIS IS 
DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE + /// If you need to send new data types to the backend, use the OES fn provide_core_stats( &self, core_stats: CoreStats, trace_ctx: TraceCtx, ) -> Result<()>; + /// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE + /// If you need to send new data types to the backend, use the OES fn provide_signup_state( &self, signup_state: SignupState, diff --git a/orb-backend-status/src/backend/client.rs b/orb-backend-status/src/backend/client.rs new file mode 100644 index 000000000..77e89a2b2 --- /dev/null +++ b/orb-backend-status/src/backend/client.rs @@ -0,0 +1,161 @@ +use crate::{ + backend::types::{OrbStatusApiV2, VersionApiV2}, + collectors::connectivity::GlobalConnectivity, +}; +use chrono::Utc; +use color_eyre::Result; +use derive_more::From; +use eyre::Context; +use orb_info::{OrbId, OrbJabilId, OrbName}; +use reqwest::{Response, Url}; +use reqwest_middleware::{ClientBuilder, ClientWithMiddleware, Extension}; +use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware}; +use reqwest_tracing::{OtelName, TracingMiddleware}; +use std::time::Duration; +use tokio::{ + sync::{oneshot, watch}, + task::{AbortHandle, JoinHandle}, +}; +use tracing::{error, info}; + +type ReqTx = (OrbStatusApiV2, oneshot::Sender>); + +#[derive(From)] +pub enum Err { + MissingAttestToken, + NoConnectivity, + #[from] + Other(color_eyre::Report), +} + +#[derive(Clone)] +pub struct StatusClient { + handle: AbortHandle, + req_tx: flume::Sender, +} + +impl Drop for StatusClient { + fn drop(&mut self) { + self.handle.abort(); + } +} + +#[bon::bon] +impl StatusClient { + #[builder] + pub fn new( + orb_id: OrbId, + orb_name: OrbName, + jabil_id: OrbJabilId, + orb_os_version: String, + endpoint: Url, + req_timeout: Duration, + min_req_retry_interval: Duration, + max_req_retry_interval: Duration, + mut attest_token_rx: watch::Receiver, + mut connectivity_rx: watch::Receiver, + ) -> Self { + 
info!("spawning backend-status client, orb_os_version: {orb_os_version}"); + + let (req_tx, req_rx) = flume::unbounded::(); + + let handle: JoinHandle> = tokio::spawn(async move { + let orb_id = orb_id.as_str().to_string(); + + let make_client = || -> Result { + let retry_policy = ExponentialBackoff::builder() + .retry_bounds(min_req_retry_interval, max_req_retry_interval) + .build_with_max_retries(3); + + let reqwest_client = reqwest::Client::builder() + .timeout(req_timeout) + .user_agent("orb-backend-status") + .build() + .wrap_err("failed to build reqwest client")?; + + let client = ClientBuilder::new(reqwest_client) + .with_init(Extension(OtelName(orb_id.clone().into()))) + .with(TracingMiddleware::default()) + .with(RetryTransientMiddleware::new_with_policy(retry_policy)) + .build(); + + Ok(client) + }; + + let mut client = make_client() + .inspect_err(|e| error!("failed to create http client: {e:?}"))?; + + let mut attest_token = String::new(); + let mut connectivity = connectivity_rx.borrow_and_update().clone(); + + info!("client with connectivity: {connectivity:?}"); + + loop { + tokio::select! 
{ + biased; + + Ok(_) = attest_token_rx.changed() => { + info!("new attest token received!"); + let t = &attest_token_rx.borrow_and_update(); + attest_token.clear(); + attest_token.push_str(t); + } + + Ok(_) = connectivity_rx.changed() => { + info!("connectivity status changed"); + connectivity = connectivity_rx.borrow_and_update().clone(); + if connectivity.is_connected() { + client = make_client() + .inspect_err(|e| error!("failed to create http client: {e:?}"))?; + } + } + + Ok((req, res_tx)) = req_rx.recv_async() => { + let res = if attest_token.is_empty() { + Err(Err::MissingAttestToken) + } else if !connectivity.is_connected() { + Err(Err::NoConnectivity) + } else { + let req = OrbStatusApiV2 { + orb_name: Some(orb_name.to_string()), + jabil_id: Some(jabil_id.to_string()), + version: Some(VersionApiV2 { + current_release: Some(orb_os_version.to_string()), + }), + timestamp: Utc::now(), + ..req + }; + + client + .post(endpoint.clone()) + .json(&req) + .basic_auth(&orb_id, Some(attest_token.clone())) + .send() + .await + .wrap_err("failed to send request") + .map_err(Err::Other) + }; + + let _ = res_tx.send(res); + } + }; + } + }); + + Self { + handle: handle.abort_handle(), + req_tx, + } + } + + pub async fn req(&self, payload: OrbStatusApiV2) -> Result { + let (tx, rx) = oneshot::channel(); + self.req_tx + .send((payload, tx)) + .wrap_err("req_tx send failed")?; + + let res = rx.await.wrap_err("request oneshot failed")??; + + Ok(res) + } +} diff --git a/orb-backend-status/src/backend/mod.rs b/orb-backend-status/src/backend/mod.rs index a3f714de5..b9d42421f 100644 --- a/orb-backend-status/src/backend/mod.rs +++ b/orb-backend-status/src/backend/mod.rs @@ -1,5 +1,6 @@ pub(crate) mod types; mod uptime; +pub mod client; pub mod os_version; -pub mod status; +pub mod status_req_builder; diff --git a/orb-backend-status/src/backend/status.rs b/orb-backend-status/src/backend/status.rs deleted file mode 100644 index 47ef0579b..000000000 --- 
a/orb-backend-status/src/backend/status.rs +++ /dev/null @@ -1,433 +0,0 @@ -use crate::{ - backend::{ - types::{ - AmbientLightApiV2, BatteryApiV2, CellularStatusApiV2, HardwareStateApiV2, - MainMcuApiV2, SsdStatusApiV2, TemperatureApiV2, WifiApiV2, WifiDataApiV2, - WifiQualityApiV2, - }, - uptime::orb_uptime, - }, - collectors::front_als::flag_to_api_str, - dbus::intf_impl::CurrentStatus, -}; -use chrono::Utc; -use eyre::{Result, WrapErr}; -use orb_info::{OrbId, OrbJabilId, OrbName}; -use reqwest::Url; -use reqwest_middleware::{ClientBuilder, ClientWithMiddleware, Extension}; -use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware}; -use reqwest_tracing::{OtelName, TracingMiddleware}; -use std::time::Duration; -use tracing::{info, instrument}; - -use super::types::{ - ConndReportApiV2, LocationDataApiV2, NetIntfApiV2, NetStatsApiV2, OrbStatusApiV2, - UpdateProgressApiV2, VersionApiV2, WifiProfileApiV2, -}; - -#[derive(Debug, Clone)] -pub struct StatusClient { - client: ClientWithMiddleware, - orb_id: OrbId, - orb_name: OrbName, - jabil_id: OrbJabilId, - orb_os_version: String, - endpoint: Url, -} - -impl StatusClient { - #[allow(clippy::too_many_arguments)] - pub async fn new( - endpoint: Url, - orb_os_version: String, - orb_id: OrbId, - orb_name: OrbName, - jabil_id: OrbJabilId, - req_timeout: Duration, - min_req_retry_interval: Duration, - max_req_retry_interval: Duration, - ) -> Result { - info!("backend-status orb_os_version: {}", orb_os_version); - - let retry_policy = ExponentialBackoff::builder() - .retry_bounds(min_req_retry_interval, max_req_retry_interval) - .build_with_max_retries(5); - - let reqwest_client = reqwest::Client::builder() - .timeout(req_timeout) - .user_agent("orb-backend-status") - .build() - .wrap_err("failed to build reqwest client")?; - - let name = orb_id.as_str().to_string().into(); - - let client = ClientBuilder::new(reqwest_client) - .with_init(Extension(OtelName(name))) - .with(TracingMiddleware::default()) - 
.with(RetryTransientMiddleware::new_with_policy(retry_policy)) - .build(); - - Ok(Self { - client, - orb_id: orb_id.clone(), - orb_name, - jabil_id, - orb_os_version, - endpoint, - }) - } -} - -impl StatusClient { - #[instrument(skip(self, current_status))] - pub async fn send_status( - &self, - current_status: &CurrentStatus, - auth_token: &str, - ) -> Result<()> { - let request = build_status_request_v2( - &self.orb_id, - &self.orb_name, - &self.jabil_id, - &self.orb_os_version, - current_status, - ) - .await?; - - // Build request with optional authentication - let request_builder = self - .client - .post(self.endpoint.clone()) - .json(&request) - .basic_auth(self.orb_id.to_string(), Some(auth_token.to_string())); - - let response = request_builder.send().await?; - - let status = response.status(); - if !status.is_success() { - let response_body = response.text().await.unwrap_or_default(); - return Err(eyre::eyre!( - "Backend status error: {} - {}", - status, - response_body - )); - } - - Ok(()) - } -} - -async fn build_status_request_v2( - orb_id: &OrbId, - orb_name: &OrbName, - jabil_id: &OrbJabilId, - orb_os_version: &str, - current_status: &CurrentStatus, -) -> Result { - let uptime_sec = orb_uptime().await; - Ok(OrbStatusApiV2 { - orb_id: Some(orb_id.to_string()), - orb_name: Some(orb_name.to_string()), - jabil_id: Some(jabil_id.to_string()), - uptime_sec, - version: Some(VersionApiV2 { - current_release: Some(orb_os_version.to_string()), - }), - location_data: current_status.wifi_networks.as_ref().map(|wifi_networks| { - LocationDataApiV2 { - wifi: Some( - wifi_networks - .iter() - .map(|w| WifiDataApiV2 { - ssid: Some(w.ssid.clone()), - bssid: Some(w.bssid.clone()), - signal_strength: Some(w.signal_level), - frequency: Some(w.frequency), - channel: freq_to_channel(w.frequency), - signal_to_noise_ratio: None, - }) - .collect(), - ), - gps: None, - cell: None, - } - }), - update_progress: current_status.update_progress.as_ref().map( - |update_progress| 
UpdateProgressApiV2 { - download_progress: update_progress.download_progress, - processed_progress: update_progress.processed_progress, - install_progress: update_progress.install_progress, - total_progress: update_progress.total_progress, - error: update_progress.error.clone(), - state: update_progress.state, - }, - ), - net_stats: current_status - .net_stats - .as_ref() - .map(|net_stats| NetStatsApiV2 { - interfaces: net_stats - .interfaces - .iter() - .map(|i| NetIntfApiV2 { - name: i.name.clone(), - tx_bytes: i.tx_bytes, - rx_bytes: i.rx_bytes, - tx_packets: i.tx_packets, - rx_packets: i.rx_packets, - tx_errors: i.tx_errors, - rx_errors: i.rx_errors, - }) - .collect(), - }), - battery: current_status - .core_stats - .as_ref() - .map(|core_stats| BatteryApiV2 { - level: Some(core_stats.battery.level), - is_charging: Some(core_stats.battery.is_charging), - }), - mac_address: current_status - .core_stats - .as_ref() - .map(|core_stats| core_stats.mac_address.clone()), - ssd: current_status - .core_stats - .as_ref() - .map(|core_stats| SsdStatusApiV2 { - file_left: Some(core_stats.ssd.file_left), - space_left: Some(core_stats.ssd.space_left), - signup_left_to_upload: Some(core_stats.ssd.signup_left_to_upload), - }), - temperature: current_status.core_stats.as_ref().map(|core_stats| { - TemperatureApiV2 { - cpu: Some(core_stats.temperature.cpu), - gpu: Some(core_stats.temperature.gpu), - front_unit: Some(core_stats.temperature.front_unit), - front_pcb: Some(core_stats.temperature.front_pcb), - battery_pcb: Some(core_stats.temperature.battery_pcb), - battery_cell: Some(core_stats.temperature.battery_cell), - backup_battery: Some(core_stats.temperature.backup_battery), - liquid_lens: Some(core_stats.temperature.liquid_lens), - main_accelerometer: Some(core_stats.temperature.main_accelerometer), - main_mcu: Some(core_stats.temperature.main_mcu), - mainboard: Some(core_stats.temperature.mainboard), - security_accelerometer: Some( - 
core_stats.temperature.security_accelerometer, - ), - security_mcu: Some(core_stats.temperature.security_mcu), - battery_pack: Some(core_stats.temperature.battery_pack), - ssd: Some(core_stats.temperature.ssd), - } - }), - wifi: current_status - .connd_report - .as_ref() - .and_then(|connd_report| { - connd_report.scanned_networks.iter().find(|n| { - connd_report - .active_wifi_profile - .as_ref() - .is_some_and(|p| p == &n.ssid) - }) - }) - .map(|wifi| WifiApiV2 { - ssid: Some(wifi.ssid.clone()), - bssid: Some(wifi.bssid.clone()), - frequency: Some(wifi.frequency), - quality: Some(WifiQualityApiV2 { - signal_level: Some(wifi.signal_level), - bit_rate: None, - link_quality: None, - noise_level: None, - }), - }), - signup_state: current_status - .signup_state - .as_ref() - .map(|state| state.to_string()), - cellular_status: current_status - .cellular_status - .as_ref() - // backend requires ICCID to be Some otherwise it will fail deserialization - // of CellularStatusApiV2. So if ICCID is None, the struct itself should be None. 
- .and_then(|cs| cs.iccid.as_ref().map(|iccid| (cs, iccid))) - .map(|(cs, iccid)| CellularStatusApiV2 { - imei: cs.imei.clone(), - fw_revision: cs.fw_revision.clone(), - iccid: iccid.to_owned(), - rat: cs.rat.clone(), - operator: cs.operator.clone(), - rsrp: cs.rsrp, - rsrq: cs.rsrq, - rssi: cs.rssi, - snr: cs.snr, - }), - connd_report: current_status - .connd_report - .as_ref() - .map(|r| ConndReportApiV2 { - egress_iface: r.egress_iface.clone(), - wifi_enabled: r.wifi_enabled, - smart_switching: r.smart_switching, - airplane_mode: r.airplane_mode, - active_wifi_profile: r.active_wifi_profile.clone(), - saved_wifi_profiles: r - .saved_wifi_profiles - .iter() - .map(|p| WifiProfileApiV2 { - ssid: p.ssid.clone(), - sec: p.sec.clone(), - }) - .collect(), - }), - hardware_states: current_status.hardware_states.as_ref().map(|states| { - states - .iter() - .map(|(k, v)| { - ( - k.clone(), - HardwareStateApiV2 { - status: v.status.clone(), - message: v.message.clone(), - }, - ) - }) - .collect() - }), - main_mcu: build_main_mcu_api(current_status), - oes: None, - orb_stand_qr_id: current_status - .core_stats - .as_ref() - .and_then(|core_stats| core_stats.orb_stand_qr_id.clone()), - timestamp: Utc::now(), - }) -} - -fn build_main_mcu_api(current_status: &CurrentStatus) -> Option { - let front_als = current_status - .front_als - .as_ref() - .map(|als| AmbientLightApiV2 { - ambient_light_lux: als.ambient_light_lux, - flag: flag_to_api_str(als.flag).to_string(), - }); - - // Only return Some if there's at least one field populated - if front_als.is_some() { - Some(MainMcuApiV2 { front_als }) - } else { - None - } -} - -// Helper function to convert frequency to channel number -fn freq_to_channel(freq: u32) -> Option { - // For 2.4 GHz: channel = (freq - 2412) / 5 + 1 - if (2412..=2484).contains(&freq) { - if freq == 2484 { - // Special case for channel 14 - return Some(14); - } - return Some((freq - 2412) / 5 + 1); - } - - // For 5 GHz: varies by region, but generally 
channel = (freq - 5000) / 5 - if (5170..=5825).contains(&freq) { - return Some((freq - 5000) / 5); - } - - // For 6 GHz (Wi-Fi 6E): channel = (freq - 5950) / 5 + 1 - if (5955..=7115).contains(&freq) { - return Some((freq - 5950) / 5 + 1); - } - - None -} - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use super::*; - use orb_backend_status_dbus::types::{SignupState, WifiNetwork}; - use orb_info::OrbId; - - #[tokio::test] - async fn test_build_status_request_v2() { - let orb_id = OrbId::from_str("abcdef12").unwrap(); - let orb_name = OrbName::from_str("TestOrb").unwrap(); - let jabil_id = OrbJabilId::from_str("1234567890").unwrap(); - let orb_os_version = "1.0.0"; - - let wifi_networks = vec![WifiNetwork { - bssid: "00:11:22:33:44:55".into(), - frequency: 2412, - signal_level: -45, - ssid: "TestAP".into(), - }]; - - let request = build_status_request_v2( - &orb_id, - &orb_name, - &jabil_id, - orb_os_version, - &CurrentStatus { - wifi_networks: Some(wifi_networks), - signup_state: Some(SignupState::Ready), - ..Default::default() - }, - ) - .await - .unwrap(); - - assert_eq!(request.orb_id, Some("abcdef12".to_string())); - assert!(request.timestamp <= Utc::now()); - - let location_data = request - .location_data - .expect("Location data should be present"); - - assert!( - location_data.cell.is_none(), - "Cell data should not be present" - ); - - let wifi_data = location_data.wifi.expect("WiFi data should be present"); - assert_eq!(wifi_data.len(), 1); - let wifi = &wifi_data[0]; - - assert_eq!(wifi.bssid, Some("00:11:22:33:44:55".to_string())); - assert_eq!(wifi.ssid, Some("TestAP".to_string())); - assert_eq!(wifi.signal_strength, Some(-45)); - assert_eq!(wifi.channel, Some(1)); // 2412 MHz = channel 1 - assert_eq!(wifi.signal_to_noise_ratio, None); - - let signup_state = request - .signup_state - .expect("Signup state should be present"); - assert_eq!(signup_state, "Ready"); - } - - #[tokio::test] - async fn test_freq_to_channel_conversion() { - // 2.4 GHz 
band - assert_eq!(freq_to_channel(2412), Some(1)); - assert_eq!(freq_to_channel(2437), Some(6)); - assert_eq!(freq_to_channel(2472), Some(13)); - assert_eq!(freq_to_channel(2484), Some(14)); - - // 5 GHz band - assert_eq!(freq_to_channel(5180), Some(36)); - assert_eq!(freq_to_channel(5500), Some(100)); - - // 6 GHz band - assert_eq!(freq_to_channel(5955), Some(2)); - assert_eq!(freq_to_channel(6175), Some(46)); - - // Invalid frequencies - assert_eq!(freq_to_channel(1000), None); - assert_eq!(freq_to_channel(9000), None); - } -} diff --git a/orb-backend-status/src/backend/status_req_builder.rs b/orb-backend-status/src/backend/status_req_builder.rs new file mode 100644 index 000000000..899f2ef73 --- /dev/null +++ b/orb-backend-status/src/backend/status_req_builder.rs @@ -0,0 +1,300 @@ +use super::types::{ + ConndReportApiV2, LocationDataApiV2, NetIntfApiV2, NetStatsApiV2, OrbStatusApiV2, + UpdateProgressApiV2, WifiProfileApiV2, +}; +use crate::{ + backend::{ + types::{ + AmbientLightApiV2, BatteryApiV2, CellularStatusApiV2, HardwareStateApiV2, + MainMcuApiV2, SsdStatusApiV2, TemperatureApiV2, WifiApiV2, WifiDataApiV2, + WifiQualityApiV2, + }, + uptime::orb_uptime, + }, + collectors::front_als::flag_to_api_str, + dbus::intf_impl::CurrentStatus, +}; +use chrono::Utc; +use tracing::warn; + +impl CurrentStatus { + pub async fn to_orb_status_api_v2_req(&self) -> OrbStatusApiV2 { + let uptime_sec = orb_uptime() + .await + .inspect_err(|e| warn!("failed to read orb uptime: {e:?}")) + .ok(); + + OrbStatusApiV2 { + orb_id: None, + orb_name: None, + jabil_id: None, + version: None, + uptime_sec, + location_data: self.wifi_networks.as_ref().map(|wifi_networks| { + LocationDataApiV2 { + wifi: Some( + wifi_networks + .iter() + .map(|w| WifiDataApiV2 { + ssid: Some(w.ssid.clone()), + bssid: Some(w.bssid.clone()), + signal_strength: Some(w.signal_level), + frequency: Some(w.frequency), + channel: freq_to_channel(w.frequency), + signal_to_noise_ratio: None, + }) + .collect(), + ), 
+ gps: None, + cell: None, + } + }), + update_progress: self.update_progress.as_ref().map(|update_progress| { + UpdateProgressApiV2 { + download_progress: update_progress.download_progress, + processed_progress: update_progress.processed_progress, + install_progress: update_progress.install_progress, + total_progress: update_progress.total_progress, + error: update_progress.error.clone(), + state: update_progress.state, + } + }), + net_stats: self.net_stats.as_ref().map(|net_stats| NetStatsApiV2 { + interfaces: net_stats + .interfaces + .iter() + .map(|i| NetIntfApiV2 { + name: i.name.clone(), + tx_bytes: i.tx_bytes, + rx_bytes: i.rx_bytes, + tx_packets: i.tx_packets, + rx_packets: i.rx_packets, + tx_errors: i.tx_errors, + rx_errors: i.rx_errors, + }) + .collect(), + }), + battery: self.core_stats.as_ref().map(|core_stats| BatteryApiV2 { + level: Some(core_stats.battery.level), + is_charging: Some(core_stats.battery.is_charging), + }), + mac_address: self + .core_stats + .as_ref() + .map(|core_stats| core_stats.mac_address.clone()), + ssd: self.core_stats.as_ref().map(|core_stats| SsdStatusApiV2 { + file_left: Some(core_stats.ssd.file_left), + space_left: Some(core_stats.ssd.space_left), + signup_left_to_upload: Some(core_stats.ssd.signup_left_to_upload), + }), + temperature: self.core_stats.as_ref().map(|core_stats| TemperatureApiV2 { + cpu: Some(core_stats.temperature.cpu), + gpu: Some(core_stats.temperature.gpu), + front_unit: Some(core_stats.temperature.front_unit), + front_pcb: Some(core_stats.temperature.front_pcb), + battery_pcb: Some(core_stats.temperature.battery_pcb), + battery_cell: Some(core_stats.temperature.battery_cell), + backup_battery: Some(core_stats.temperature.backup_battery), + liquid_lens: Some(core_stats.temperature.liquid_lens), + main_accelerometer: Some(core_stats.temperature.main_accelerometer), + main_mcu: Some(core_stats.temperature.main_mcu), + mainboard: Some(core_stats.temperature.mainboard), + security_accelerometer: Some( + 
core_stats.temperature.security_accelerometer, + ), + security_mcu: Some(core_stats.temperature.security_mcu), + battery_pack: Some(core_stats.temperature.battery_pack), + ssd: Some(core_stats.temperature.ssd), + }), + wifi: self + .connd_report + .as_ref() + .and_then(|connd_report| { + connd_report.scanned_networks.iter().find(|n| { + connd_report + .active_wifi_profile + .as_ref() + .is_some_and(|p| p == &n.ssid) + }) + }) + .map(|wifi| WifiApiV2 { + ssid: Some(wifi.ssid.clone()), + bssid: Some(wifi.bssid.clone()), + frequency: Some(wifi.frequency), + quality: Some(WifiQualityApiV2 { + signal_level: Some(wifi.signal_level), + bit_rate: None, + link_quality: None, + noise_level: None, + }), + }), + signup_state: self.signup_state.as_ref().map(|state| state.to_string()), + cellular_status: self + .cellular_status + .as_ref() + // backend requires ICCID to be Some otherwise it will fail deserialization + // of CellularStatusApiV2. So if ICCID is None, the struct itself should be None. + .and_then(|cs| cs.iccid.as_ref().map(|iccid| (cs, iccid))) + .map(|(cs, iccid)| CellularStatusApiV2 { + imei: cs.imei.clone(), + fw_revision: cs.fw_revision.clone(), + iccid: iccid.to_owned(), + rat: cs.rat.clone(), + operator: cs.operator.clone(), + rsrp: cs.rsrp, + rsrq: cs.rsrq, + rssi: cs.rssi, + snr: cs.snr, + }), + connd_report: self.connd_report.as_ref().map(|r| ConndReportApiV2 { + egress_iface: r.egress_iface.clone(), + wifi_enabled: r.wifi_enabled, + smart_switching: r.smart_switching, + airplane_mode: r.airplane_mode, + active_wifi_profile: r.active_wifi_profile.clone(), + saved_wifi_profiles: r + .saved_wifi_profiles + .iter() + .map(|p| WifiProfileApiV2 { + ssid: p.ssid.clone(), + sec: p.sec.clone(), + }) + .collect(), + }), + hardware_states: self.hardware_states.as_ref().map(|states| { + states + .iter() + .map(|(k, v)| { + ( + k.clone(), + HardwareStateApiV2 { + status: v.status.clone(), + message: v.message.clone(), + }, + ) + }) + .collect() + }), + main_mcu: 
build_main_mcu_api(self), + oes: None, + oes_cached: false, + orb_stand_qr_id: self + .core_stats + .as_ref() + .and_then(|core_stats| core_stats.orb_stand_qr_id.clone()), + timestamp: Utc::now(), + } + } +} + +fn build_main_mcu_api(current_status: &CurrentStatus) -> Option { + let front_als = current_status + .front_als + .as_ref() + .map(|als| AmbientLightApiV2 { + ambient_light_lux: als.ambient_light_lux, + flag: flag_to_api_str(als.flag).to_string(), + }); + + // Only return Some if there's at least one field populated + if front_als.is_some() { + Some(MainMcuApiV2 { front_als }) + } else { + None + } +} + +// Helper function to convert frequency to channel number +fn freq_to_channel(freq: u32) -> Option { + // For 2.4 GHz: channel = (freq - 2412) / 5 + 1 + if (2412..=2484).contains(&freq) { + if freq == 2484 { + // Special case for channel 14 + return Some(14); + } + return Some((freq - 2412) / 5 + 1); + } + + // For 5 GHz: varies by region, but generally channel = (freq - 5000) / 5 + if (5170..=5825).contains(&freq) { + return Some((freq - 5000) / 5); + } + + // For 6 GHz (Wi-Fi 6E): channel = (freq - 5950) / 5 + 1 + if (5955..=7115).contains(&freq) { + return Some((freq - 5950) / 5 + 1); + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + use orb_backend_status_dbus::types::{SignupState, WifiNetwork}; + + #[tokio::test] + async fn test_build_status_request_v2() { + let wifi_networks = vec![WifiNetwork { + bssid: "00:11:22:33:44:55".into(), + frequency: 2412, + signal_level: -45, + ssid: "TestAP".into(), + }]; + + let request = CurrentStatus { + wifi_networks: Some(wifi_networks), + signup_state: Some(SignupState::Ready), + ..Default::default() + } + .to_orb_status_api_v2_req() + .await; + + assert!(request.timestamp <= Utc::now()); + + let location_data = request + .location_data + .expect("Location data should be present"); + + assert!( + location_data.cell.is_none(), + "Cell data should not be present" + ); + + let wifi_data = 
location_data.wifi.expect("WiFi data should be present"); + assert_eq!(wifi_data.len(), 1); + let wifi = &wifi_data[0]; + + assert_eq!(wifi.bssid, Some("00:11:22:33:44:55".to_string())); + assert_eq!(wifi.ssid, Some("TestAP".to_string())); + assert_eq!(wifi.signal_strength, Some(-45)); + assert_eq!(wifi.channel, Some(1)); // 2412 MHz = channel 1 + assert_eq!(wifi.signal_to_noise_ratio, None); + + let signup_state = request + .signup_state + .expect("Signup state should be present"); + assert_eq!(signup_state, "Ready"); + } + + #[tokio::test] + async fn test_freq_to_channel_conversion() { + // 2.4 GHz band + assert_eq!(freq_to_channel(2412), Some(1)); + assert_eq!(freq_to_channel(2437), Some(6)); + assert_eq!(freq_to_channel(2472), Some(13)); + assert_eq!(freq_to_channel(2484), Some(14)); + + // 5 GHz band + assert_eq!(freq_to_channel(5180), Some(36)); + assert_eq!(freq_to_channel(5500), Some(100)); + + // 6 GHz band + assert_eq!(freq_to_channel(5955), Some(2)); + assert_eq!(freq_to_channel(6175), Some(46)); + + // Invalid frequencies + assert_eq!(freq_to_channel(1000), None); + assert_eq!(freq_to_channel(9000), None); + } +} diff --git a/orb-backend-status/src/backend/types.rs b/orb-backend-status/src/backend/types.rs index b4268633e..c8f99fb0a 100644 --- a/orb-backend-status/src/backend/types.rs +++ b/orb-backend-status/src/backend/types.rs @@ -36,6 +36,7 @@ pub struct OrbStatusApiV2 { // orb event stream #[serde(skip_serializing_if = "Option::is_none")] pub oes: Option>, + pub oes_cached: bool, pub orb_stand_qr_id: Option, } diff --git a/orb-backend-status/src/backend/uptime.rs b/orb-backend-status/src/backend/uptime.rs index 42b30652c..096ff2378 100644 --- a/orb-backend-status/src/backend/uptime.rs +++ b/orb-backend-status/src/backend/uptime.rs @@ -1,11 +1,23 @@ -pub async fn orb_uptime() -> Option { +use color_eyre::Result; +use eyre::{Context, ContextCompat}; + +pub async fn orb_uptime() -> Result { orb_uptime_from_path("/proc/uptime").await } -async fn 
orb_uptime_from_path(path: &str) -> Option { - let uptime = tokio::fs::read_to_string(path).await.ok()?; - let uptime = uptime.split_whitespace().next()?; - uptime.parse::().ok() +async fn orb_uptime_from_path(path: &str) -> Result { + let uptime = tokio::fs::read_to_string(path) + .await + .wrap_err("failed to read uptime from procs")?; + + let uptime = uptime + .split_whitespace() + .next() + .wrap_err_with(|| format!("failed to split whitespace in uptime: {uptime}"))?; + + uptime + .parse::() + .wrap_err_with(|| format!("failed to parse uptime: {uptime}")) } #[cfg(test)] @@ -18,8 +30,10 @@ mod tests { let file_path = dir.path().join("uptime"); tokio::fs::write(&file_path, "123.45 678.90").await.unwrap(); assert_eq!( - orb_uptime_from_path(file_path.to_str().unwrap()).await, - Some(123.45) + orb_uptime_from_path(file_path.to_str().unwrap()) + .await + .unwrap(), + 123.45 ); } @@ -28,10 +42,9 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let file_path = dir.path().join("uptime"); tokio::fs::write(&file_path, "invalid").await.unwrap(); - assert_eq!( - orb_uptime_from_path(file_path.to_str().unwrap()).await, - None - ); + assert!(orb_uptime_from_path(file_path.to_str().unwrap()) + .await + .is_err()); } #[tokio::test] @@ -39,14 +52,13 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let file_path = dir.path().join("uptime"); tokio::fs::write(&file_path, "").await.unwrap(); - assert_eq!( - orb_uptime_from_path(file_path.to_str().unwrap()).await, - None - ); + assert!(orb_uptime_from_path(file_path.to_str().unwrap()) + .await + .is_err()); } #[tokio::test] async fn test_orb_uptime_from_path_file_not_found() { - assert_eq!(orb_uptime_from_path("nonexistent").await, None); + assert!(orb_uptime_from_path("nonexistent").await.is_err()); } } diff --git a/orb-backend-status/src/collectors/connectivity.rs b/orb-backend-status/src/collectors/connectivity.rs index a9f80fbee..d4f0b33e3 100644 --- a/orb-backend-status/src/collectors/connectivity.rs +++ 
b/orb-backend-status/src/collectors/connectivity.rs @@ -1,6 +1,6 @@ use super::ZenorbCtx; use color_eyre::Result; -use tracing::debug; +use tracing::info; use zenorb::zenoh::sample::Sample; #[derive(Debug, Clone, PartialEq, Eq)] @@ -28,6 +28,7 @@ pub(crate) async fn handle_connection_event( ) -> Result<()> { let payload = sample.payload().to_bytes(); let active_conns: oes::ActiveConnections = serde_json::from_slice(&payload)?; + let connected = active_conns.connections.iter().any(|c| c.has_internet); let ssid = active_conns .connections @@ -43,7 +44,7 @@ pub(crate) async fn handle_connection_event( let prev = ctx.connectivity_tx.borrow().clone(); if prev != connectivity { - debug!("global connectivity changed: {connectivity:?}"); + info!("global connectivity changed: {connectivity:?}"); let prev_ssid = prev.ssid(); let new_ssid = connectivity.ssid(); diff --git a/orb-backend-status/src/collectors/mod.rs b/orb-backend-status/src/collectors/mod.rs index 5316a4303..7688d9d9d 100644 --- a/orb-backend-status/src/collectors/mod.rs +++ b/orb-backend-status/src/collectors/mod.rs @@ -7,7 +7,7 @@ pub mod oes; pub mod token; pub mod update_progress; -use crate::dbus::intf_impl::BackendStatusImpl; +use crate::{dbus::intf_impl::BackendStatusImpl, oes_cache::OesEventCache}; use connectivity::GlobalConnectivity; use hardware_states::HardwareState; use orb_messages::main::AmbientLight; @@ -26,4 +26,5 @@ pub(crate) struct ZenorbCtx { pub front_als: Arc>>, pub oes_tx: flume::Sender, pub oes_throttle: Arc>>, + pub oes_cache: OesEventCache, } diff --git a/orb-backend-status/src/collectors/oes.rs b/orb-backend-status/src/collectors/oes.rs index eba7b0bee..cf8dc45c4 100644 --- a/orb-backend-status/src/collectors/oes.rs +++ b/orb-backend-status/src/collectors/oes.rs @@ -68,6 +68,12 @@ pub(crate) async fn handle_oes_event( payload, }; + // HACKY and TECH DEBT i will remove in next pr + if event.name == "connd/active_connections" { + ctx.oes_cache.insert(event.clone())?; + } + // end of 
hacky + if let Err(e) = ctx.oes_tx.send(event) { warn!("Failed to send OES event over channel: {e}"); } diff --git a/orb-backend-status/src/dbus/intf_impl.rs b/orb-backend-status/src/dbus/intf_impl.rs index 3919223b3..2e8813849 100644 --- a/orb-backend-status/src/dbus/intf_impl.rs +++ b/orb-backend-status/src/dbus/intf_impl.rs @@ -27,6 +27,8 @@ pub struct BackendStatusImpl { send_immediately: Arc>, } +/// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE +/// If you need to send new data types to the backend, use the OES #[derive(Debug, Default, Clone)] pub struct CurrentStatus { pub wifi_networks: Option>, @@ -40,6 +42,8 @@ pub struct CurrentStatus { pub front_als: Option, } +/// THIS IS DEPRECATED, PLEASE DO NOT ADD ANY NEW METHODS OR USE THIS ANYMORE +/// If you need to send new data types to the backend, use the OES impl BackendStatusT for BackendStatusImpl { fn provide_update_progress( &self, @@ -204,10 +208,6 @@ impl BackendStatusImpl { .unwrap_or_default() } - pub fn should_send_immediately(&self) -> bool { - self.send_immediately.lock().map(|v| *v).unwrap_or(false) - } - pub fn clear_send_immediately(&self) { if let Ok(mut send_immediately) = self.send_immediately.lock() { *send_immediately = false; diff --git a/orb-backend-status/src/lib.rs b/orb-backend-status/src/lib.rs index c992c610c..eefce4628 100644 --- a/orb-backend-status/src/lib.rs +++ b/orb-backend-status/src/lib.rs @@ -1,13 +1,14 @@ pub mod backend; pub mod collectors; pub mod dbus; +pub mod oes_cache; pub mod oes_flusher; #[allow(dead_code)] pub(crate) mod oes_reroute; pub mod sender; -use crate::{oes_reroute::OesReroute, sender::BackendSender}; -use backend::status::StatusClient; +use crate::{oes_cache::OesEventCache, oes_reroute::OesReroute, sender::BackendSender}; +use backend::client::StatusClient; use collectors::{ connectivity::{self, GlobalConnectivity}, core_signups, front_als, hardware_states, net_stats, oes, @@ -38,8 +39,6 @@ pub async fn program( orb_jabil_id: 
OrbJabilId, net_stats_poll_interval: Duration, sender_interval: Duration, - sender_min_backoff: Duration, - sender_max_backoff: Duration, req_timeout: Duration, req_min_retry_interval: Duration, req_max_retry_interval: Duration, @@ -49,33 +48,29 @@ pub async fn program( info!("Starting backend-status, endpoint: {endpoint}, orb_id: {orb_id}, orb_name: {orb_name}, orb_jabil_id: {orb_jabil_id}"); let backend_status_impl = BackendStatusImpl::new(); + let oes_cache = OesEventCache::default(); setup_dbus(&dbus, backend_status_impl.clone()).await?; let token_receiver = TokenWatcher::spawn(dbus.clone(), shutdown_token.clone()).await; - let oes_endpoint = endpoint.clone(); - let oes_orb_id = orb_id.clone(); - - let status_client = StatusClient::new( - endpoint, - orb_os_version, - orb_id, - orb_name, - orb_jabil_id, - req_timeout, - req_min_retry_interval, - req_max_retry_interval, - ) - .await?; - - let sender = BackendSender::new( - status_client, - sender_interval, - sender_min_backoff, - sender_max_backoff, - ); + // Build unified zenorb context and single receiver + let (connectivity_tx, connectivity_receiver) = + watch::channel(GlobalConnectivity::NotConnected); + + let status_client = StatusClient::builder() + .orb_id(orb_id) + .orb_name(orb_name) + .jabil_id(orb_jabil_id) + .orb_os_version(orb_os_version) + .endpoint(endpoint) + .req_timeout(req_timeout) + .min_req_retry_interval(req_min_retry_interval) + .max_req_retry_interval(req_max_retry_interval) + .attest_token_rx(token_receiver) + .connectivity_rx(connectivity_receiver.clone()) + .build(); // Spawn non-zenorb collectors let mut tasks: Vec> = vec![]; @@ -99,10 +94,6 @@ pub async fn program( shutdown_token.clone(), )); - // Build unified zenorb context and single receiver - let (connectivity_tx, connectivity_receiver) = - watch::channel(GlobalConnectivity::NotConnected); - let (oes_tx, oes_rx) = flume::unbounded(); let zenorb_ctx = ZenorbCtx { @@ -112,6 +103,7 @@ pub async fn program( front_als: 
Arc::new(tokio::sync::Mutex::new(None)), oes_tx, oes_throttle: Arc::new(std::sync::Mutex::new(HashMap::new())), + oes_cache: oes_cache.clone(), }; let zenorb_tasks = zsession @@ -140,6 +132,17 @@ pub async fn program( .run() .await?; + tasks.push(tokio::spawn(oes_flusher::run_oes_flush_loop( + oes_rx, + status_client.clone(), + shutdown_token.clone(), + ))); + + let sender = BackendSender::new(status_client.clone(), oes_cache, sender_interval); + sender + .run_loop(backend_status_impl, shutdown_token.clone()) + .await; + // Spawn a single shutdown task for all zenorb subscribers let shutdown = shutdown_token.clone(); tasks.push(tokio::spawn(async move { @@ -149,25 +152,6 @@ pub async fn program( } })); - // Spawn OES flush loop - tasks.push(tokio::spawn(oes_flusher::run_oes_flush_loop( - oes_rx, - oes_endpoint, - oes_orb_id, - token_receiver.clone(), - connectivity_receiver.clone(), - shutdown_token.clone(), - ))); - - sender - .run_loop( - backend_status_impl, - token_receiver, - connectivity_receiver, - shutdown_token.clone(), - ) - .await; - for task in tasks { task.abort(); } diff --git a/orb-backend-status/src/main.rs b/orb-backend-status/src/main.rs index 354a94a83..e2b8a768f 100644 --- a/orb-backend-status/src/main.rs +++ b/orb-backend-status/src/main.rs @@ -64,8 +64,6 @@ async fn main() -> Result<()> { .procfs("/proc") .net_stats_poll_interval(Duration::from_secs(30)) .sender_interval(Duration::from_secs(30)) - .sender_min_backoff(Duration::from_secs(1)) - .sender_max_backoff(Duration::from_secs(30)) .req_timeout(Duration::from_secs(2)) .req_min_retry_interval(Duration::from_millis(100)) .req_max_retry_interval(Duration::from_secs(500)) diff --git a/orb-backend-status/src/oes_cache.rs b/orb-backend-status/src/oes_cache.rs new file mode 100644 index 000000000..9bdc1b073 --- /dev/null +++ b/orb-backend-status/src/oes_cache.rs @@ -0,0 +1,31 @@ +use crate::collectors::oes::Event; +use color_eyre::{eyre::eyre, Result}; +use std::{ + collections::HashMap, + 
sync::{Arc, Mutex}, +}; + +// hacky, tech debt, will be removed soon +#[derive(Clone, Default)] +pub struct OesEventCache(Arc>>); + +impl OesEventCache { + pub fn insert(&self, evt: Event) -> Result<()> { + let mut cache = self.0.lock().map_err(|_| eyre!("cache lock poison"))?; + cache.insert(evt.name.clone(), evt); + + Ok(()) + } + + pub fn values(&self) -> Result> { + let values = self + .0 + .lock() + .map_err(|_| eyre!("cache lock poison"))? + .values() + .cloned() + .collect(); + + Ok(values) + } +} diff --git a/orb-backend-status/src/oes_flusher.rs b/orb-backend-status/src/oes_flusher.rs index 86af83bd3..8670d4c96 100644 --- a/orb-backend-status/src/oes_flusher.rs +++ b/orb-backend-status/src/oes_flusher.rs @@ -1,48 +1,21 @@ +use crate::backend::client::{self, StatusClient}; use crate::backend::types::OrbStatusApiV2; -use crate::collectors::connectivity::GlobalConnectivity; use crate::collectors::oes::Event; -use chrono::Utc; -use orb_info::OrbId; -use reqwest::Url; use std::time::Duration; -use tokio::sync::watch; -use tokio::time::{self, Instant}; +use tokio::time::{self}; use tokio_util::sync::CancellationToken; use tracing::{debug, error, warn}; pub async fn run_oes_flush_loop( oes_rx: flume::Receiver, - endpoint: Url, - orb_id: OrbId, - token_receiver: watch::Receiver, - connectivity_receiver: watch::Receiver, + client: StatusClient, shutdown_token: CancellationToken, ) { - let client = reqwest::Client::builder() - .timeout(Duration::from_secs(5)) - .build() - .expect("failed to build OES reqwest client"); - let mut buffer: Vec = Vec::new(); - let mut last_flush = Instant::now() - Duration::from_secs(1); - let mut backoff = Duration::from_secs(1); let mut interval = time::interval(Duration::from_secs(1)); - let token_backoff = Duration::from_secs(5); interval.set_missed_tick_behavior(time::MissedTickBehavior::Skip); loop { - let token = token_receiver.borrow().clone(); - if token.is_empty() { - warn!( - "oes_flusher could not get auth token. 
waiting {}s and trying again", - token_backoff.as_secs() - ); - - time::sleep(token_backoff).await; - - continue; - } - tokio::select! { _ = shutdown_token.cancelled() => { if !buffer.is_empty() { @@ -50,11 +23,9 @@ pub async fn run_oes_flush_loop( count = buffer.len(), "Shutdown: attempting final OES flush", ); + if let Err(e) = flush_events( &client, - &endpoint, - &orb_id, - &token, &buffer, ).await { warn!("Final OES flush failed: {e}"); @@ -70,6 +41,7 @@ pub async fn run_oes_flush_loop( buffer.push(event); drain_available(&oes_rx, &mut buffer); } + Err(_) => { debug!("OES channel closed, exiting flush loop"); @@ -81,17 +53,7 @@ pub async fn run_oes_flush_loop( _ = interval.tick() => {} } - maybe_flush( - &client, - &endpoint, - &orb_id, - &token, - &mut buffer, - &mut last_flush, - &mut backoff, - &connectivity_receiver, - ) - .await; + maybe_flush(&client, &mut buffer).await; } } @@ -101,82 +63,55 @@ fn drain_available(rx: &flume::Receiver, buffer: &mut Vec) { } } -const MAX_BACKOFF: Duration = Duration::from_secs(60); const MAX_BATCH_EVENTS: usize = 100; #[allow(clippy::too_many_arguments)] -async fn maybe_flush( - client: &reqwest::Client, - endpoint: &Url, - orb_id: &OrbId, - token: &str, - buffer: &mut Vec, - last_flush: &mut Instant, - backoff: &mut Duration, - connectivity_receiver: &watch::Receiver, -) { +async fn maybe_flush(client: &StatusClient, buffer: &mut Vec) { if buffer.is_empty() { return; } - if last_flush.elapsed() < *backoff { - return; - } - - if !connectivity_receiver.borrow().is_connected() { - debug!(count = buffer.len(), "Orb offline, skipping OES flush"); - - return; - } - let batch_size = buffer.len().min(MAX_BATCH_EVENTS); let batch = &buffer[..batch_size]; - match flush_events(client, endpoint, orb_id, token, batch).await { - Ok(()) => { - debug!(count = batch_size, "OES flush successful"); - buffer.drain(..batch_size); - *last_flush = Instant::now(); - *backoff = Duration::from_secs(1); + match flush_events(client, batch).await { 
+ Ok(sent) => { + if sent { + debug!(count = batch_size, "OES flush successful"); + buffer.drain(..batch_size); + } } + Err(e) => { error!( count = buffer.len(), "OES flush failed, events remain buffered: {e}", ); - *last_flush = Instant::now(); - *backoff = (*backoff * 2).min(MAX_BACKOFF); } } } -async fn flush_events( - client: &reqwest::Client, - endpoint: &Url, - orb_id: &OrbId, - token: &str, - events: &[Event], -) -> eyre::Result<()> { - let request = OrbStatusApiV2 { - orb_id: Some(orb_id.to_string()), +async fn flush_events(client: &StatusClient, events: &[Event]) -> eyre::Result { + let req = OrbStatusApiV2 { oes: Some(events.to_vec()), - timestamp: Utc::now(), ..Default::default() }; - let response = client - .post(endpoint.clone()) - .json(&request) - .basic_auth(orb_id.to_string(), Some(token)) - .send() - .await?; + let res = match client.req(req).await { + Err(client::Err::MissingAttestToken | client::Err::NoConnectivity) => { + return Ok(false); + } - let status = response.status(); - if !status.is_success() { - let body = response.text().await.unwrap_or_default(); + Err(client::Err::Other(e)) => return Err(e), + Ok(res) => res, + }; + + let status = res.status(); + if !status.is_success() { + let body = res.text().await.unwrap_or_default(); return Err(eyre::eyre!("OES flush error: {status} - {body}")); } - Ok(()) + Ok(true) } diff --git a/orb-backend-status/src/oes_reroute.rs b/orb-backend-status/src/oes_reroute.rs index 7320b9167..e4602e68f 100644 --- a/orb-backend-status/src/oes_reroute.rs +++ b/orb-backend-status/src/oes_reroute.rs @@ -106,7 +106,7 @@ mod tests { use super::*; use crate::{ collectors::connectivity::GlobalConnectivity, - dbus::intf_impl::BackendStatusImpl, + dbus::intf_impl::BackendStatusImpl, oes_cache::OesEventCache, }; use proptest::prelude::*; use std::{ @@ -216,6 +216,7 @@ mod tests { front_als: Arc::new(tokio::sync::Mutex::new(None)), oes_tx, oes_throttle: Arc::new(Mutex::new(HashMap::new())), + oes_cache: 
OesEventCache::default(), }; (ctx, oes_rx) diff --git a/orb-backend-status/src/sender.rs b/orb-backend-status/src/sender.rs index da5265f25..08d55a68e 100644 --- a/orb-backend-status/src/sender.rs +++ b/orb-backend-status/src/sender.rs @@ -1,126 +1,91 @@ -use std::time::Duration; - -use crate::backend::status::StatusClient; -use crate::collectors::connectivity::GlobalConnectivity; +use crate::backend::client::{self, StatusClient}; use crate::dbus::intf_impl::CurrentStatus; +use crate::oes_cache::OesEventCache; use color_eyre::eyre::Result; -use tokio::sync::watch; +use std::time::Duration; use tokio::time::{self}; use tokio_util::sync::CancellationToken; -use tracing::{error, info}; +use tracing::error; #[derive(Clone)] pub struct BackendSender { client: StatusClient, interval: Duration, - min_backoff: Duration, - max_backoff: Duration, + oes_cache: OesEventCache, } impl BackendSender { pub fn new( client: StatusClient, + oes_cache: OesEventCache, interval: Duration, - min_backoff: Duration, - max_backoff: Duration, ) -> Self { Self { client, + oes_cache, interval, - min_backoff, - max_backoff, } } - pub async fn send_snapshot( - &self, - snapshot: &CurrentStatus, - token: &str, - ) -> Result<()> { - self.client.send_status(snapshot, token).await + pub async fn send_snapshot(&self, snapshot: &CurrentStatus) -> Result { + let mut req = snapshot.to_orb_status_api_v2_req().await; + req.oes_cached = true; + req.oes = Some(self.oes_cache.values()?); + + let res = match self.client.req(req).await { + Err(client::Err::MissingAttestToken | client::Err::NoConnectivity) => { + return Ok(false); + } + + Err(client::Err::Other(e)) => return Err(e), + + Ok(res) => res, + }; + + let status = res.status(); + if !status.is_success() { + let response_body = res.text().await.unwrap_or_default(); + return Err(eyre::eyre!( + "Backend status error: {} - {}", + status, + response_body + )); + } + + Ok(true) } pub async fn run_loop( self, backend_status: 
crate::dbus::intf_impl::BackendStatusImpl, - token_receiver: watch::Receiver, - mut connectivity_receiver: watch::Receiver, shutdown_token: CancellationToken, ) { - let mut backoff = self.min_backoff; - let max_backoff = self.max_backoff; - let mut interval = time::interval(self.interval); interval.set_missed_tick_behavior(time::MissedTickBehavior::Skip); loop { - let should_send_now = tokio::select! { + tokio::select! { _ = shutdown_token.cancelled() => break, // Periodic interval (30 seconds) - _ = interval.tick() => true, - - // Let connectivity watcher do it's thing - // It can trigger an urgent flag on WiFi SSID change - // TODO: this should not be here. It makes 0 sense. - // Manual SSID change tests feels slower without this - // Probably they are flaky, but I keep this for now. - // Need to think - _ = connectivity_receiver.changed() => false, + _ = interval.tick() => (), // Something urgent happened (reboot or SSID change) - _ = backend_status.wait_for_urgent_send() => true, + _ = backend_status.wait_for_urgent_send() => (), }; - let urgent_pending = backend_status.should_send_immediately(); - - // TODO: also remove this when the waking of connectivity_receiver is removed - if !should_send_now && !urgent_pending { - // Woke up due to connectivity/hardware_states change but nothing urgent - just loop back - continue; - } + let snapshot = backend_status.snapshot(); - // We want to send after this stage (interval ticked or urgent) - // So we check if we are connected. If not connected, go into wait for connection loop - let connected = connectivity_receiver.borrow().is_connected(); - if !connected { - loop { - tokio::select! 
{ - _ = shutdown_token.cancelled() => return, - _ = connectivity_receiver.changed() => { - if connectivity_receiver.borrow().is_connected() { - break; - } - } + match self.send_snapshot(&snapshot).await { + Ok(sent) => { + if sent { + backend_status.clear_send_immediately(); + interval.reset(); } } - info!("connection restored, proceeding with send"); - } - let token = token_receiver.borrow().clone(); - if token.is_empty() { - error!("auth token not available yet - skipping send"); - continue; - } - - let snapshot = backend_status.snapshot(); - - // It should be OK to send now, but sometimes - // GlobalConnectivity does not fully guarantee that we can send - // So we still have a backoff - match self.send_snapshot(&snapshot, &token).await { - Ok(_) => { - backend_status.clear_send_immediately(); - backoff = self.min_backoff; - interval.reset(); - } Err(e) => { - error!("failed to send status (will backoff): {e:?}"); - tokio::select! { - _ = shutdown_token.cancelled() => break, - () = time::sleep(backoff) => {} - } - backoff = (backoff * 2).min(max_backoff); + error!("failed to send status : {e:?}"); } }; } diff --git a/orb-backend-status/tests/backend_status_service.rs b/orb-backend-status/tests/backend_status_service.rs index 073632488..1fcb8fb02 100644 --- a/orb-backend-status/tests/backend_status_service.rs +++ b/orb-backend-status/tests/backend_status_service.rs @@ -826,63 +826,6 @@ async fn it_sends_after_token_becomes_available() { assert!(after >= 1, "Expected send after token became available"); } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn it_stops_sending_when_token_revoked() { - // Arrange - let fx = Fixture::spawn_with_token(Duration::from_millis(100)).await; - - Mock::given(method("POST")) - .and(path("/")) - .respond_with(ResponseTemplate::new(200)) - .mount(&fx.mock_server) - .await; - - // Act - fx.start().await; - - fx.set_connected().await.expect("failed to set connected"); - 
tokio::time::sleep(Duration::from_millis(250)).await; - - let before_revoke = fx - .mock_server - .received_requests() - .await - .unwrap_or_default() - .len(); - assert!(before_revoke >= 1, "Should send with token"); - - fx.token_mock - .as_ref() - .unwrap() - .update_token("") - .await - .expect("failed to revoke token"); - - tokio::time::sleep(Duration::from_millis(300)).await; - - let after_revoke = fx - .mock_server - .received_requests() - .await - .unwrap_or_default() - .len(); - - tokio::time::sleep(Duration::from_millis(300)).await; - - let final_count = fx - .mock_server - .received_requests() - .await - .unwrap_or_default() - .len(); - - // Assert - assert_eq!( - after_revoke, final_count, - "Should stop sending after token revoked" - ); -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn it_includes_hardware_states_in_payload() { // Arrange diff --git a/orb-backend-status/tests/fixture.rs b/orb-backend-status/tests/fixture.rs index 9773be46b..003fba2b1 100644 --- a/orb-backend-status/tests/fixture.rs +++ b/orb-backend-status/tests/fixture.rs @@ -33,8 +33,6 @@ pub struct Fixture { procfs: PathBuf, netstats_poll_interval: Duration, sender_interval: Duration, - sender_min_backoff: Duration, - sender_max_backoff: Duration, req_timeout: Duration, req_min_retry_interval: Duration, req_max_retry_interval: Duration, @@ -56,8 +54,6 @@ impl Fixture { pub async fn builder( #[builder(default = Duration::from_secs(30))] netstats_poll_interval: Duration, #[builder(default = Duration::from_secs(30))] sender_interval: Duration, - #[builder(default = Duration::from_secs(1))] sender_min_backoff: Duration, - #[builder(default = Duration::from_secs(30))] sender_max_backoff: Duration, #[builder(default = Duration::from_secs(5))] req_timeout: Duration, #[builder(default = Duration::from_millis(100))] req_min_retry_interval: Duration, @@ -142,8 +138,6 @@ impl Fixture { mock_server, netstats_poll_interval, sender_interval, - sender_min_backoff, - 
sender_max_backoff, req_timeout, req_min_retry_interval, req_max_retry_interval, @@ -209,8 +203,6 @@ impl Fixture { let procfs = self.procfs.clone(); let netstats_poll_interval = self.netstats_poll_interval; let sender_interval = self.sender_interval; - let sender_min_backoff = self.sender_min_backoff; - let sender_max_backoff = self.sender_max_backoff; let req_timeout = self.req_timeout; let req_min_retry_interval = self.req_min_retry_interval; let req_max_retry_interval = self.req_max_retry_interval; @@ -228,8 +220,6 @@ impl Fixture { .procfs(procfs) .net_stats_poll_interval(netstats_poll_interval) .sender_interval(sender_interval) - .sender_min_backoff(sender_min_backoff) - .sender_max_backoff(sender_max_backoff) .req_timeout(req_timeout) .req_min_retry_interval(req_min_retry_interval) .req_max_retry_interval(req_max_retry_interval) From e94be75d72d110169f0579709a4e4cafc170dc7b Mon Sep 17 00:00:00 2001 From: vmenge Date: Fri, 27 Mar 2026 20:38:55 +0100 Subject: [PATCH 54/66] fix(backend-status): don't forget orb id :D (#1121) image --- orb-backend-status/src/backend/client.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/orb-backend-status/src/backend/client.rs b/orb-backend-status/src/backend/client.rs index 77e89a2b2..e04b77531 100644 --- a/orb-backend-status/src/backend/client.rs +++ b/orb-backend-status/src/backend/client.rs @@ -102,9 +102,10 @@ impl StatusClient { } Ok(_) = connectivity_rx.changed() => { - info!("connectivity status changed"); connectivity = connectivity_rx.borrow_and_update().clone(); if connectivity.is_connected() { + info!("connectivity status changed, rebuilding client"); + client = make_client() .inspect_err(|e| error!("failed to create http client: {e:?}"))?; } @@ -117,6 +118,7 @@ impl StatusClient { Err(Err::NoConnectivity) } else { let req = OrbStatusApiV2 { + orb_id: Some(orb_id.to_string()), orb_name: Some(orb_name.to_string()), jabil_id: Some(jabil_id.to_string()), version: Some(VersionApiV2 { From 
ffbbead72fd4929fb11b170949ba59e350304f40 Mon Sep 17 00:00:00 2001 From: vmenge Date: Sat, 28 Mar 2026 14:34:36 +0100 Subject: [PATCH 55/66] fix(jobs-agent): better conn change handling (#1122) ## changes better handling of `oes::ActiveConnections` to avoid unnecessary reconnections through orb relay client ## todo - [x] test on orb --- orb-jobs-agent/src/conn_change.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/orb-jobs-agent/src/conn_change.rs b/orb-jobs-agent/src/conn_change.rs index 9e92b3a51..c5ed07a8e 100644 --- a/orb-jobs-agent/src/conn_change.rs +++ b/orb-jobs-agent/src/conn_change.rs @@ -1,7 +1,7 @@ use crate::job_system::client::JobClient; use color_eyre::Result; use orb_info::OrbId; -use tracing::info; +use tracing::{info, warn}; use zenorb::Zenorb; /// forces relay reconnection every time there is a change to connectivity @@ -23,13 +23,20 @@ pub async fn spawn_watcher( let is_online = active_conns.connections.iter().any(|c|c.has_internet); let primary = active_conns.connections.iter().find(|c|c.primary).map(|c|&c.name); - if !is_online { - info!("detected changed in connectivity, but we have no global connectivity. doing nothing"); - return Ok(()) - } + match (is_online, primary) { + (true, Some(con)) => { + warn!("new primary connection: {con}, forcing relay reconnection"); + client.force_relay_reconnect().await?; + } + + (true, None) => { + warn!("detected changed in connectivity, but we have global connectivity but no primary connection. doing nothing"); + } - info!("new primary connection: {primary:?}, forcing relay reconnection"); - client.force_relay_reconnect().await?; + (false, _) => { + warn!("detected changed in connectivity, but we have no global connectivity. 
doing nothing"); + } + } Ok(()) }) From 99a83da839630d7f7c3a8849ba1fc4bd0be08739 Mon Sep 17 00:00:00 2001 From: vmenge Date: Sat, 28 Mar 2026 14:44:25 +0100 Subject: [PATCH 56/66] fix(connd): powercycle modem if we cant get sim info (#1123) ## changes power cycles the modem if we can't retrieve sim information ## todo - [ ] test on an orb --- orb-connd/src/modem/mod.rs | 16 ++++++++-------- orb-connd/src/reporters/active_connections.rs | 4 +++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/orb-connd/src/modem/mod.rs b/orb-connd/src/modem/mod.rs index 59b2dc08d..683a76ed8 100644 --- a/orb-connd/src/modem/mod.rs +++ b/orb-connd/src/modem/mod.rs @@ -105,14 +105,14 @@ async fn take_snapshot(mm: &dyn ModemManager) -> Result { let modem_info = mm.modem_info(&modem.id).await?; - let iccid = match modem_info.sim { - None => None, - Some(sim_id) => { - let sim_info = mm.sim_info(&sim_id).await?; + let sim_id = modem_info + .sim + .wrap_err("could not get sim id from modem info")?; - Some(sim_info.iccid) - } - }; + let sim_info = mm + .sim_info(&sim_id) + .await + .wrap_err("could not get sim info")?; let signal = mm .signal_get(&modem.id) @@ -129,7 +129,7 @@ async fn take_snapshot(mm: &dyn ModemManager) -> Result { Ok(Snapshot { id: modem.id, fw_revision: modem_info.fw_revision, - iccid, + iccid: Some(sim_info.iccid), imei: modem_info.imei, rat: modem_info.access_tech, operator: modem_info.operator_name, diff --git a/orb-connd/src/reporters/active_connections.rs b/orb-connd/src/reporters/active_connections.rs index e435cc027..f5de8f41f 100644 --- a/orb-connd/src/reporters/active_connections.rs +++ b/orb-connd/src/reporters/active_connections.rs @@ -143,6 +143,8 @@ async fn build_report( .await .map_err(|e: color_eyre::Report| format!("{e:#}")); + let iface = if iface == "cdc-wdm0" { "wwan0" } else { iface }; + report.connections.push(Connection { primary: is_primary(primary, &conn.id), name: conn.id.clone(), @@ -282,7 +284,7 @@ impl TryFrom for 
oes::ActiveConnections { let iface = match c.iface.to_lowercase().get(..3) { Some("eth") => NetworkInterface::Ethernet, Some("wla") => NetworkInterface::WiFi, - Some("wwa") => NetworkInterface::Cellular, + Some("wwa") | Some("cdc") => NetworkInterface::Cellular, _ => bail!("{} is not a valid network interface", c.iface), }; From 98b82d2265216a3dee113bca34098a893be0b1ea Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Mon, 30 Mar 2026 14:25:48 +0200 Subject: [PATCH 57/66] refactor: remove serial logs from the ota command (#1093) the serial logs will be recorded differently --- hil/src/commands/ota/mod.rs | 94 +++++++++++++++++----------------- hil/src/commands/ota/reboot.rs | 18 ++++--- 2 files changed, 60 insertions(+), 52 deletions(-) diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index 633fb9663..c1b6ca21c 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -14,7 +14,7 @@ use color_eyre::{ use secrecy::SecretString; use tracing::{error, info, instrument}; -use crate::{OrbConfig, Platform}; +use crate::OrbConfig; mod monitor; mod reboot; @@ -54,12 +54,13 @@ pub struct Ota { timeout_secs: u64, /// Path to save journalctl logs from worldcoin-update-agent.service + /// Optional: if nothing is passed no logs are recorded #[arg(long)] - log_file: PathBuf, + log_file: Option, } impl Ota { - /// Get the serial port path from orb_config + /// Path to save journalctl logs from worldcoin-update-agent.service fn get_serial_path(orb_config: &OrbConfig) -> Result<&PathBuf> { orb_config .serial_path @@ -67,6 +68,30 @@ impl Ota { .wrap_err("serial-path must be specified") } + async fn prepare_for_ota( + &self, + orb_config: &OrbConfig, + session: &RemoteSession, + ) -> Result { + info!("Wiping overlays before update"); + system::wipe_overlays(session).await.inspect_err(|e| { + error!("Failed to wipe overlays: {}", e); + })?; + info!("Overlays wiped successfully, rebooting device"); + + system::reboot_orb(session).await?; + 
info!("Reboot command sent to Orb device"); + + self.handle_reboot("wipe_overlays", orb_config) + .await + .inspect_err(|e| { + error!( + "Failed to reboot and reconnect after wiping overlays: {}", + e + ); + }) + } + #[instrument] pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { let _start_time = Instant::now(); @@ -77,33 +102,8 @@ impl Ota { println!("OTA_ERROR=REMOTE_CONNECTION_FAILED: {e}"); })?; - let platform = orb_config - .platform - .wrap_err("platform must be specified for OTA")?; - - let (session, wipe_overlays_status) = match platform { - Platform::Diamond | Platform::Pearl => { - info!("Wiping overlays before update"); - system::wipe_overlays(&session).await.inspect_err(|e| { - error!("Failed to wipe overlays: {}", e); - })?; - info!("Overlays wiped successfully, rebooting device"); - - system::reboot_orb(&session).await?; - info!("Reboot command sent to Orb device"); - - let new_session = self - .handle_reboot("wipe_overlays", orb_config) - .await - .inspect_err(|e| { - error!( - "Failed to reboot and reconnect after wiping overlays: {}", - e - ); - })?; - (new_session, "succeeded".to_string()) - } - }; + let session = self.prepare_for_ota(orb_config, &session).await?; + let wipe_overlays_status = "succeeded".to_string(); let current_slot = system::get_current_slot(&session).await.inspect_err(|e| { @@ -262,21 +262,23 @@ impl Ota { println!("OTA_SLOT_FINAL={}", current_slot); println!("OTA_WIPE_OVERLAYS_FINAL={}", wipe_overlays_status); - let platform_name = orb_config - .platform - .map(|p| format!("{p}")) - .unwrap_or_else(|| "unknown".to_string()); - let log_dir = self - .log_file - .parent() - .unwrap_or_else(|| std::path::Path::new(".")); - for suffix in ["wipe_overlays", "update"] { - let path = log_dir.join(format!("boot_log_{platform_name}_{suffix}.txt")); - println!(); - println!("=== boot_log_{platform_name}_{suffix}.txt ==="); - match tokio::fs::read_to_string(&path).await { - Ok(contents) => print!("{contents}"), - Err(e) => 
println!(" (not available: {e})"), + if let Some(log_file) = &self.log_file { + let platform_name = orb_config + .platform + .map(|p| format!("{p}")) + .unwrap_or_else(|| "unknown".to_string()); + let log_dir = log_file + .parent() + .unwrap_or_else(|| std::path::Path::new(".")); + for suffix in ["wipe_overlays", "update"] { + let path = + log_dir.join(format!("boot_log_{platform_name}_{suffix}.txt")); + println!(); + println!("=== boot_log_{platform_name}_{suffix}.txt ==="); + match tokio::fs::read_to_string(&path).await { + Ok(contents) => print!("{contents}"), + Err(e) => println!(" (not available: {e})"), + } } } @@ -375,7 +377,7 @@ mod test { key_path: None, port: 22, timeout_secs: 7200, - log_file: PathBuf::from("/tmp/ota.log"), + log_file: None, } } diff --git a/hil/src/commands/ota/reboot.rs b/hil/src/commands/ota/reboot.rs index 1f91def58..d744ad9f8 100644 --- a/hil/src/commands/ota/reboot.rs +++ b/hil/src/commands/ota/reboot.rs @@ -7,6 +7,7 @@ use color_eyre::{ Result, }; use futures::StreamExt; +use std::path::Path; use std::time::{Duration, Instant}; use tokio::sync::broadcast; use tokio_serial::SerialPortBuilderExt; @@ -42,7 +43,9 @@ impl Ota { Ok(()) }); - self.capture_boot_logs(log_suffix, orb_config).await?; + if let Some(log_file) = &self.log_file { + Self::capture_boot_logs(log_file, log_suffix, orb_config).await?; + } let start_time = Instant::now(); let timeout = Duration::from_secs(900); // 15 minutes @@ -117,7 +120,7 @@ impl Ota { #[instrument(skip_all)] async fn capture_boot_logs( - &self, + log_file: &Path, log_suffix: &str, orb_config: &OrbConfig, ) -> Result<()> { @@ -131,8 +134,7 @@ impl Ota { log_suffix, platform_name ); - let boot_log_path = self - .log_file + let boot_log_path = log_file .parent() .unwrap_or_else(|| std::path::Path::new(".")) .join(format!("boot_log_{platform_name}_{log_suffix}.txt")); @@ -189,7 +191,10 @@ impl Ota { if let Ok(text) = String::from_utf8(bytes.to_vec()) && text.contains(LOGIN_PROMPT_PATTERN) { - 
info!("Login prompt detected in boot logs after {:?}, stopping capture", start_time.elapsed()); + info!( + "Login prompt detected in boot logs after {:?}, stopping capture", + start_time.elapsed() + ); found_login_prompt = true; break; } @@ -212,7 +217,8 @@ impl Ota { if start_time.elapsed() >= timeout && !found_login_prompt { warn!( - "Boot log capture timed out after {:?} without finding login prompt. Will proceed with SSH reconnection anyway.", + "Boot log capture timed out after {:?} without finding login prompt. \ + Will proceed with SSH reconnection anyway.", timeout ); } From 035cd2fd6100861086c9ab54748af7b16a72f971 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Mon, 30 Mar 2026 15:26:57 +0200 Subject: [PATCH 58/66] chore(hil): bump orb-hil to beta 22 (#1124) f --- nix/packages/orb-hil.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nix/packages/orb-hil.nix b/nix/packages/orb-hil.nix index 6b45c5940..2502daf3a 100644 --- a/nix/packages/orb-hil.nix +++ b/nix/packages/orb-hil.nix @@ -2,11 +2,11 @@ { pkgs }: pkgs.stdenv.mkDerivation rec { pname = "orb-hil"; - version = "0.0.2-beta.21"; + version = "0.0.2-beta.22"; src = pkgs.fetchurl { url = "https://github.com/worldcoin/orb-software/releases/download/orb-hil%2Fv${version}/orb-hil_x86_64"; - sha256 = "sha256-6WmSjaWnsgLy1GYOpzNGv80mpCYB3sWMZ/ycKIwJPwU="; + sha256 = "sha256-+JqlhaU7q2wCpQV3UMDRBtxmrU2+Ync33iH/OpRiobY="; }; dontUnpack = true; From 28d3bc0ee5b563cad013b5ac816bcbc316f0499f Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Tue, 31 Mar 2026 12:48:05 +0200 Subject: [PATCH 59/66] feat(hil): add copy command (#1125) Add two new commands to orb-hil for copying files to/from an Orb over SSH or Teleport: ``` orb-hil copy-to --local ./file.bin --orb /tmp/file.bin --transport ssh --password "..." 
orb-hil copy-from --orb /tmp/log.txt --local ./log.txt --transport teleport ``` Also, refactor the duplicates in working with remote commands Tested: - ssh copying - teleport copying - remote cmd - ota --- hil/examples/orb_verify.rs | 59 ++++----- hil/src/commands/cmd.rs | 147 ++++------------------ hil/src/commands/copy.rs | 96 ++++++++++++++ hil/src/commands/mod.rs | 2 + hil/src/commands/ota/mod.rs | 153 ++-------------------- hil/src/lib.rs | 2 +- hil/src/main.rs | 4 + hil/src/orb.rs | 7 -- hil/src/remote_cmd.rs | 245 +++++++++++++++++++++++++++++++----- hil/src/ssh_wrapper.rs | 54 +++++++- 10 files changed, 427 insertions(+), 342 deletions(-) create mode 100644 hil/src/commands/copy.rs diff --git a/hil/examples/orb_verify.rs b/hil/examples/orb_verify.rs index d4968619e..b81a6bc27 100644 --- a/hil/examples/orb_verify.rs +++ b/hil/examples/orb_verify.rs @@ -19,9 +19,7 @@ use std::time::Duration; use clap::{Parser, Subcommand}; use color_eyre::{eyre::WrapErr, Result}; use dialoguer::Password; -use orb_hil::{ - mcu_util, verify, AuthMethod, RemoteConnectArgs, RemoteSession, RemoteTransport, -}; +use orb_hil::{mcu_util, verify, RemoteArgs, RemoteSession, RemoteTransport}; use secrecy::SecretString; use tracing::info; use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter}; @@ -74,42 +72,35 @@ enum VerifyCommand { } impl Cli { - fn get_auth_method(&self) -> Result { - match (&self.password, &self.key_path) { - // Key path takes precedence if both are somehow provided - (_, Some(key_path)) => Ok(AuthMethod::Key { - private_key_path: key_path.clone(), - }), - (Some(password), None) => Ok(AuthMethod::Password(password.clone())), - (None, None) => { - // Prompt for password interactively - let password = Password::new() - .with_prompt(format!( - "Password for {}@{}", - self.username, self.hostname - )) - .interact() - .wrap_err("Failed to read password")?; - - Ok(AuthMethod::Password(SecretString::from(password))) - } - } - } - async fn 
connect_ssh(&self) -> Result { - let connect_args = RemoteConnectArgs { - transport: RemoteTransport::Ssh, + let password = if self.password.is_some() || self.key_path.is_some() { + self.password.clone() + } else { + let pw = Password::new() + .with_prompt(format!( + "Password for {}@{}", + self.username, self.hostname + )) + .interact() + .wrap_err("Failed to read password")?; + Some(SecretString::from(pw)) + }; + + let remote = RemoteArgs { hostname: Some(self.hostname.clone()), - orb_id: None, username: Some(self.username.clone()), port: self.port, - auth: Some(self.get_auth_method()?), - timeout: Duration::from_secs(30), + password, + key_path: self.key_path.clone(), }; - - RemoteSession::connect(connect_args) - .await - .wrap_err("Failed to establish SSH connection to Orb device") + RemoteSession::connect( + remote, + RemoteTransport::Ssh, + Duration::from_secs(30), + None, + ) + .await + .wrap_err("Failed to establish SSH connection to Orb device") } } diff --git a/hil/src/commands/cmd.rs b/hil/src/commands/cmd.rs index 19ad1a9bb..6204e7d0c 100644 --- a/hil/src/commands/cmd.rs +++ b/hil/src/commands/cmd.rs @@ -1,7 +1,7 @@ #![allow(clippy::uninlined_format_args)] -use std::{path::PathBuf, pin::pin, time::Duration}; +use std::{pin::pin, time::Duration}; -use crate::{AuthMethod, RemoteConnectArgs, RemoteSession, RemoteTransport}; +use crate::{RemoteArgs, RemoteTransport}; use bytes::Bytes; use clap::Parser; use color_eyre::{ @@ -10,7 +10,6 @@ use color_eyre::{ }; use futures::{TryStream, TryStreamExt as _}; use humantime::parse_duration; -use secrecy::SecretString; use tokio::{ io::{AsyncRead, AsyncWrite, AsyncWriteExt as _}, sync::broadcast, @@ -52,29 +51,12 @@ pub struct Cmd { #[arg(long, value_enum, default_value_t = CommandTransport::Serial)] transport: CommandTransport, - /// Override the SSH hostname (takes precedence over --orb-id derived hostname) - #[arg(long)] - hostname: Option, - - /// Username for SSH/Teleport - #[arg(long)] - username: Option, - - 
/// SSH port (used only with --transport ssh) - #[arg(long, default_value = "22")] - port: u16, - - /// Password for SSH authentication (mutually exclusive with --key-path) - #[arg(long)] - password: Option, - - /// Path to SSH private key (mutually exclusive with --password) - #[arg(long)] - key_path: Option, - /// Timeout duration (e.g., "10s", "500ms") #[arg(long, default_value = "10s", value_parser = parse_duration)] timeout: Duration, + + #[command(flatten)] + remote: RemoteArgs, } impl Cmd { @@ -111,24 +93,10 @@ impl Cmd { transport: RemoteTransport, orb_config: &OrbConfig, ) -> Result<()> { - let auth = self.resolve_remote_auth(transport)?; - - let connect_args = RemoteConnectArgs { - transport, - hostname: match transport { - // teleport needs to resolve the hostname, so we ignore it - RemoteTransport::Teleport => None, - RemoteTransport::Ssh => { - self.hostname.clone().or_else(|| orb_config.get_hostname()) - } - }, - orb_id: orb_config.orb_id.clone(), - username: self.username, - port: self.port, - auth, - timeout: self.timeout, - }; - let session = RemoteSession::connect(connect_args).await?; + let session = self + .remote + .connect(transport, self.timeout, orb_config) + .await?; let command_result = tokio::time::timeout(self.timeout, session.execute_command(&self.cmd)) @@ -139,7 +107,7 @@ impl Cmd { print!("{}", command_result.stdout); eprint!("{}", command_result.stderr); if !command_result.is_success() { - bail!( + color_eyre::eyre::bail!( "command returned nonzero error code: {}", command_result.exit_status ); @@ -147,37 +115,6 @@ impl Cmd { Ok(()) } - - fn resolve_remote_auth( - &self, - transport: RemoteTransport, - ) -> Result> { - match transport { - RemoteTransport::Ssh => match (&self.password, &self.key_path) { - (Some(password), None) => { - Ok(Some(AuthMethod::Password(password.clone()))) - } - (None, Some(private_key_path)) => Ok(Some(AuthMethod::Key { - private_key_path: private_key_path.clone(), - })), - (None, None) => { - 
bail!("--transport ssh requires --password or --key-path") - } - (Some(_), Some(_)) => { - bail!("--password and --key-path are mutually exclusive") - } - }, - RemoteTransport::Teleport => { - if self.password.is_some() || self.key_path.is_some() { - bail!( - "--password/--key-path can only be used with --transport ssh" - ); - } - - Ok(None) - } - } - } } /// [`Cmd::run`], but the portion that is actually testable. @@ -306,61 +243,23 @@ mod test { Cmd { cmd: "pwd".to_owned(), transport: CommandTransport::Ssh, - hostname: None, - username: None, - port: 22, - password: None, - key_path: None, timeout: Duration::from_secs(5), + remote: RemoteArgs { + hostname: None, + username: None, + port: 22, + password: None, + key_path: None, + }, } } #[test] - fn ssh_transport_requires_auth() { - let cmd = sample_cmd(); - let err = cmd - .resolve_remote_auth(RemoteTransport::Ssh) - .expect_err("ssh must require auth"); - assert!(err - .to_string() - .contains("--transport ssh requires --password or --key-path")); - } - - #[test] - fn ssh_transport_accepts_password_auth() { - let mut cmd = sample_cmd(); - cmd.password = Some(SecretString::from("password".to_owned())); - - let auth = cmd - .resolve_remote_auth(RemoteTransport::Ssh) - .expect("password auth should be accepted"); - assert!(matches!(auth, Some(AuthMethod::Password(_)))); - } - - #[test] - fn ssh_transport_rejects_both_auth_methods() { - let mut cmd = sample_cmd(); - cmd.password = Some(SecretString::from("password".to_owned())); - cmd.key_path = Some(PathBuf::from("/tmp/id_rsa")); - - let err = cmd - .resolve_remote_auth(RemoteTransport::Ssh) - .expect_err("ssh must reject dual auth methods"); - assert!(err - .to_string() - .contains("--password and --key-path are mutually exclusive")); - } - - #[test] - fn teleport_transport_rejects_ssh_auth_flags() { - let mut cmd = sample_cmd(); - cmd.password = Some(SecretString::from("password".to_owned())); - - let err = cmd - .resolve_remote_auth(RemoteTransport::Teleport) - 
.expect_err("teleport must reject ssh auth flags"); - assert!(err - .to_string() - .contains("--password/--key-path can only be used with --transport ssh")); + fn serial_transport_has_no_remote_transport() { + let cmd = Cmd { + transport: CommandTransport::Serial, + ..sample_cmd() + }; + assert!(cmd.transport.remote_transport().is_none()); } } diff --git a/hil/src/commands/copy.rs b/hil/src/commands/copy.rs new file mode 100644 index 000000000..509f83e7a --- /dev/null +++ b/hil/src/commands/copy.rs @@ -0,0 +1,96 @@ +use std::{path::PathBuf, time::Duration}; + +use clap::Parser; +use color_eyre::{eyre::Context as _, Result}; +use humantime::parse_duration; +use tracing::info; + +use crate::{remote_cmd::CopyDirection, OrbConfig, RemoteArgs, RemoteTransport}; + +/// Copy a local file to the Orb. +#[derive(Debug, Parser)] +pub struct CopyTo { + /// Path to the local file to copy. + #[arg(long)] + local: PathBuf, + + /// Destination path on the Orb. + #[arg(long)] + orb: PathBuf, + + /// Transport to use for the copy + #[arg(long, value_enum, default_value_t = RemoteTransport::Ssh)] + transport: RemoteTransport, + + /// Timeout duration (e.g., "60s", "2m") + #[arg(long, default_value = "60s", value_parser = parse_duration)] + timeout: Duration, + + #[command(flatten)] + remote: RemoteArgs, +} + +/// Copy a file from the Orb to the local machine. +#[derive(Debug, Parser)] +pub struct CopyFrom { + /// Source path on the Orb. + #[arg(long)] + orb: PathBuf, + + /// Destination path on the local machine. 
+ #[arg(long)] + local: PathBuf, + + /// Transport to use for the copy + #[arg(long, value_enum, default_value_t = RemoteTransport::Ssh)] + transport: RemoteTransport, + + /// Timeout duration (e.g., "60s", "2m") + #[arg(long, default_value = "60s", value_parser = parse_duration)] + timeout: Duration, + + #[command(flatten)] + remote: RemoteArgs, +} + +impl CopyTo { + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { + let session = self + .remote + .connect(self.transport, self.timeout, orb_config) + .await?; + info!( + "Copying {} -> orb:{}", + self.local.display(), + self.orb.display() + ); + tokio::time::timeout( + self.timeout, + session.copy_file(&self.local, &self.orb, CopyDirection::Upload), + ) + .await + .wrap_err("copy timed out")? + .wrap_err("copy failed") + } +} + +impl CopyFrom { + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { + let session = self + .remote + .connect(self.transport, self.timeout, orb_config) + .await?; + info!( + "Copying orb:{} -> {}", + self.orb.display(), + self.local.display() + ); + tokio::time::timeout( + self.timeout, + session.copy_file(&self.local, &self.orb, CopyDirection::Download), + ) + .await + .wrap_err("copy timed out")? 
+ .wrap_err("copy failed") + } +} diff --git a/hil/src/commands/mod.rs b/hil/src/commands/mod.rs index 84263b332..1fcfdcd32 100644 --- a/hil/src/commands/mod.rs +++ b/hil/src/commands/mod.rs @@ -1,5 +1,6 @@ mod button_ctrl; mod cmd; +mod copy; mod fetch_persistent; mod flash; mod login; @@ -12,6 +13,7 @@ mod set_recovery_pin; pub use self::button_ctrl::ButtonCtrl; pub use self::cmd::Cmd; +pub use self::copy::{CopyFrom, CopyTo}; pub use self::fetch_persistent::FetchPersistent; pub use self::flash::Flash; pub use self::login::Login; diff --git a/hil/src/commands/ota/mod.rs b/hil/src/commands/ota/mod.rs index c1b6ca21c..8832a353b 100644 --- a/hil/src/commands/ota/mod.rs +++ b/hil/src/commands/ota/mod.rs @@ -5,13 +5,12 @@ use crate::mcu_util::{ check_jetson_post_ota, check_main_board_versions_match, check_security_board_versions_match, }; -use crate::{AuthMethod, RemoteConnectArgs, RemoteSession, RemoteTransport}; +use crate::{RemoteArgs, RemoteSession, RemoteTransport}; use clap::Parser; use color_eyre::{ - eyre::{bail, ContextCompat, WrapErr}, + eyre::{ContextCompat, WrapErr}, Result, }; -use secrecy::SecretString; use tracing::{error, info, instrument}; use crate::OrbConfig; @@ -23,7 +22,6 @@ mod system; use crate::verify; #[derive(Debug, Parser)] -#[command(group = clap::ArgGroup::new("auth").multiple(false))] pub struct Ota { /// Target version to update to #[arg(long)] @@ -33,22 +31,6 @@ pub struct Ota { #[arg(long, value_enum, default_value_t = RemoteTransport::Ssh)] transport: RemoteTransport, - /// Username - #[arg(long)] - username: Option, - - /// Password for authentication (mutually exclusive with --key-path) - #[arg(long, group = "auth")] - password: Option, - - /// Path to SSH private key for authentication (mutually exclusive with --password) - #[arg(long, group = "auth")] - key_path: Option, - - /// SSH port for the Orb device - #[arg(long, default_value = "22")] - port: u16, - /// Timeout for the entire OTA process in seconds #[arg(long, default_value 
= "7200")] // 2 hours by default timeout_secs: u64, @@ -57,6 +39,9 @@ pub struct Ota { /// Optional: if nothing is passed no logs are recorded #[arg(long)] log_file: Option, + + #[command(flatten)] + remote: RemoteArgs, } impl Ota { @@ -305,129 +290,11 @@ impl Ota { async fn connect_remote(&self, orb_config: &OrbConfig) -> Result { const CONNECT_TIMEOUT: Duration = Duration::from_secs(30); - let auth = self.resolve_remote_auth()?; - - let hostname = orb_config - .get_hostname() - .wrap_err("orb-id must be specified to derive hostname")?; - - info!("Connecting to Orb device at {}:{}", hostname, self.port); - - let connect_args = RemoteConnectArgs { - transport: self.transport, - hostname: Some(hostname), - orb_id: orb_config.orb_id.clone(), - username: self.username.clone(), - port: self.port, - auth, - timeout: CONNECT_TIMEOUT, - }; - - let session = RemoteSession::connect(connect_args) + info!("Connecting to Orb device at port {}", self.remote.port); + self.remote + .clone() + .connect(self.transport, CONNECT_TIMEOUT, orb_config) .await - .wrap_err("Failed to establish remote connection to Orb device")?; - - info!("Successfully connected to Orb device"); - - Ok(session) - } - - fn resolve_remote_auth(&self) -> Result> { - match self.transport { - RemoteTransport::Ssh => match (&self.password, &self.key_path) { - (Some(password), None) => { - Ok(Some(AuthMethod::Password(password.clone()))) - } - (None, Some(private_key_path)) => Ok(Some(AuthMethod::Key { - private_key_path: private_key_path.clone(), - })), - (None, None) => { - bail!("--transport ssh requires --password or --key-path") - } - (Some(_), Some(_)) => { - bail!("--password and --key-path are mutually exclusive") - } - }, - RemoteTransport::Teleport => { - if self.password.is_some() || self.key_path.is_some() { - bail!( - "--password/--key-path can only be used with --transport ssh" - ); - } - if self.port != 22 { - bail!("--transport teleport does not use --port (must be 22)"); - } - - Ok(None) - } - } 
- } -} - -#[cfg(test)] -mod test { - use super::*; - - fn sample_ota() -> Ota { - Ota { - target_version: "test-version".to_owned(), - transport: RemoteTransport::Ssh, - username: None, - password: None, - key_path: None, - port: 22, - timeout_secs: 7200, - log_file: None, - } - } - - #[test] - fn ssh_transport_requires_auth() { - let ota = sample_ota(); - let err = ota - .resolve_remote_auth() - .expect_err("ssh must require auth"); - assert!(err - .to_string() - .contains("--transport ssh requires --password or --key-path")); - } - - #[test] - fn ssh_transport_accepts_password_auth() { - let mut ota = sample_ota(); - ota.password = Some(SecretString::from("password".to_owned())); - - let auth = ota - .resolve_remote_auth() - .expect("password auth should be accepted"); - assert!(matches!(auth, Some(AuthMethod::Password(_)))); - } - - #[test] - fn teleport_transport_rejects_auth_flags() { - let mut ota = sample_ota(); - ota.transport = RemoteTransport::Teleport; - ota.password = Some(SecretString::from("password".to_owned())); - - let err = ota - .resolve_remote_auth() - .expect_err("teleport must reject ssh auth flags"); - assert!(err - .to_string() - .contains("--password/--key-path can only be used with --transport ssh")); - } - - #[test] - fn teleport_transport_rejects_custom_port() { - let mut ota = sample_ota(); - ota.transport = RemoteTransport::Teleport; - ota.port = 3022; - - let err = ota - .resolve_remote_auth() - .expect_err("teleport must reject custom ssh port"); - assert!(err - .to_string() - .contains("--transport teleport does not use --port")); + .wrap_err("Failed to establish remote connection to Orb device") } } diff --git a/hil/src/lib.rs b/hil/src/lib.rs index f66a5e6b5..ef10cfe71 100644 --- a/hil/src/lib.rs +++ b/hil/src/lib.rs @@ -19,7 +19,7 @@ pub mod verify; pub mod mcu_util; pub use orb::{orb_manager_from_config, BootMode, OrbConfig, OrbManager, Platform}; -pub use remote_cmd::{RemoteConnectArgs, RemoteSession, RemoteTransport}; +pub use 
remote_cmd::{RemoteArgs, RemoteSession, RemoteTransport}; pub use ssh_wrapper::AuthMethod; fn current_dir() -> camino::Utf8PathBuf { diff --git a/hil/src/main.rs b/hil/src/main.rs index 269355369..5c96a1ab3 100644 --- a/hil/src/main.rs +++ b/hil/src/main.rs @@ -24,6 +24,8 @@ struct Cli { enum Commands { ButtonCtrl(commands::ButtonCtrl), Cmd(commands::Cmd), + CopyFrom(commands::CopyFrom), + CopyTo(commands::CopyTo), FetchPersistent(commands::FetchPersistent), Flash(commands::Flash), Login(commands::Login), @@ -63,6 +65,8 @@ async fn main() -> Result<()> { match args.commands { Commands::ButtonCtrl(c) => c.run(&orb_config).await, Commands::Cmd(c) => c.run(&orb_config).await, + Commands::CopyFrom(c) => c.run(&orb_config).await, + Commands::CopyTo(c) => c.run(&orb_config).await, Commands::FetchPersistent(c) => c.run().await, Commands::Flash(c) => c.run().await, Commands::Login(c) => c.run(&orb_config).await, diff --git a/hil/src/orb.rs b/hil/src/orb.rs index fa6e5982f..a4ed30b82 100644 --- a/hil/src/orb.rs +++ b/hil/src/orb.rs @@ -46,9 +46,6 @@ pub struct OrbConfig { #[arg(long)] pub orb_id: Option, - #[arg(long)] - pub hostname: Option, - /// Platform type (diamond or pearl) #[arg(long, value_enum)] pub platform: Option, @@ -94,12 +91,8 @@ impl OrbConfig { } } - /// Creates a hostname from the orb_id by prepending "orb-". /// Returns None if orb_id is not set. 
pub fn get_hostname(&self) -> Option { - if self.hostname.is_some() { - return self.hostname.clone(); - } self.orb_id.as_ref().map(|id| format!("orb-{}.local", id)) } } diff --git a/hil/src/remote_cmd.rs b/hil/src/remote_cmd.rs index 3509904ea..78da522fd 100644 --- a/hil/src/remote_cmd.rs +++ b/hil/src/remote_cmd.rs @@ -1,9 +1,12 @@ +use std::path::{Path, PathBuf}; use std::time::Duration; +use clap::Args; use color_eyre::{ eyre::{bail, WrapErr as _}, Result, }; +use secrecy::SecretString; use tokio::process::Command; use tracing::{debug, info}; @@ -12,23 +15,88 @@ use crate::ssh_wrapper::{AuthMethod, CommandResult, SshConnectArgs, SshWrapper}; pub const DEFAULT_SSH_USERNAME: &str = "worldcoin"; pub const DEFAULT_TELEPORT_USERNAME: &str = "root"; +/// Shared clap args for SSH/Teleport remote connections. Flatten into a command +/// with `#[command(flatten)]`. The command is responsible for its own +/// `--transport` and `--timeout` fields since their defaults differ per command. +#[derive(Debug, Clone, Args)] +pub struct RemoteArgs { + /// Override the SSH hostname (takes precedence over --orb-id derived hostname) + #[arg(long)] + pub hostname: Option, + + /// Username for SSH/Teleport + #[arg(long)] + pub username: Option, + + /// SSH port (used only with --transport ssh) + #[arg(long, default_value = "22")] + pub port: u16, + + /// Password for SSH authentication (mutually exclusive with --key-path) + #[arg(long)] + pub password: Option, + + /// Path to SSH private key (mutually exclusive with --password) + #[arg(long)] + pub key_path: Option, +} + +#[derive(Debug, Clone, Copy)] +pub enum CopyDirection { + /// Copy a local file to the remote host. + Upload, + /// Copy a remote file to the local host. 
+ Download, +} + +impl RemoteArgs { + pub fn resolve_auth( + &self, + transport: RemoteTransport, + ) -> Result> { + match transport { + RemoteTransport::Ssh => match (&self.password, &self.key_path) { + (Some(password), None) => { + Ok(Some(AuthMethod::Password(password.clone()))) + } + (None, Some(private_key_path)) => Ok(Some(AuthMethod::Key { + private_key_path: private_key_path.clone(), + })), + (None, None) => { + bail!("--transport ssh requires --password or --key-path") + } + (Some(_), Some(_)) => { + bail!("--password and --key-path are mutually exclusive") + } + }, + RemoteTransport::Teleport => { + if self.password.is_some() || self.key_path.is_some() { + bail!( + "--password/--key-path can only be used with --transport ssh" + ); + } + Ok(None) + } + } + } + + pub async fn connect( + self, + transport: RemoteTransport, + timeout: Duration, + orb_config: &crate::orb::OrbConfig, + ) -> Result { + RemoteSession::connect(self, transport, timeout, orb_config.orb_id.clone()) + .await + } +} + #[derive(Debug, Clone, Copy, clap::ValueEnum)] pub enum RemoteTransport { Ssh, Teleport, } -#[derive(Debug, Clone)] -pub struct RemoteConnectArgs { - pub transport: RemoteTransport, - pub hostname: Option, - pub orb_id: Option, - pub username: Option, - pub port: u16, - pub auth: Option, - pub timeout: Duration, -} - pub struct RemoteSession { inner: RemoteSessionInner, } @@ -45,21 +113,24 @@ struct TeleportSession { } impl RemoteSession { - pub async fn connect(args: RemoteConnectArgs) -> Result { - match args.transport { + pub async fn connect( + args: RemoteArgs, + transport: RemoteTransport, + timeout: Duration, + orb_id: Option, + ) -> Result { + match transport { RemoteTransport::Ssh => { - let hostname = resolve_ssh_hostname( - args.hostname.as_deref(), - args.orb_id.as_deref(), - )?; - let username = args - .username - .unwrap_or_else(|| DEFAULT_SSH_USERNAME.to_owned()); - let auth = args.auth.ok_or_else(|| { + let hostname = + 
resolve_ssh_hostname(args.hostname.as_deref(), orb_id.as_deref())?; + let auth = args.resolve_auth(transport)?.ok_or_else(|| { color_eyre::eyre::eyre!( "ssh transport requires password or key authentication" ) })?; + let username = args + .username + .unwrap_or_else(|| DEFAULT_SSH_USERNAME.to_owned()); let connect_args = SshConnectArgs { hostname, @@ -67,20 +138,19 @@ impl RemoteSession { username, auth, }; - let session = tokio::time::timeout( - args.timeout, - SshWrapper::connect(connect_args), - ) - .await - .wrap_err("ssh connection timed out")? - .wrap_err("failed to establish ssh connection")?; + let session = + tokio::time::timeout(timeout, SshWrapper::connect(connect_args)) + .await + .wrap_err("ssh connection timed out")? + .wrap_err("failed to establish ssh connection")?; Ok(Self { inner: RemoteSessionInner::Ssh(session), }) } RemoteTransport::Teleport => { - if args.auth.is_some() { + let auth = args.resolve_auth(transport)?; + if auth.is_some() { bail!( "teleport transport does not support --password or --key-path" ); @@ -91,8 +161,8 @@ impl RemoteSession { let target = resolve_teleport_target( args.hostname.as_deref(), - args.orb_id.as_deref(), - args.timeout, + orb_id.as_deref(), + timeout, ) .await?; let username = args @@ -104,7 +174,7 @@ impl RemoteSession { inner: RemoteSessionInner::Teleport(TeleportSession { target, username, - timeout: args.timeout, + timeout, }), }; session.test_connection().await?; @@ -141,6 +211,22 @@ impl RemoteSession { Ok(()) } + + pub async fn copy_file( + &self, + local: &Path, + remote: &Path, + direction: CopyDirection, + ) -> Result<()> { + match &self.inner { + RemoteSessionInner::Ssh(session) => { + session.copy_file(local, remote, direction).await + } + RemoteSessionInner::Teleport(session) => { + session.copy_file(local, remote, direction).await + } + } + } } impl TeleportSession { @@ -164,6 +250,42 @@ impl TeleportSession { exit_status: output.status.code().unwrap_or(-1), }) } + + async fn copy_file( + &self, + 
local: &Path, + remote: &Path, + direction: CopyDirection, + ) -> Result<()> { + let remote_spec = + format!("{}@{}:{}", self.username, self.target, remote.display()); + + let mut tsh = Command::new("tsh"); + tsh.arg("scp"); + + match direction { + CopyDirection::Upload => { + debug!("tsh scp upload: {} -> {}", local.display(), remote_spec); + tsh.arg(local).arg(&remote_spec); + } + CopyDirection::Download => { + debug!("tsh scp download: {} -> {}", remote_spec, local.display()); + tsh.arg(&remote_spec).arg(local); + } + } + + let output = tokio::time::timeout(self.timeout, tsh.output()) + .await + .wrap_err("tsh scp timed out")? + .wrap_err("failed to execute tsh scp")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("tsh scp failed: {}", stderr); + } + + Ok(()) + } } fn resolve_ssh_hostname( @@ -319,4 +441,63 @@ orb 22222222-2222-2222-2222-222222222222 \u{2190} Tunnel orb-id=bba85bbf,orb-nam .to_string() .contains("ssh transport requires hostname or orb-id")); } + + fn sample_remote_args() -> RemoteArgs { + RemoteArgs { + hostname: None, + username: None, + port: 22, + password: None, + key_path: None, + } + } + + #[test] + fn ssh_transport_requires_auth() { + let args = sample_remote_args(); + let err = args + .resolve_auth(RemoteTransport::Ssh) + .expect_err("ssh must require auth"); + assert!(err + .to_string() + .contains("--transport ssh requires --password or --key-path")); + } + + #[test] + fn ssh_transport_accepts_password_auth() { + let mut args = sample_remote_args(); + args.password = Some(SecretString::from("password".to_owned())); + + let auth = args + .resolve_auth(RemoteTransport::Ssh) + .expect("password auth should be accepted"); + assert!(matches!(auth, Some(AuthMethod::Password(_)))); + } + + #[test] + fn ssh_transport_rejects_both_auth_methods() { + let mut args = sample_remote_args(); + args.password = Some(SecretString::from("password".to_owned())); + args.key_path = 
Some(PathBuf::from("/tmp/id_rsa")); + + let err = args + .resolve_auth(RemoteTransport::Ssh) + .expect_err("ssh must reject dual auth methods"); + assert!(err + .to_string() + .contains("--password and --key-path are mutually exclusive")); + } + + #[test] + fn teleport_transport_rejects_ssh_auth_flags() { + let mut args = sample_remote_args(); + args.password = Some(SecretString::from("password".to_owned())); + + let err = args + .resolve_auth(RemoteTransport::Teleport) + .expect_err("teleport must reject ssh auth flags"); + assert!(err + .to_string() + .contains("--password/--key-path can only be used with --transport ssh")); + } } diff --git a/hil/src/ssh_wrapper.rs b/hil/src/ssh_wrapper.rs index 3de5dcdee..3e72822c0 100644 --- a/hil/src/ssh_wrapper.rs +++ b/hil/src/ssh_wrapper.rs @@ -1,6 +1,7 @@ +use crate::remote_cmd::CopyDirection; use color_eyre::{eyre::bail, Result}; use secrecy::{ExposeSecret, SecretString}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use tokio::process::Command; use tracing::{debug, info}; @@ -212,6 +213,57 @@ impl SshWrapper { info!("Connection test successful"); Ok(()) } + + pub async fn copy_file( + &self, + local: &Path, + remote: &Path, + direction: CopyDirection, + ) -> Result<()> { + let remote_spec = format!( + "{}@{}:{}", + self.connect_args.username, + self.connect_args.hostname, + remote.display() + ); + + let mut scp = Command::new("scp"); + scp.arg("-o") + .arg(format!("ControlPath={}", self.control_path.display())) + .arg("-o") + .arg("ControlMaster=no") + .arg("-P") + .arg(self.connect_args.port.to_string()) + .arg("-o") + .arg("StrictHostKeyChecking=no") + .arg("-o") + .arg("UserKnownHostsFile=/dev/null") + .arg("-o") + .arg("LogLevel=ERROR"); + + match direction { + CopyDirection::Upload => { + debug!("scp upload: {} -> {}", local.display(), remote_spec); + scp.arg(local).arg(&remote_spec); + } + CopyDirection::Download => { + debug!("scp download: {} -> {}", 
remote_spec, local.display()); + scp.arg(&remote_spec).arg(local); + } + } + + let output = scp + .output() + .await + .map_err(|e| color_eyre::eyre::eyre!("failed to execute scp: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("scp failed: {}", stderr); + } + + Ok(()) + } } fn make_control_path(connection_id: u64) -> PathBuf { From 8780ca13b9ea7bf6f6c85535e36db4d532fea5af Mon Sep 17 00:00:00 2001 From: vmenge Date: Tue, 31 Mar 2026 13:29:03 +0200 Subject: [PATCH 60/66] chore(connd): propagate non-primary conn changes to avoid staleness (#1126) ## changes ### connd - propagates the active connections message at least every 5 minutes if there is a change on general connectivity state, primary connection, or active connections ### jobs-agent - account for extra publishes on active connections when forcing relay reconnection ## todo - [x] test on an orb --- orb-connd/src/network_manager/mod.rs | 5 +- orb-connd/src/reporters/active_connections.rs | 37 ++++++++++--- orb-jobs-agent/src/conn_change.rs | 53 ++++++++++++++----- 3 files changed, 75 insertions(+), 20 deletions(-) diff --git a/orb-connd/src/network_manager/mod.rs b/orb-connd/src/network_manager/mod.rs index da2e2c35e..de84b4695 100644 --- a/orb-connd/src/network_manager/mod.rs +++ b/orb-connd/src/network_manager/mod.rs @@ -560,6 +560,9 @@ impl NetworkManager { }); } + // sort to ensure we always return the same order for the same results + out.sort_by(|conn_a, conn_b| conn_a.id.cmp(&conn_b.id)); + Ok(out) } @@ -1047,7 +1050,7 @@ impl ActiveConnState { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct ActiveConn { pub id: String, pub uuid: String, diff --git a/orb-connd/src/reporters/active_connections.rs b/orb-connd/src/reporters/active_connections.rs index f5de8f41f..4453c9e80 100644 --- a/orb-connd/src/reporters/active_connections.rs +++ b/orb-connd/src/reporters/active_connections.rs @@ -1,4 +1,4 @@ -use 
crate::network_manager::{self, ConnectionState, NetworkManager}; +use crate::network_manager::{self, ActiveConn, ConnectionState, NetworkManager}; use crate::resolved::{HostnameResolution, LinkDnsStatus, Resolved}; use color_eyre::eyre::{bail, Context, ContextCompat}; use color_eyre::Result; @@ -10,7 +10,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::time::{Duration, Instant}; -use tokio::fs; +use tokio::{fs, time}; use tracing::{error, info, warn}; pub struct Args { @@ -45,7 +45,14 @@ pub async fn report(ctx: mini::Ctx) -> Result<()> { .ok() .flatten(); - let report = build_report(&primary_conn, state, &ctx) + let mut active_conns = ctx + .nm + .active_connections() + .await + .inspect_err(|e| warn!("failed to get active connections: {e}")) + .unwrap_or_default(); + + let report = build_report(&primary_conn, state, &active_conns, &ctx) .await .wrap_err("building active connections report")?; @@ -53,10 +60,13 @@ pub async fn report(ctx: mini::Ctx) -> Result<()> { .await .wrap_err("publishing active connections report")?; + let mut update_interval = time::interval(Duration::from_secs(180)); + loop { tokio::select! 
{ Some(_) = state_stream.next() => (), Some(_) = primary_conn_stream.next() => (), + _ = update_interval.tick() => (), }; let new_state = ctx.nm.state().await.wrap_err("failed to get nm state")?; @@ -69,12 +79,23 @@ pub async fn report(ctx: mini::Ctx) -> Result<()> { .ok() .flatten(); - let changed = (new_state != state) || (new_primary_conn != primary_conn); + let new_active_conns = ctx + .nm + .active_connections() + .await + .inspect_err(|e| warn!("failed to get active connections: {e}")) + .unwrap_or_default(); + + let changed = (new_state != state) + || (new_primary_conn != primary_conn) + || (new_active_conns != active_conns); + state = new_state; primary_conn = new_primary_conn; + active_conns = new_active_conns; if changed { - let report = build_report(&primary_conn, state, &ctx) + let report = build_report(&primary_conn, state, &active_conns, &ctx) .await .wrap_err("building active connections report")?; @@ -95,9 +116,9 @@ pub async fn report(ctx: mini::Ctx) -> Result<()> { async fn build_report( primary: &Option, connection_state: ConnectionState, + active_conns: &Vec, ctx: &mini::Ctx, ) -> Result { - let active_conns = ctx.nm.active_connections().await?; let connectivity_uri = ctx.nm.connectivity_check_uri().await?; let hostname = hostname_from_uri(&connectivity_uri).map(str::to_string); @@ -109,7 +130,7 @@ async fn build_report( iface_routes: InterfaceRoutes::from_fs(&ctx.sysfs, &ctx.procfs).await?, }; - for conn in &active_conns { + for conn in active_conns { for iface in &conn.devices { let dns_status = ctx .resolved @@ -143,6 +164,8 @@ async fn build_report( .await .map_err(|e: color_eyre::Report| format!("{e:#}")); + // when the SIM is disabled on emnify, interface will appear as `cdc-wdm0`. 
+ // we convert to `wwan0` to avoid issues with FM's backend let iface = if iface == "cdc-wdm0" { "wwan0" } else { iface }; report.connections.push(Connection { diff --git a/orb-jobs-agent/src/conn_change.rs b/orb-jobs-agent/src/conn_change.rs index c5ed07a8e..fa6c7f9b1 100644 --- a/orb-jobs-agent/src/conn_change.rs +++ b/orb-jobs-agent/src/conn_change.rs @@ -1,6 +1,7 @@ use crate::job_system::client::JobClient; -use color_eyre::Result; +use color_eyre::{eyre::eyre, Result}; use orb_info::OrbId; +use tokio::task; use tracing::{info, warn}; use zenorb::Zenorb; @@ -16,12 +17,40 @@ pub async fn spawn_watcher( .with_name("jobs-agent") .await?; - session - .receiver(client) - .subscriber("connd/oes/active_connections", async |client, sample| { - let active_conns: oes::ActiveConnections = serde_json::from_slice(&sample.payload().to_bytes())?; - let is_online = active_conns.connections.iter().any(|c|c.has_internet); - let primary = active_conns.connections.iter().find(|c|c.primary).map(|c|&c.name); + let sub = session + .declare_subscriber("connd/oes/active_connections") + .await + .map_err(|e| { + eyre!("failed to subscribe to connd/oes/active_connections: {e}") + })?; + + task::spawn(async move { + let mut state = (false, None); + + loop { + let sample = sub.recv_async().await.map_err(|e| { + eyre!( + "failed to receive message from connd/oes/active_connections: {e}" + ) + })?; + + let active_conns: oes::ActiveConnections = + serde_json::from_slice(&sample.payload().to_bytes())?; + + let is_online = active_conns.connections.iter().any(|c| c.has_internet); + let primary = active_conns + .connections + .iter() + .find(|c| c.primary) + .map(|c| &c.name); + + let changed = (is_online != state.0) || (primary != state.1.as_ref()); + state.0 = is_online; + state.1 = primary.cloned(); + + if !changed { + continue; + } match (is_online, primary) { (true, Some(con)) => { @@ -30,18 +59,18 @@ pub async fn spawn_watcher( } (true, None) => { - warn!("detected changed in connectivity, 
but we have global connectivity but no primary connection. doing nothing"); + warn!("detected changed in connectivity, we have global connectivity but no primary connection. doing nothing"); } (false, _) => { warn!("detected changed in connectivity, but we have no global connectivity. doing nothing"); } } + } - Ok(()) - }) - .run() - .await?; + #[allow(unreachable_code)] + Ok::<(), color_eyre::Report>(()) + }); Ok(session) } From 3db873fb3223f3feceb91baf125d03d2e229e678 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Tue, 31 Mar 2026 16:58:40 +0200 Subject: [PATCH 61/66] feat: return mcu reboot, decrypt mcu firmware, use default persistent path (#1127) also install gnupg system-wide --- hil/src/commands/flash.rs | 15 +++++++-- hil/src/commands/mcu.rs | 58 +++++++++++++++++++++++++++++++--- hil/src/commands/ota/system.rs | 10 +++--- hil/src/main.rs | 2 +- nix/machines/hil-common.nix | 1 + 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/hil/src/commands/flash.rs b/hil/src/commands/flash.rs index 1ce964579..b792911a2 100644 --- a/hil/src/commands/flash.rs +++ b/hil/src/commands/flash.rs @@ -4,6 +4,9 @@ use color_eyre::{ eyre::{bail, ensure, WrapErr}, Result, }; +use std::env; + +use crate::OrbConfig; use orb_s3_helpers::{ExistingFileBehavior, S3Uri}; use rand::{rngs::StdRng, SeedableRng}; use tracing::info; @@ -34,13 +37,14 @@ pub struct Flash { /// If this flag is given, overwites any existing files when downloading the rts. #[arg(long)] overwrite_existing: bool, - /// Path to directory containing persistent .img files to copy to bootloader dir + /// Path to directory containing persistent .img files to copy to bootloader dir. 
+ /// Defaults to /home/$USER/persistent-$ORB_ID #[arg(long)] persistent_img_path: Option, } impl Flash { - pub async fn run(self) -> Result<()> { + pub async fn run(self, orb_config: &OrbConfig) -> Result<()> { let args = self; let existing_file_behavior = if args.overwrite_existing { ExistingFileBehavior::Overwrite @@ -89,10 +93,15 @@ impl Flash { (false, true) => FlashVariant::Hil, (false, false) => FlashVariant::Regular, }; + let persistent_img_path = args.persistent_img_path.or_else(|| { + let home = env::var("HOME").ok()?; + let orb_id = orb_config.orb_id.as_deref()?; + Some(Utf8PathBuf::from(format!("{home}/persistent-{orb_id}"))) + }); crate::rts::flash( variant, &rts_path, - args.persistent_img_path.as_deref().map(|p| p.as_std_path()), + persistent_img_path.as_deref().map(|p| p.as_std_path()), StdRng::from_rng(rand::thread_rng()).unwrap(), ) .await diff --git a/hil/src/commands/mcu.rs b/hil/src/commands/mcu.rs index aaffcefcb..80ba20606 100644 --- a/hil/src/commands/mcu.rs +++ b/hil/src/commands/mcu.rs @@ -6,7 +6,11 @@ //! //! 
[RM0440]: https://www.st.com/resource/en/reference_manual/rm0440-stm32g4-series-advanced-armbased-32bit-mcus-stmicroelectronics.pdf -use std::{path::PathBuf, sync::Arc, time::Duration}; +use std::{ + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; use color_eyre::{ eyre::{bail, ensure, eyre, WrapErr as _}, @@ -21,6 +25,8 @@ use probe_rs::{ probe::{Probe, WireProtocol}, Core, MemoryInterface, Permissions, Session, }; +use secrecy::{ExposeSecret as _, SecretString}; +use std::process::Command; use tracing::{debug, info, warn}; // From probe-rs @@ -337,6 +343,9 @@ struct FlashCommand { /// Path to the hex file to flash #[clap(long)] file: PathBuf, + /// Passphrase to decrypt the firmware file (triggers decryption when provided) + #[arg(long)] + passphrase: Option, /// The USB serial number of the probe to use #[clap(long)] serial: Option, @@ -345,6 +354,37 @@ struct FlashCommand { device: Option<(u16, u16)>, } +fn decrypt_firmware( + file: &Path, + passphrase: &SecretString, +) -> Result { + let output = tempfile::Builder::new() + .suffix(".hex") + .tempfile() + .wrap_err("failed to create temp file for decrypted firmware")?; + + let status = Command::new("gpg") + .args([ + "--batch", + "--yes", + "--passphrase", + passphrase.expose_secret(), + "-o", + output.path().to_str().expect("temp path is valid utf8"), + "-d", + file.to_str() + .ok_or_else(|| eyre!("file path is not valid utf8"))?, + ]) + .status() + .wrap_err("failed to spawn gpg")?; + ensure!( + status.success(), + "gpg decryption failed with status: {status}" + ); + + Ok(output) +} + impl FlashCommand { async fn run(self) -> Result<()> { tokio::task::spawn_blocking(|| self.run_blocking()) @@ -355,10 +395,20 @@ impl FlashCommand { fn run_blocking(self) -> Result<()> { ensure!( self.file.exists(), - "hex file does not exist: {}", + "fw file does not exist: {}", self.file.display() ); + let (_temp_file, flash_path) = if let Some(passphrase) = &self.passphrase { + info!("decrypting firmware file..."); 
+ let temp = decrypt_firmware(&self.file, passphrase) + .wrap_err("decryption failed")?; + let path = temp.path().to_path_buf(); + (Some(temp), path) + } else { + (None, self.file.clone()) + }; + let filter = ProbeFilter::new(self.serial, self.device); let probe = get_probe(&filter).wrap_err("failed to get a hardware probe")?; @@ -397,12 +447,12 @@ impl FlashCommand { } }; - info!("flashing file: {}", self.file.display()); + info!("flashing file: {}", flash_path.display()); let progress = FlashProgressBar::new(); let mut options = DownloadOptions::default(); options.progress = FlashProgress::new(progress.callback()); options.verify = true; - download_file_with_options(&mut session, &self.file, Format::Hex, options) + download_file_with_options(&mut session, &flash_path, Format::Hex, options) .wrap_err("failed to flash hex file")?; info!("resetting target..."); diff --git a/hil/src/commands/ota/system.rs b/hil/src/commands/ota/system.rs index aea1261dc..6f3671c54 100644 --- a/hil/src/commands/ota/system.rs +++ b/hil/src/commands/ota/system.rs @@ -12,16 +12,14 @@ const GONDOR_CALLS_FOR_OTA_PATH: &str = "/usr/local/bin/gondor-calls-for-ota"; /// Reboot the Orb device using orb-mcu-util and shutdown pub async fn reboot_orb(session: &RemoteSession) -> Result<()> { session - .execute_command( - "TERM=dumb sudo systemctl stop worldcoin-key-retrieval.service", - ) + .execute_command("TERM=dumb orb-mcu-util reboot orb") .await - .wrap_err("Failed to stop the key retrieval service")?; + .wrap_err("Failed to execute orb-mcu-util reboot orb")?; session - .execute_command("TERM=dumb sudo systemctl reboot") + .execute_command("TERM=dumb sudo shutdown now") .await - .wrap_err("Failed to reboot the orb")?; + .wrap_err("Failed to execute shutdown now")?; Ok(()) } diff --git a/hil/src/main.rs b/hil/src/main.rs index 5c96a1ab3..bab687da0 100644 --- a/hil/src/main.rs +++ b/hil/src/main.rs @@ -68,7 +68,7 @@ async fn main() -> Result<()> { Commands::CopyFrom(c) => 
c.run(&orb_config).await, Commands::CopyTo(c) => c.run(&orb_config).await, Commands::FetchPersistent(c) => c.run().await, - Commands::Flash(c) => c.run().await, + Commands::Flash(c) => c.run(&orb_config).await, Commands::Login(c) => c.run(&orb_config).await, Commands::Mcu(c) => c.run().await, Commands::Nfsboot(c) => c.run().await, diff --git a/nix/machines/hil-common.nix b/nix/machines/hil-common.nix index 37e9947c4..e728dcc3e 100644 --- a/nix/machines/hil-common.nix +++ b/nix/machines/hil-common.nix @@ -94,6 +94,7 @@ in udev libguestfs-with-appliance abootimg + gnupg (python312.withPackages ( ps: with ps; [ pyyaml From 4ee44c8b48948cc51a25144f4a07896fe9cc8141 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Wed, 1 Apr 2026 15:28:32 +0200 Subject: [PATCH 62/66] test(qr-link): verify empty hash QR is rejected by verify() (#1130) --- Cargo.lock | 22 ++++++++-- qr-link/Cargo.toml | 2 +- qr-link/tests/qr-codes.rs | 86 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 24f1931f8..9fd627e44 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8357,7 +8357,7 @@ dependencies = [ "orb-endpoints", "orb-info", "orb-relay-client", - "orb-relay-messages", + "orb-relay-messages 0.0.0 (git+https://github.com/worldcoin/orb-relay-messages.git?rev=301889d17fa0c283bdbad8eb2f5659ee5effb40f)", "orb-relay-test-utils", "orb-speed-test", "orb-telemetry", @@ -8482,7 +8482,7 @@ version = "0.1.0" dependencies = [ "blake3", "data-encoding", - "orb-relay-messages", + "orb-relay-messages 0.0.0 (git+https://github.com/worldcoin/orb-relay-messages.git?rev=c447563af1970aa001a45e741b4b8448eeb5cb99)", "serde", "thiserror 1.0.69", "uuid 1.19.0", @@ -8497,7 +8497,7 @@ dependencies = [ "color-eyre", "derive_more 2.1.0", "flume", - "orb-relay-messages", + "orb-relay-messages 0.0.0 (git+https://github.com/worldcoin/orb-relay-messages.git?rev=301889d17fa0c283bdbad8eb2f5659ee5effb40f)", "orb-relay-test-utils", "secrecy 
0.10.3", "tokio", @@ -8521,6 +8521,20 @@ dependencies = [ "tonic-prost-build", ] +[[package]] +name = "orb-relay-messages" +version = "0.0.0" +source = "git+https://github.com/worldcoin/orb-relay-messages.git?rev=c447563af1970aa001a45e741b4b8448eeb5cb99#c447563af1970aa001a45e741b4b8448eeb5cb99" +dependencies = [ + "blake3", + "prost 0.14.1", + "prost-build 0.14.1", + "prost-types 0.14.1", + "tonic 0.14.2", + "tonic-prost", + "tonic-prost-build", +] + [[package]] name = "orb-relay-test-utils" version = "0.0.1" @@ -8528,7 +8542,7 @@ source = "git+https://github.com/worldcoin/orb-relay-messages.git?rev=301889d17f dependencies = [ "async-stream", "flume", - "orb-relay-messages", + "orb-relay-messages 0.0.0 (git+https://github.com/worldcoin/orb-relay-messages.git?rev=301889d17fa0c283bdbad8eb2f5659ee5effb40f)", "pollster", "tokio", "tokio-stream", diff --git a/qr-link/Cargo.toml b/qr-link/Cargo.toml index 29fcdcd6d..438790cc8 100644 --- a/qr-link/Cargo.toml +++ b/qr-link/Cargo.toml @@ -26,7 +26,7 @@ uuid = { version = "1.4.1", features = ["v4"] } [dev-dependencies.orb-relay-messages] features = ["client"] git = "https://github.com/worldcoin/orb-relay-messages.git" -rev = "301889d17fa0c283bdbad8eb2f5659ee5effb40f" +rev = "c447563af1970aa001a45e741b4b8448eeb5cb99" [[test]] name = "qr-codes" diff --git a/qr-link/tests/qr-codes.rs b/qr-link/tests/qr-codes.rs index 2ac7c9ee3..c32ef2634 100644 --- a/qr-link/tests/qr-codes.rs +++ b/qr-link/tests/qr-codes.rs @@ -55,3 +55,89 @@ MCowBQYDK2VuAyEA2boNBmJX4lGkA9kjthS5crXOBxu2BPycKRMakpzgLG4= }; assert!(!incorrect_app_data.verify(parsed_app_data)); } + +#[test] +fn test_empty_hash_qr_decodes_but_verify_rejects() { + let orb_relay_id = Uuid::nil(); + let qr = encode_static_qr(&orb_relay_id, &[] as &[u8]); + + // decode_v4 accepts a 16-byte payload (empty hash slice) + let (version, parsed_id, hash) = decode_qr_with_version(&qr).unwrap(); + assert_eq!(version, 4); + assert_eq!(parsed_id, orb_relay_id); + assert!(hash.is_empty()); + + // 
but verify() rejects an empty hash + let app_data = AppAuthenticatedData { + identity_commitment: "0xabcd".to_string(), + self_custody_public_key: "key".to_string(), + pcp_version: 3, + os: "Android".to_string(), + os_version: "1.2.3".to_string(), + }; + assert!(!app_data.verify(hash)); +} + +#[test] +fn test_different_pcp_version_fails_verify() { + let orb_relay_id = Uuid::new_v4(); + let app_data = AppAuthenticatedData { + identity_commitment: "0xabcd".to_string(), + self_custody_public_key: "key".to_string(), + pcp_version: 3, + os: "Android".to_string(), + os_version: "1.2.3".to_string(), + }; + let hash = app_data.hash(16); + let qr = encode_static_qr(&orb_relay_id, hash); + let (_, _, parsed_hash) = decode_qr_with_version(&qr).unwrap(); + + let different_app_data = AppAuthenticatedData { + pcp_version: 99, + ..app_data + }; + assert!(!different_app_data.verify(parsed_hash)); +} + +#[test] +fn test_corrupted_hash_fails_verify() { + let orb_relay_id = Uuid::new_v4(); + let app_data = AppAuthenticatedData { + identity_commitment: "0xabcd".to_string(), + self_custody_public_key: "key".to_string(), + pcp_version: 3, + os: "Android".to_string(), + os_version: "1.2.3".to_string(), + }; + let mut hash = app_data.hash(16); + hash[0] ^= 0xFF; + let qr = encode_static_qr(&orb_relay_id, hash); + let (_, _, parsed_hash) = decode_qr_with_version(&qr).unwrap(); + assert!(!app_data.verify(parsed_hash)); +} + +#[test] +fn test_empty_qr_string_is_malformed() { + assert!(decode_qr_with_version("").is_err()); +} + +#[test] +fn test_unsupported_version_is_rejected() { + assert!(decode_qr_with_version("3AAAA").is_err()); +} + +#[test] +fn test_invalid_base64_is_rejected() { + // '4' is valid version prefix, but '!!!' 
is not valid base64 + assert!(decode_qr_with_version("4!!!").is_err()); +} + +#[test] +fn test_roundtrip_preserves_orb_relay_id() { + for _ in 0..10 { + let id = Uuid::new_v4(); + let qr = encode_static_qr(&id, [0xAB; 16]); + let (_, parsed_id, _) = decode_qr_with_version(&qr).unwrap(); + assert_eq!(id, parsed_id); + } +} From 5265507098b4067caaeb444679f91493b40a7f98 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Wed, 1 Apr 2026 16:17:10 +0200 Subject: [PATCH 63/66] chore(hil): bump orb-hil to beta 23 (#1128) --- nix/packages/orb-hil.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nix/packages/orb-hil.nix b/nix/packages/orb-hil.nix index 2502daf3a..33761f19e 100644 --- a/nix/packages/orb-hil.nix +++ b/nix/packages/orb-hil.nix @@ -2,11 +2,11 @@ { pkgs }: pkgs.stdenv.mkDerivation rec { pname = "orb-hil"; - version = "0.0.2-beta.22"; + version = "0.0.2-beta.23"; src = pkgs.fetchurl { url = "https://github.com/worldcoin/orb-software/releases/download/orb-hil%2Fv${version}/orb-hil_x86_64"; - sha256 = "sha256-+JqlhaU7q2wCpQV3UMDRBtxmrU2+Ync33iH/OpRiobY="; + sha256 = "sha256-l4DNHmjjPmfig6x5ABsSTRE+/AOkPsDsGpbVHJDTUJs="; }; dontUnpack = true; From 899e81ca7cfa5814c598edd1a0aaa2fbe899c404 Mon Sep 17 00:00:00 2001 From: Adam Date: Wed, 1 Apr 2026 11:04:03 -0700 Subject: [PATCH 64/66] feat: support weth0 status via status connd (#1133) Just adding ETH0 status parsing for ETH0! 
--- orb-backend-status/src/collectors/net_stats.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/orb-backend-status/src/collectors/net_stats.rs b/orb-backend-status/src/collectors/net_stats.rs index ea5f6ef71..190439f7e 100644 --- a/orb-backend-status/src/collectors/net_stats.rs +++ b/orb-backend-status/src/collectors/net_stats.rs @@ -11,6 +11,7 @@ use tracing::error; const IFACE_WLAN0: &str = "wlan0"; const IFACE_WWAN0: &str = "wwan0"; +const IFACE_ETH0: &str = "eth0"; pub fn spawn_reporter( backend_status: BackendStatusImpl, @@ -61,8 +62,8 @@ pub async fn poll_net_stats(netdev: &Path) -> Result { fn parse_net_stats(net_stats: &str) -> Result { let mut interfaces = Vec::new(); - // Try to parse stats for both WLAN0 and WWAN0 interfaces - for iface_name in [IFACE_WLAN0, IFACE_WWAN0] { + // Try to parse stats for WLAN0, WWAN0, and ETH0 interfaces + for iface_name in [IFACE_WLAN0, IFACE_WWAN0, IFACE_ETH0] { if let Some(interface) = parse_interface_stats(net_stats, iface_name)? 
{ interfaces.push(interface); } From 618f91005f9f5c466992a8225604618ea1ee6a89 Mon Sep 17 00:00:00 2001 From: Vlad Agievich Date: Thu, 2 Apr 2026 11:11:45 +0200 Subject: [PATCH 65/66] chore(hil): add new pearl hil (munich-4) (#1134) --- .github/workflows/deploy-hil.yaml | 2 +- .../worldcoin-hil-munich-4/configuration.nix | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-hil.yaml b/.github/workflows/deploy-hil.yaml index 73f906b51..2c46b51dc 100644 --- a/.github/workflows/deploy-hil.yaml +++ b/.github/workflows/deploy-hil.yaml @@ -11,7 +11,7 @@ on: required: true env: - ALL_TARGETS: '["worldcoin-hil-munich-0","worldcoin-hil-munich-2","worldcoin-hil-munich-3","worldcoin-hil-munich-5","worldcoin-hil-munich-8","worldcoin-hil-munich-9","worldcoin-hil-munich-10","worldcoin-hil-munich-11"]' + ALL_TARGETS: '["worldcoin-hil-munich-0","worldcoin-hil-munich-2","worldcoin-hil-munich-3","worldcoin-hil-munich-4","worldcoin-hil-munich-5","worldcoin-hil-munich-8","worldcoin-hil-munich-9","worldcoin-hil-munich-10","worldcoin-hil-munich-11"]' jobs: prepare: diff --git a/nix/machines/worldcoin-hil-munich-4/configuration.nix b/nix/machines/worldcoin-hil-munich-4/configuration.nix index 449a2c968..b14c539cd 100644 --- a/nix/machines/worldcoin-hil-munich-4/configuration.nix +++ b/nix/machines/worldcoin-hil-munich-4/configuration.nix @@ -15,4 +15,21 @@ ../nixos-common.nix ../hil-common.nix ]; + + worldcoin.orbPlatform = "pearl"; + + environment.etc."worldcoin/orb.yaml" = { + text = '' + orb_id: cf2dbea5 + platform: ${config.worldcoin.orbPlatform} + # Pin controller configuration for orb-hil + # Type of pin controller to use (ftdi, relay) + pin_ctrl_type: usb_relay + serial_path: "usb-FTDI_FT232R_USB_UART_BG02N1MA-if00-port0" + relay_bank: "/dev/hidraw0" + relay_power_channel: 2 + relay_recovery_channel: 1 + ''; + mode = "0644"; + }; } From 2a5730dcea964e300b22bdc81955961945d8c371 Mon Sep 17 00:00:00 2001 From: Popov Philipp Date: 
Thu, 2 Apr 2026 13:18:52 +0200 Subject: [PATCH 66/66] feat(orb-jobs-agent): some wip --- orb-jobs-agent/src/args.rs | 3 - orb-jobs-agent/src/job_system/client.rs | 182 +++++++++++------------ orb-jobs-agent/src/job_system/handler.rs | 37 +++-- orb-jobs-agent/src/main.rs | 141 +++++++++--------- orb-jobs-agent/src/program.rs | 34 +++-- orb-jobs-agent/tests/common/fixture.rs | 43 ++---- orb-jobs-agent/tests/job_handler.rs | 16 +- 7 files changed, 233 insertions(+), 223 deletions(-) diff --git a/orb-jobs-agent/src/args.rs b/orb-jobs-agent/src/args.rs index 14966613c..ae4df561d 100644 --- a/orb-jobs-agent/src/args.rs +++ b/orb-jobs-agent/src/args.rs @@ -36,15 +36,12 @@ pub struct Args { /// The target job-server service id to send messages to. #[clap(long, env = "TARGET_SERVICE_ID", default_value = "job-server")] pub target_service_id: Option, - /// D-Bus address (defaults to DBUS_SESSION_BUS_ADDRESS or unix:path=/tmp/worldcoin_bus_socket). #[clap( long, env = "DBUS_SESSION_BUS_ADDRESS", default_value = "unix:path=/tmp/worldcoin_bus_socket" )] pub dbus_addr: String, - /// Run a single job document locally instead of connecting to relay. 
- #[clap(long)] pub run_job: Option, } diff --git a/orb-jobs-agent/src/job_system/client.rs b/orb-jobs-agent/src/job_system/client.rs index 0cf363268..1ab089a7a 100644 --- a/orb-jobs-agent/src/job_system/client.rs +++ b/orb-jobs-agent/src/job_system/client.rs @@ -14,36 +14,37 @@ use orb_relay_messages::{ prost_types::Any, relay::entity::EntityType, }; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; +pub type TransportResult = std::result::Result; + +#[derive(Debug)] +pub enum JobTransportMessage { + Notify, + Execution(JobExecution), + Cancel(JobCancel), +} + #[async_trait] pub trait JobTransport: Send + Sync + std::fmt::Debug { - async fn listen_for_job( - &self, - job_registry: &JobRegistry, - ) -> Result; + async fn recv(&self) -> TransportResult; - async fn request_next_job( - &self, - job_registry: &JobRegistry, - ) -> Result<(), orb_relay_client::Err>; + async fn request_next_job(&self, request: &JobRequestNext) -> TransportResult<()>; - async fn send_job_update( - &self, - update: &JobExecutionUpdate, - ) -> Result<(), orb_relay_client::Err>; + async fn send_job_update(&self, update: &JobExecutionUpdate) + -> TransportResult<()>; async fn reconnect(&self) -> Result<()>; } #[derive(Debug, Clone)] pub struct RelayTransport { - pub relay_client: Client, - pub target_service_id: String, - pub relay_namespace: String, + relay_client: Client, + target_service_id: String, + relay_namespace: String, } impl RelayTransport { @@ -59,10 +60,7 @@ impl RelayTransport { } } - async fn send_request( - &self, - request: &JobRequestNext, - ) -> Result<(), orb_relay_client::Err> { + async fn send_request(&self, request: &JobRequestNext) -> TransportResult<()> { let any = Any::from_msg(request).unwrap(); self.relay_client .send( @@ -78,10 +76,7 @@ impl RelayTransport { #[async_trait] impl JobTransport for RelayTransport { - async fn listen_for_job( - &self, - job_registry: 
&JobRegistry, - ) -> Result { + async fn recv(&self) -> TransportResult { loop { match self.relay_client.recv().await { Ok(msg) => { @@ -96,10 +91,7 @@ impl JobTransport for RelayTransport { match JobNotify::decode(any.value.as_slice()) { Ok(job_notify) => { info!("received JobNotify: {:?}", job_notify); - let request = build_job_request(job_registry).await; - if let Err(e) = self.send_request(&request).await { - error!("error sending JobRequestNext: {:?}", e); - } + return Ok(JobTransportMessage::Notify); } Err(e) => { error!("error decoding JobNotify: {:?}", e); @@ -115,8 +107,7 @@ impl JobTransport for RelayTransport { should_cancel = job.should_cancel, "received JobExecution" ); - - return Ok(job); + return Ok(JobTransportMessage::Execution(job)); } Err(e) => { error!("error decoding JobExecution: {:?}", e); @@ -129,20 +120,7 @@ impl JobTransport for RelayTransport { job_execution_id = %job_cancel.job_execution_id, "received JobCancel" ); - let cancelled = job_registry - .cancel_job(&job_cancel.job_execution_id) - .await; - if cancelled { - info!( - job_execution_id = %job_cancel.job_execution_id, - "Successfully cancelled job" - ); - } else { - warn!( - job_execution_id = %job_cancel.job_execution_id, - "Attempted to cancel non-existent or already completed job" - ); - } + return Ok(JobTransportMessage::Cancel(job_cancel)); } Err(e) => { error!("error decoding JobCancel: {:?}", e); @@ -154,7 +132,6 @@ impl JobTransport for RelayTransport { } Err(e) => { error!("error receiving from relay: {:?}", e); - return Err(e); } } @@ -163,14 +140,14 @@ impl JobTransport for RelayTransport { async fn request_next_job( &self, - job_registry: &JobRegistry, - ) -> Result<(), orb_relay_client::Err> { - let request = build_job_request(job_registry).await; - self.send_request(&request).await?; + job_request: &JobRequestNext, + ) -> TransportResult<()> { + self.send_request(job_request).await?; + info!( "sent JobRequestNext ignoring {} job execution IDs: {:?}", - 
request.ignore_job_execution_ids.len(), - request.ignore_job_execution_ids + job_request.ignore_job_execution_ids.len(), + job_request.ignore_job_execution_ids ); Ok(()) @@ -179,7 +156,7 @@ impl JobTransport for RelayTransport { async fn send_job_update( &self, job_update: &JobExecutionUpdate, - ) -> Result<(), orb_relay_client::Err> { + ) -> TransportResult<()> { info!( job_execution_id = %job_update.job_execution_id, job_id = %job_update.job_id, @@ -224,33 +201,28 @@ impl JobTransport for RelayTransport { #[derive(Debug)] pub struct LocalTransport { - pending_job: std::sync::Mutex>, - final_status: std::sync::Mutex>, + pending_job: Mutex>, + terminal_update: Mutex>, shutdown: CancellationToken, } impl LocalTransport { - pub fn new(job: JobExecution) -> (Self, CancellationToken) { - let shutdown = CancellationToken::new(); - let token = shutdown.clone(); - let transport = Self { - pending_job: std::sync::Mutex::new(Some(job)), - final_status: std::sync::Mutex::new(None), - shutdown, - }; - - (transport, token) + pub fn new(job: JobExecution) -> Self { + Self { + pending_job: Mutex::new(Some(job)), + terminal_update: Mutex::new(None), + shutdown: CancellationToken::new(), + } } - pub fn final_status(&self) -> Option { - *self.final_status.lock().unwrap() + pub fn terminal_update(&self) -> Option { + self.terminal_update.lock().unwrap().clone() } - pub fn shutdown_handle(&self) -> JoinHandle> { - let token = self.shutdown.clone(); + pub fn shutdown_handle(&self) -> JoinHandle> { + let shutdown = self.shutdown.clone(); tokio::spawn(async move { - token.cancelled().await; - + shutdown.cancelled().await; Ok(()) }) } @@ -258,10 +230,7 @@ impl LocalTransport { #[async_trait] impl JobTransport for LocalTransport { - async fn listen_for_job( - &self, - _job_registry: &JobRegistry, - ) -> Result { + async fn recv(&self) -> TransportResult { let next_job = self.pending_job.lock().unwrap().take(); if let Some(job) = next_job { @@ -273,41 +242,40 @@ impl JobTransport for 
LocalTransport { "received local JobExecution" ); - return Ok(job); + return Ok(JobTransportMessage::Execution(job)); } std::future::pending::<()>().await; unreachable!() } - async fn request_next_job( - &self, - _job_registry: &JobRegistry, - ) -> Result<(), orb_relay_client::Err> { + async fn request_next_job(&self, _request: &JobRequestNext) -> TransportResult<()> { Ok(()) } async fn send_job_update( &self, job_update: &JobExecutionUpdate, - ) -> Result<(), orb_relay_client::Err> { + ) -> TransportResult<()> { let status_name = JobExecutionStatus::try_from(job_update.status) - .map(|s| format!("{s:?}")) + .map(|status| format!("{status:?}")) .unwrap_or_else(|_| format!("Unknown({})", job_update.status)); println!("--- Job Update ---"); println!("job_id: {}", job_update.job_id); println!("job_execution_id: {}", job_update.job_execution_id); println!("status: {status_name}"); + if !job_update.std_out.is_empty() { println!("stdout:\n{}", job_update.std_out); } + if !job_update.std_err.is_empty() { eprintln!("stderr:\n{}", job_update.std_err); } if job_update.status != JobExecutionStatus::InProgress as i32 { - *self.final_status.lock().unwrap() = Some(job_update.status); + *self.terminal_update.lock().unwrap() = Some(job_update.clone()); self.shutdown.cancel(); } @@ -339,18 +307,43 @@ impl JobClient { } } - pub async fn listen_for_job(&self) -> Result { - self.transport.listen_for_job(&self.job_registry).await + pub async fn listen_for_job(&self) -> TransportResult { + loop { + match self.transport.recv().await? 
{ + JobTransportMessage::Notify => { + let _ = self.request_next_job().await; + } + JobTransportMessage::Execution(job) => { + return Ok(job); + } + JobTransportMessage::Cancel(job_cancel) => { + let cancelled = self + .job_registry + .cancel_job(&job_cancel.job_execution_id) + .await; + + if cancelled { + info!( + job_execution_id = %job_cancel.job_execution_id, + "Successfully cancelled job" + ); + } else { + warn!( + job_execution_id = %job_cancel.job_execution_id, + "Attempted to cancel non-existent or already completed job" + ); + } + } + } + } } - pub async fn request_next_job(&self) -> Result<(), orb_relay_client::Err> { - self.transport.request_next_job(&self.job_registry).await + pub async fn request_next_job(&self) -> TransportResult<()> { + let job_request = build_job_request(&self.job_registry).await; + self.transport.request_next_job(&job_request).await } - /// Check if we should request more jobs and do so if appropriate. - /// This method is used to implement parallel job execution. - /// Returns `false` if no jobs were requested. 
- pub async fn try_request_more_jobs(&self) -> Result { + pub async fn try_request_more_jobs(&self) -> TransportResult { if !self .job_config .should_request_more_jobs(&self.job_registry) @@ -371,7 +364,7 @@ impl JobClient { pub async fn send_job_update( &self, job_update: &JobExecutionUpdate, - ) -> Result<(), orb_relay_client::Err> { + ) -> TransportResult<()> { self.transport.send_job_update(job_update).await } @@ -399,6 +392,7 @@ mod tests { #[test] fn test_job_execution_update_creation_for_cancellation() { + // Test that we can create the correct JobExecutionUpdate for cancellation let job_execution = JobExecution { job_id: "test_job_123".to_string(), job_execution_id: "test_execution_456".to_string(), @@ -406,6 +400,7 @@ mod tests { should_cancel: true, }; + // Create the update that main.rs would create for should_cancel = true let cancel_update = JobExecutionUpdate { job_id: job_execution.job_id.clone(), job_execution_id: job_execution.job_execution_id.clone(), @@ -414,6 +409,7 @@ mod tests { std_err: "Job was cancelled".to_string(), }; + // Verify the update has the correct fields assert_eq!(cancel_update.job_id, "test_job_123"); assert_eq!(cancel_update.job_execution_id, "test_execution_456"); assert_eq!(cancel_update.status, JobExecutionStatus::Failed as i32); @@ -423,6 +419,7 @@ mod tests { #[test] fn test_should_cancel_field_detection() { + // Test that we can properly detect should_cancel field let normal_job = JobExecution { job_id: "job1".to_string(), job_execution_id: "exec1".to_string(), @@ -449,6 +446,7 @@ mod tests { #[test] fn test_job_request_with_ignore_ids() { + // Test creating JobRequestNext with ignore IDs directly let ignore_ids = vec![ "job_exec_1".to_string(), "job_exec_2".to_string(), @@ -462,6 +460,7 @@ mod tests { assert_eq!(job_request.ignore_job_execution_ids, ignore_ids); assert_eq!(job_request.ignore_job_execution_ids.len(), 3); + // Test with empty IDs let empty_request = JobRequestNext { ignore_job_execution_ids: vec![], }; @@ 
-471,6 +470,7 @@ mod tests { #[test] fn test_default_job_request() { + // Test that default JobRequestNext has empty ignore_job_execution_ids let default_request = JobRequestNext::default(); assert!(default_request.ignore_job_execution_ids.is_empty()); } diff --git a/orb-jobs-agent/src/job_system/handler.rs b/orb-jobs-agent/src/job_system/handler.rs index fa790e2b7..da9f9f01a 100644 --- a/orb-jobs-agent/src/job_system/handler.rs +++ b/orb-jobs-agent/src/job_system/handler.rs @@ -1,7 +1,7 @@ use super::ctx::Ctx; use crate::{ job_system::{ - client::{JobClient, JobTransport}, + client::{JobClient, JobTransport, TransportResult}, ctx::JobExecutionUpdateExt, orchestrator::{JobCompletion, JobConfig, JobRegistry, JobStartStatus}, sanitize::{redact_args, redact_job_document, should_sanitize}, @@ -75,9 +75,9 @@ impl JobHandlerBuilder { self, deps: Deps, transport: Arc, - relay_handle: JoinHandle>, + transport_handle: JoinHandle>, ) -> JobHandler { - JobHandler::new(self, deps, transport, relay_handle) + JobHandler::new(self, deps, transport, transport_handle) } } @@ -92,7 +92,7 @@ impl JobHandlerBuilder { /// .parallel("read_file", read_file::handler) /// .parallel("mcu", mcu::handler) /// .parallel_max("logs", 3, logs::handler) -/// .build(deps, transport, relay_handle) +/// .build(deps) /// .run() /// .await; /// ``` @@ -108,7 +108,7 @@ pub struct JobHandler { job_config: JobConfig, job_registry: JobRegistry, pub(crate) job_client: JobClient, - relay_handle: JoinHandle>, + transport_handle: JoinHandle>, handlers: HashMap, } @@ -124,7 +124,7 @@ impl JobHandler { builder: JobHandlerBuilder, deps: Deps, transport: Arc, - relay_handle: JoinHandle>, + transport_handle: JoinHandle>, ) -> Self { let job_registry = JobRegistry::new(); let job_config = builder.job_config; @@ -136,7 +136,7 @@ impl JobHandler { job_config, job_registry, job_client, - relay_handle, + transport_handle, handlers: builder.handlers.into_iter().collect(), } } @@ -165,13 +165,30 @@ impl JobHandler { loop 
{ tokio::select! { - _ = &mut self.relay_handle => { + transport_result = &mut self.transport_handle => { + match transport_result { + Ok(Ok(())) => {} + Ok(Err(e)) => { + error!("Transport shutdown with error: {:?}", e); + } + Err(e) => { + error!("Transport task failed: {:?}", e); + } + } info!("Relay service shutdown detected"); break; } - Ok(job) = self.job_client.listen_for_job() => { - self = self.handle_job(job).await; + result = self.job_client.listen_for_job() => { + match result { + Ok(job) => { + self = self.handle_job(job).await; + } + Err(e) => { + error!("Failed to receive job: {:?}", e); + break; + } + } } } } diff --git a/orb-jobs-agent/src/main.rs b/orb-jobs-agent/src/main.rs index 8e002a4a0..e51c6c208 100644 --- a/orb-jobs-agent/src/main.rs +++ b/orb-jobs-agent/src/main.rs @@ -2,16 +2,19 @@ use clap::Parser; use color_eyre::eyre::{eyre, Context, ContextCompat, Result}; use orb_endpoints::{v1::Endpoints, Backend}; use orb_info::TokenTaskHandle; -use orb_jobs_agent::args::Args; -use orb_jobs_agent::job_system::client::{LocalTransport, RelayTransport}; -use orb_jobs_agent::program::{self, Deps}; use orb_jobs_agent::settings::Settings; use orb_jobs_agent::shell::Host; +use orb_jobs_agent::{ + args::Args, + job_system::client::{JobTransport, LocalTransport, RelayTransport}, + program::{self, Deps, Runtime}, +}; use orb_relay_client::{Auth, Client, ClientOpts}; -use orb_relay_messages::jobs::v1::{JobExecution, JobExecutionStatus}; -use orb_relay_messages::relay::entity::EntityType; -use std::sync::Arc; -use std::time::Duration; +use orb_relay_messages::{ + jobs::v1::{JobExecution, JobExecutionStatus}, + relay::entity::EntityType, +}; +use std::{sync::Arc, time::Duration}; use tokio_util::sync::CancellationToken; use tracing::{info, warn}; @@ -38,13 +41,15 @@ async fn run(args: &Args) -> Result<()> { .await?; let settings = Settings::from_args(args, "/mnt/scratch").await?; - let deps = Deps::new(Host, connection, settings.clone()); match &args.run_job { 
- Some(job_document) => run_local(deps, job_document).await, - None => run_service(deps, args, &settings).await, + Some(job_document) => run_local(deps, job_document).await?, + None => run_service(deps, args, &settings).await?, } + + info!("Shutting down jobs agent completed"); + Ok(()) } async fn run_local(deps: Deps, job_document: &str) -> Result<()> { @@ -55,28 +60,33 @@ async fn run_local(deps: Deps, job_document: &str) -> Result<()> { should_cancel: false, }; - let (local_transport, _shutdown_token) = LocalTransport::new(job); - let transport = Arc::new(local_transport); - let relay_handle = { - let t = Arc::clone(&transport); - t.shutdown_handle() + let transport = Arc::new(LocalTransport::new(job)); + let runtime = Runtime { + transport: transport.clone(), + transport_handle: transport.shutdown_handle(), + watch_conn_changes: false, }; - program::run(deps, Arc::clone(&transport) as _, relay_handle).await?; + program::run(deps, runtime).await?; - let status = transport - .final_status() + let terminal_update = transport + .terminal_update() .ok_or_else(|| eyre!("local run ended without terminal job status"))?; - if status != JobExecutionStatus::Succeeded as i32 { - let status_name = JobExecutionStatus::try_from(status) - .map(|s| format!("{s:?}")) - .unwrap_or_else(|_| format!("Unknown({status})")); + if terminal_update.status != JobExecutionStatus::Succeeded as i32 { + let status_name = JobExecutionStatus::try_from(terminal_update.status) + .map(|status| format!("{status:?}")) + .unwrap_or_else(|_| format!("Unknown({})", terminal_update.status)); - return Err(eyre!("local job failed with status {status_name}")); - } + if terminal_update.std_err.is_empty() { + return Err(eyre!("local job failed with status {status_name}")); + } - info!("Shutting down jobs agent completed"); + return Err(eyre!( + "local job failed with status {status_name}: {}", + terminal_update.std_err + )); + } Ok(()) } @@ -92,45 +102,7 @@ async fn run_service(deps: Deps, args: &Args, 
settings: &Settings) -> Result<()> }) .wrap_err("could not get Backend Endpoint from env")?; - let auth = match &args.orb_token { - Some(t) => Auth::Token(t.as_str().into()), - None => { - let shutdown_token = CancellationToken::new(); - let dbus_addr = args.dbus_addr.clone(); - let get_token = async || { - let connection = zbus::ConnectionBuilder::address(dbus_addr.as_str())? - .build() - .await - .map_err(|e| { - eyre!("failed to establish zbus conn at {}: {e}", dbus_addr) - })?; - - TokenTaskHandle::spawn(&connection, &shutdown_token) - .await - .wrap_err("failed to get auth token!") - }; - - let token_rec_fut = async { - loop { - match get_token().await { - Err(e) => { - warn!("{e}! trying again in 5s"); - tokio::time::sleep(Duration::from_secs(5)).await; - continue; - } - Ok(t) => break t.token_recv, - } - } - }; - - let token_rec = - tokio::time::timeout(Duration::from_secs(60), token_rec_fut) - .await - .wrap_err("could not get auth token after 60s")?; - - Auth::TokenReceiver(token_rec) - } - }; + let auth = resolve_auth(args, &deps.session_dbus).await?; let relay_namespace = args .relay_namespace @@ -155,16 +127,47 @@ async fn run_service(deps: Deps, args: &Args, settings: &Settings) -> Result<()> .build(); info!("Connecting to relay: {:?}", relay_host); - let (relay_client, relay_handle) = Client::connect(opts); - let transport = Arc::new(RelayTransport::new( + let (relay_client, transport_handle) = Client::connect(opts); + let transport: Arc = Arc::new(RelayTransport::new( relay_client, target_service_id, relay_namespace, )); + let runtime = Runtime { + transport, + transport_handle, + watch_conn_changes: true, + }; - program::run(deps, transport, relay_handle).await?; + program::run(deps, runtime).await +} - info!("Shutting down jobs agent completed"); +async fn resolve_auth(args: &Args, connection: &zbus::Connection) -> Result { + match &args.orb_token { + Some(token) => Ok(Auth::Token(token.as_str().into())), + None => { + let shutdown = 
CancellationToken::new(); + let get_token = async || { + TokenTaskHandle::spawn(connection, &shutdown) + .await + .wrap_err("failed to get auth token!") + }; - Ok(()) + let token_recv = tokio::time::timeout(Duration::from_secs(60), async { + loop { + match get_token().await { + Ok(handle) => return handle.token_recv, + Err(e) => { + warn!("{e}! trying again in 5s"); + tokio::time::sleep(Duration::from_secs(5)).await; + } + } + } + }) + .await + .wrap_err("could not get auth token after 60s")?; + + Ok(Auth::TokenReceiver(token_recv)) + } + } } diff --git a/orb-jobs-agent/src/program.rs b/orb-jobs-agent/src/program.rs index 996d40c62..850ed17dc 100644 --- a/orb-jobs-agent/src/program.rs +++ b/orb-jobs-agent/src/program.rs @@ -7,7 +7,10 @@ use crate::{ update_versions, wifi_add, wifi_connect, wifi_ip, wifi_list, wifi_remove, wifi_scan, wipe_downloads, }, - job_system::{client::JobTransport, handler::JobHandler}, + job_system::{ + client::{JobTransport, TransportResult}, + handler::JobHandler, + }, settings::Settings, shell::Shell, }; @@ -22,6 +25,12 @@ pub struct Deps { pub settings: Settings, } +pub struct Runtime { + pub transport: Arc, + pub transport_handle: JoinHandle>, + pub watch_conn_changes: bool, +} + impl Deps { pub fn new(shell: S, session_dbus: zbus::Connection, settings: Settings) -> Self where @@ -35,11 +44,7 @@ impl Deps { } } -pub async fn run( - deps: Deps, - transport: Arc, - relay_handle: JoinHandle>, -) -> Result<()> { +pub async fn run(deps: Deps, runtime: Runtime) -> Result<()> { fs::create_dir_all(&deps.settings.store_path).await?; let orb_id = deps.settings.orb_id.clone(); let zenoh_port = deps.settings.zenoh_port; @@ -75,11 +80,20 @@ pub async fn run( .parallel_max("logs", 3, logs::handler) .sequential("reboot", reboot::handler) .sequential("slot_switch", slot_switch::handler) - .build(deps, transport, relay_handle); + .build(deps, runtime.transport, runtime.transport_handle); - let _zenoh_session = - conn_change::spawn_watcher(orb_id, 
job_handler.job_client.clone(), zenoh_port) - .await?; + let _zenoh_session = if runtime.watch_conn_changes { + Some( + conn_change::spawn_watcher( + orb_id, + job_handler.job_client.clone(), + zenoh_port, + ) + .await?, + ) + } else { + None + }; job_handler.run().await?; diff --git a/orb-jobs-agent/tests/common/fixture.rs b/orb-jobs-agent/tests/common/fixture.rs index db954d687..96984584f 100644 --- a/orb-jobs-agent/tests/common/fixture.rs +++ b/orb-jobs-agent/tests/common/fixture.rs @@ -8,7 +8,7 @@ use dbus_launch::BusType; use orb_connd_dbus::Connd; use orb_info::OrbId; use orb_jobs_agent::{ - job_system::client::RelayTransport, + job_system::client::{JobTransport, RelayTransport, TransportResult}, program::{self, Deps}, settings::Settings, shell::Shell, @@ -58,33 +58,9 @@ pub struct JobAgentFixture { } impl JobAgentFixture { - pub fn relay_transport(&self) -> RelayTransport { - let opts = ClientOpts::entity(EntityType::Orb) - .id(self.settings.orb_id.to_string()) - .namespace(self.relay_namespace.clone()) - .endpoint(self.relay_host.clone()) - .auth(self.auth.clone()) - .max_connection_attempts(Amount::Val(3)) - .connection_timeout(Duration::from_secs(1)) - .heartbeat(Duration::from_secs(u64::MAX)) - .ack_timeout(Duration::from_secs(1)) - .build(); - - let (relay_client, _handle) = Client::connect(opts); - - RelayTransport::new( - relay_client, - self.target_service_id.clone(), - self.relay_namespace.clone(), - ) - } - pub fn connect_relay( &self, - ) -> ( - Arc, - JoinHandle>, - ) { + ) -> (Arc, JoinHandle>) { let opts = ClientOpts::entity(EntityType::Orb) .id(self.settings.orb_id.to_string()) .namespace(self.relay_namespace.clone()) @@ -96,14 +72,14 @@ impl JobAgentFixture { .ack_timeout(Duration::from_secs(1)) .build(); - let (relay_client, relay_handle) = Client::connect(opts); - let transport = Arc::new(RelayTransport::new( + let (relay_client, transport_handle) = Client::connect(opts); + let transport: Arc = Arc::new(RelayTransport::new( relay_client, 
self.target_service_id.clone(), self.relay_namespace.clone(), )); - (transport, relay_handle) + (transport, transport_handle) } } @@ -307,13 +283,16 @@ impl JobAgentFixture { .await .unwrap(); - let (transport, relay_handle) = self.connect_relay(); - + let (transport, transport_handle) = self.connect_relay(); let deps = Deps::new(shell, self.dbus_conn.clone(), settings.clone()); let join_handle = task::spawn(async move { tokio::select! { - r = program::run(deps, transport, relay_handle) => { + r = program::run(deps, program::Runtime { + transport, + transport_handle, + watch_conn_changes: true, + }) => { if let Err(e) = r { println!("program::run failed with {e}"); } diff --git a/orb-jobs-agent/tests/job_handler.rs b/orb-jobs-agent/tests/job_handler.rs index c064877ab..60a497e4f 100644 --- a/orb-jobs-agent/tests/job_handler.rs +++ b/orb-jobs-agent/tests/job_handler.rs @@ -19,7 +19,7 @@ async fn sequential_jobs_block_other_jobs_execution() { // Arrange let fx = JobAgentFixture::new().await; let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); - let (transport, relay_handle) = fx.connect_relay(); + let (transport, transport_handle) = fx.connect_relay(); let wait_time = Duration::from_millis(100); @@ -30,7 +30,7 @@ async fn sequential_jobs_block_other_jobs_execution() { Ok(ctx.success().stdout("one")) }) .parallel("second", async |ctx| Ok(ctx.success().stdout("two"))) - .build(deps, transport, relay_handle) + .build(deps, transport, transport_handle) .run(), ); @@ -48,7 +48,7 @@ async fn can_start_parallel_jobs_in_parallel() { // Arrange let fx = JobAgentFixture::new().await; let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); - let (transport, relay_handle) = fx.connect_relay(); + let (transport, transport_handle) = fx.connect_relay(); let wait_time = Duration::from_millis(500); @@ -59,7 +59,7 @@ async fn can_start_parallel_jobs_in_parallel() { Ok(ctx.success().stdout("one")) }) .parallel("second", async |ctx| 
Ok(ctx.success().stdout("two"))) - .build(deps, transport, relay_handle) + .build(deps, transport, transport_handle) .run(), ); @@ -83,11 +83,11 @@ async fn gracefully_handles_unsupported_cmds() { // Arrange let fx = JobAgentFixture::new().await; let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); - let (transport, relay_handle) = fx.connect_relay(); + let (transport, transport_handle) = fx.connect_relay(); task::spawn( JobHandler::builder() - .build(deps, transport, relay_handle) + .build(deps, transport, transport_handle) .run(), ); @@ -104,7 +104,7 @@ async fn it_cancels_a_long_running_job() { // Arrange let fx = JobAgentFixture::with_namespace("cancel_long_running_job").await; let deps = Deps::new(Host, fx.dbus_conn.clone(), fx.settings.clone()); - let (transport, relay_handle) = fx.connect_relay(); + let (transport, transport_handle) = fx.connect_relay(); let wait_time = Duration::from_millis(50); @@ -128,7 +128,7 @@ async fn it_cancels_a_long_running_job() { Ok(ctx.success().stdout("cancelled succesfully!")) }) - .build(deps, transport, relay_handle) + .build(deps, transport, transport_handle) .run(), );