From 4278944b2b37602dd45b381e6a7f97584a3a05ec Mon Sep 17 00:00:00 2001 From: marcus o'flaherty Date: Mon, 27 Apr 2026 13:58:58 +0000 Subject: [PATCH 1/4] add alarm cooldown to suppress duplicate alarms within a cooldown period. Also bugfix to TimestampToTimestring --- src/ServiceDiscovery/Services.cpp | 68 +++++++++++++++++++++++-------- src/ServiceDiscovery/Services.h | 23 ++++++++--- 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/src/ServiceDiscovery/Services.cpp b/src/ServiceDiscovery/Services.cpp index 0b91f26..4531092 100644 --- a/src/ServiceDiscovery/Services.cpp +++ b/src/ServiceDiscovery/Services.cpp @@ -34,11 +34,12 @@ bool Services::Init(Store &m_variables, zmq::context_t* context_in, SlowControlC bool alerts_send = 0; int alert_send_port = 12242; - bool alerts_receive = 1; + bool alerts_receive = 1; int alert_receive_port = 12243; int sc_port = 60000; mon_merge_period_ms = 1000; multicast_send_period_ms = 5000; + alarm_cooldown_ms = 1000; m_variables.Get("alerts_send", alerts_send); m_variables.Get("alert_send_port", alert_send_port); @@ -47,6 +48,7 @@ bool Services::Init(Store &m_variables, zmq::context_t* context_in, SlowControlC m_variables.Get("sc_port", sc_port); m_variables.Get("mon_merge_period_ms",mon_merge_period_ms); m_variables.Get("multicast_send_period_ms",multicast_send_period_ms); + m_variables.Get("alarm_cooldown_ms",alarm_cooldown_ms); sc_vars->InitThreadedReceiver(m_context, sc_port, 100, new_service, alert_receive_port, alerts_receive, alert_send_port, alerts_send); m_backend_client.SetUp(m_context); @@ -81,9 +83,12 @@ bool Services::Init(Store &m_variables, zmq::context_t* context_in, SlowControlC thread_args.services = this; thread_args.logging_buf = &logging_buf; thread_args.monitoring_buf = &monitoring_buf; + thread_args.alarm_buf = &alarm_buf; thread_args.logging_buf_mtx = &logging_buf_mtx; thread_args.monitoring_buf_mtx = &monitoring_buf_mtx; + thread_args.alarm_buf_mtx = &alarm_buf_mtx; 
thread_args.multicast_send_period_ms = std::chrono::milliseconds{multicast_send_period_ms}; + thread_args.alarm_cooldown_ms = alarm_cooldown_ms; thread_args.last_send = std::chrono::steady_clock::now(); if(!m_utils.CreateThread("Services", &BufferThread, &thread_args)){ if(m_verbose) std::cerr<<"failed to spawn background thread"< locker(alarm_buf_mtx); + + // check if we sent this alarm already in the last N ms + std::vector::iterator it=alarm_buf.begin(); + while(it!=alarm_buf.end()){ + if(((timestamp>it->timestamp) ? (timestamp-it->timestamp) : (it->timestamp-timestamp))>alarm_cooldown_ms){ + it = alarm_buf.erase(it); // discard if no longer relevant + continue; + } + // if same device and message, do not send it again + if(it->device==device && it->message==message) return true; + ++it; + } + // otherwise add it + alarm_buf.emplace_back(message, device, timestamp); + + locker.unlock(); + + std::string cmd_string = "{\"time\":\""+timestring+"\"" + ",\"device\":\""+name+"\"" + ",\"critical\":"+std::to_string(critical) + ",\"description\":\"" + message + "\"}"; @@ -121,7 +145,7 @@ bool Services::SendAlarm(const std::string& message, bool critical, const std::s // also record it to the logging socket cmd_string = std::string{"{\"topic\":\"LOGGING\""} - + ",\"time\":\""+TimeStringFromUnixMs(timestamp)+"\"" + + ",\"time\":\""+timestring+"\"" + ",\"device\":\""+name+"\"" + ",\"severity\":0" + ",\"message\":\"" + message + "\"}"; @@ -136,7 +160,7 @@ bool Services::SendAlarm(const std::string& message, bool critical, const std::s // ««-------------- ≪ °◇◆◇° ≫ --------------»» -bool Services::SendCalibrationData(const std::string& json_data, const std::string& description, const std::string& name, const uint64_t timestamp, int* version, const unsigned int timeout){ +bool Services::SendCalibrationData(const std::string& json_data, const std::string& description, const std::string& name, uint64_t timestamp, int* version, const unsigned int timeout){ const std::string& 
c_name = (name=="") ? m_name : name; @@ -174,7 +198,7 @@ bool Services::SendCalibrationData(const std::string& json_data, const std::stri // ««-------------- ≪ °◇◆◇° ≫ --------------»» -bool Services::SendDeviceConfig(const std::string& json_data, const std::string& author, const std::string& description, const std::string& device, const uint64_t timestamp, int* version, const unsigned int timeout){ +bool Services::SendDeviceConfig(const std::string& json_data, const std::string& author, const std::string& description, const std::string& device, uint64_t timestamp, int* version, const unsigned int timeout){ if(version) *version=-1; @@ -215,7 +239,7 @@ bool Services::SendDeviceConfig(const std::string& json_data, const std::string& // ««-------------- ≪ °◇◆◇° ≫ --------------»» -bool Services::SendBaseConfig(const std::string& json_data, const std::string& name, const std::string& author, const std::string& description, const uint64_t timestamp, int* version, const unsigned int timeout){ +bool Services::SendBaseConfig(const std::string& json_data, const std::string& name, const std::string& author, const std::string& description, uint64_t timestamp, int* version, const unsigned int timeout){ if(version) *version=-1; @@ -255,7 +279,7 @@ bool Services::SendBaseConfig(const std::string& json_data, const std::string& n // ««-------------- ≪ °◇◆◇° ≫ --------------»» -bool Services::SendRunModeConfig(const std::string& json_data, const std::string& name, const std::string& author, const std::string& description, const uint64_t timestamp, int* version, const unsigned int timeout){ +bool Services::SendRunModeConfig(const std::string& json_data, const std::string& name, const std::string& author, const std::string& description, uint64_t timestamp, int* version, const unsigned int timeout){ if(version) *version=-1; @@ -294,7 +318,7 @@ bool Services::SendRunModeConfig(const std::string& json_data, const std::string // ««-------------- ≪ °◇◆◇° ≫ --------------»» -bool 
Services::SendROOTplot(const std::string& plot_name, const std::string& draw_options, const std::string& json_data, int* version, const uint64_t timestamp, const unsigned int lifetime, const unsigned int timeout){ +bool Services::SendROOTplot(const std::string& plot_name, const std::string& draw_options, const std::string& json_data, int* version, uint64_t timestamp, const unsigned int lifetime, const unsigned int timeout){ std::string cmd_string = "{ \"time\":\""+TimeStringFromUnixMs(timestamp)+"\"" + ", \"name\":\""+ plot_name +"\"" @@ -337,7 +361,7 @@ bool Services::SendPlotlyPlot( const std::string& trace, const std::string& layout, int* version, - const uint64_t timestamp, + uint64_t timestamp, const unsigned int lifetime, const unsigned int timeout ) { @@ -360,7 +384,7 @@ bool Services::SendPlotlyPlot( const std::vector& traces, const std::string& layout, int* version, - const uint64_t timestamp, + uint64_t timestamp, const unsigned int lifetime, const unsigned int timeout ) { @@ -934,7 +958,7 @@ bool Services::GetPlotlyPlot(const std::string& name, std::string& trace, std::s // Multicast Senders // ----------------- -bool Services::SendLog(const std::string& message, LogLevel severity, const std::string& device, const uint64_t timestamp){ +bool Services::SendLog(const std::string& message, LogLevel severity, const std::string& device, uint64_t timestamp){ const std::string& name = (device=="") ? m_name : device; @@ -973,7 +997,7 @@ bool Services::SendLog(std::string& msg){ } -bool Services::SendMonitoringData(const std::string& json_data, const std::string& subject, const std::string& device, const uint64_t timestamp){ +bool Services::SendMonitoringData(const std::string& json_data, const std::string& subject, const std::string& device, uint64_t timestamp){ const std::string& name = (device=="") ? 
m_name : device; @@ -1013,7 +1037,7 @@ bool Services::SendMonitoringData(std::string& msg){ } // send ROOT plot over multicast -bool Services::SendROOTplotMulticast(const std::string& plot_name, const std::string& draw_options, const std::string& json_data, const unsigned int lifetime, const uint64_t timestamp){ +bool Services::SendROOTplotMulticast(const std::string& plot_name, const std::string& draw_options, const std::string& json_data, const unsigned int lifetime, uint64_t timestamp){ std::string cmd_string = std::string{"{\"topic\":\"TROOTPLOT\""} + ", \"time\":\""+TimeStringFromUnixMs(timestamp)+"\"" @@ -1097,14 +1121,14 @@ std::string Services::GetDeviceName(){ // ««-------------- ≪ °◇◆◇° ≫ --------------»» -std::string Services::TimeStringFromUnixMs(const uint64_t timestamp){ +std::string Services::TimeStringFromUnixMs(uint64_t& timestamp){ if(timestamp==1) return "now()"; // remotely interpret 'now' time_t timestamp_sec; // time_t is equivalent to uint64_t uint16_t timestamp_ms = 0; if(timestamp==0){ - timestamp_sec = time(nullptr)*1000; // locally interpret 'now' + timestamp = 1000*time(&timestamp_sec); // locally interpret 'now' } else { timestamp_ms = timestamp%1000; timestamp_sec = timestamp/1000; @@ -1253,7 +1277,17 @@ void Services::BufferThread(Thread_args* args){ m_args->monitoring_buf->clear(); // FIXME do we not clear on error...? does it depend on the error...? } - // release monitoring buffer mtx + // our other service task: prune the alarm buffer.
+ // we don't actually send these out here, that still happens in SendAlarm + locker = std::unique_lock(*m_args->alarm_buf_mtx); + + std::vector::iterator it=m_args->alarm_buf->begin(); + while(it!=m_args->alarm_buf->end()){ + if(((time(0)*1000)-it->timestamp)>m_args->alarm_cooldown_ms) it = m_args->alarm_buf->erase(it); + else ++it; + } + + // release mtx locker.unlock(); std::this_thread::sleep_until(m_args->last_send+m_args->multicast_send_period_ms); diff --git a/src/ServiceDiscovery/Services.h b/src/ServiceDiscovery/Services.h index bb84a48..1db5650 100644 --- a/src/ServiceDiscovery/Services.h +++ b/src/ServiceDiscovery/Services.h @@ -42,17 +42,27 @@ namespace ToolFramework { uint64_t timestamp; }; + struct AlarmMsg { + AlarmMsg(const std::string& i_message, const std::string& i_device="", uint64_t i_timestamp=0) : message{i_message}, device{i_device}, timestamp{i_timestamp} {}; + std::string message; + std::string device; + uint64_t timestamp; + }; + class Services; struct BufferThreadArgs : Thread_args { Services* services; std::vector* logging_buf; std::unordered_map* monitoring_buf; + std::vector* alarm_buf; std::mutex* logging_buf_mtx; std::mutex* monitoring_buf_mtx; + std::mutex* alarm_buf_mtx; std::chrono::milliseconds multicast_send_period_ms; std::chrono::steady_clock::time_point last_send; std::string local_merge_buf; + uint32_t alarm_cooldown_ms; }; class Services{ @@ -70,10 +80,10 @@ namespace ToolFramework { bool SQLQuery(const std::string& query, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); // public interface methods - push into local buffer - bool SendLog(const std::string& message, LogLevel severity=LogLevel::Message, const std::string& device="", const uint64_t timestamp=0); + bool SendLog(const std::string& message, LogLevel severity=LogLevel::Message, const std::string& device="", uint64_t timestamp=0); bool SendMonitoringData(const std::string& json_data, const std::string& subject, const std::string& device="", uint64_t 
timestamp=0); - bool SendAlarm(const std::string& message, bool critical=false, const std::string& device="", const uint64_t timestamp=0, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); + bool SendAlarm(const std::string& message, bool critical=false, const std::string& device="", uint64_t timestamp=0, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool SendCalibrationData(const std::string& json_data, const std::string& description, const std::string& device="", uint64_t timestamp=0, int* version=nullptr, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool GetCalibrationData(std::string& json_data, int& version, const std::string& device="", const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool GetCalibrationData(std::string& json_data, int&& version=-1, const std::string& device="", const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); @@ -85,15 +95,15 @@ namespace ToolFramework { bool GetRunModeConfig(std::string& json_data, const std::string& runmode_name, const int version=-1, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool GetRunDeviceConfig(std::string& json_data, const int base_config_id, const int runmode_config_id, const std::string& device="", int* version=nullptr, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool GetCachedDeviceConfig(std::string& json_data, const int base_config_id, const int runmode_config_id, const std::string& device="", int* version=nullptr, unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); - bool SendROOTplot(const std::string& plot_name, const std::string& draw_options, const std::string& json_data, int* version=nullptr, const uint64_t timestamp=0, const unsigned int lifetime=5, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); - bool SendROOTplotMulticast(const std::string& plot_name, const std::string& draw_options, const std::string& json_data, const unsigned int lifetime=5, const uint64_t timestamp=0); + bool SendROOTplot(const std::string& plot_name, const std::string& 
draw_options, const std::string& json_data, int* version=nullptr, uint64_t timestamp=0, const unsigned int lifetime=5, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); + bool SendROOTplotMulticast(const std::string& plot_name, const std::string& draw_options, const std::string& json_data, const unsigned int lifetime=5, uint64_t timestamp=0); bool GetROOTplot(const std::string& plot_name, std::string& draw_option, std::string& json_data, int& version, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool GetROOTplot(const std::string& plot_name, std::string& draw_option, std::string& json_data, int&& version=-1, const unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool SendPlotlyPlot(const std::string& name, const std::string& json_trace, const std::string& json_layout="{}", int* version=nullptr, uint64_t timestamp=0, const unsigned int lifetime=5, unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool SendPlotlyPlot(const std::string& name, const std::vector& json_traces, const std::string& json_layout="{}", int* version=nullptr, uint64_t timestamp=0, const unsigned int lifetime=5, unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool GetPlotlyPlot(const std::string& name, std::string& json_trace, std::string& json_layout, int& version, unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); bool GetPlotlyPlot(const std::string& name, std::string& json_trace, std::string& json_layout, int&& version=-1, unsigned int timeout=SERVICES_DEFAULT_TIMEOUT); - static std::string TimeStringFromUnixMs(const uint64_t time); + static std::string TimeStringFromUnixMs(uint64_t& time); std::string GetLocalConfig(); bool SetLocalConfig(std::string json); @@ -138,10 +148,13 @@ namespace ToolFramework { std::vector logging_buf; std::unordered_map monitoring_buf; + std::vector alarm_buf; std::mutex logging_buf_mtx; std::mutex monitoring_buf_mtx; + std::mutex alarm_buf_mtx; uint32_t mon_merge_period_ms; uint32_t multicast_send_period_ms; + uint32_t alarm_cooldown_ms; }; From 
b5f10400169e332afb660d153aa68f79bbc3a408 Mon Sep 17 00:00:00 2001 From: marcus o'flaherty Date: Tue, 28 Apr 2026 19:39:46 +0000 Subject: [PATCH 2/4] more try/catches --- src/ServiceDiscovery/ServicesBackend.cpp | 74 ++++++++++++++++-------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/src/ServiceDiscovery/ServicesBackend.cpp b/src/ServiceDiscovery/ServicesBackend.cpp index c0ad933..6f75757 100644 --- a/src/ServiceDiscovery/ServicesBackend.cpp +++ b/src/ServiceDiscovery/ServicesBackend.cpp @@ -215,23 +215,32 @@ bool ServicesBackend::InitZMQ(){ // socket to publish write commands // ------------------------------- - clt_pub_socket = new zmq::socket_t(*context, ZMQ_PUB); - clt_pub_socket->setsockopt(ZMQ_LINGER,10); - clt_pub_socket->setsockopt(ZMQ_SNDTIMEO, clt_pub_socket_timeout); - clt_pub_socket->setsockopt(ZMQ_LINGER, 10); - clt_pub_socket->bind(std::string("tcp://*:")+std::to_string(clt_pub_port)); - + try{ + clt_pub_socket = new zmq::socket_t(*context, ZMQ_PUB); + clt_pub_socket->setsockopt(ZMQ_LINGER,10); + clt_pub_socket->setsockopt(ZMQ_SNDTIMEO, clt_pub_socket_timeout); + clt_pub_socket->setsockopt(ZMQ_LINGER, 10); + clt_pub_socket->bind(std::string("tcp://*:")+std::to_string(clt_pub_port)); + } catch(zmq::error_t& e){ + std::cerr<<"ServicesBackend caught "<setsockopt(ZMQ_LINGER,10); - clt_dlr_socket->setsockopt(ZMQ_SNDTIMEO, clt_dlr_socket_timeout); - clt_dlr_socket->setsockopt(ZMQ_RCVTIMEO, clt_dlr_socket_timeout); - clt_dlr_socket->setsockopt(ZMQ_IDENTITY, clt_ID.c_str(), clt_ID.length()); - clt_dlr_socket->setsockopt(ZMQ_IMMEDIATE,1); - clt_dlr_socket->setsockopt(ZMQ_LINGER, 10); - clt_dlr_socket->bind(std::string("tcp://*:")+std::to_string(clt_dlr_port)); + try{ + clt_dlr_socket = new zmq::socket_t(*context, ZMQ_DEALER); + clt_dlr_socket->setsockopt(ZMQ_LINGER,10); + clt_dlr_socket->setsockopt(ZMQ_SNDTIMEO, clt_dlr_socket_timeout); + clt_dlr_socket->setsockopt(ZMQ_RCVTIMEO, clt_dlr_socket_timeout); + 
clt_dlr_socket->setsockopt(ZMQ_IDENTITY, clt_ID.c_str(), clt_ID.length()); + clt_dlr_socket->setsockopt(ZMQ_IMMEDIATE,1); + clt_dlr_socket->setsockopt(ZMQ_LINGER, 10); + clt_dlr_socket->bind(std::string("tcp://*:")+std::to_string(clt_dlr_port)); + } catch(zmq::error_t& e){ + std::cerr<<"ServicesBackend caught "< response; - dlr_socket_mutex.lock(); - int ret = PollAndReceive(clt_dlr_socket, in_polls.at(0), poll_timeout, response); - dlr_socket_mutex.unlock(); + int ret; + try{ + dlr_socket_mutex.lock(); + ret = PollAndReceive(clt_dlr_socket, in_polls.at(0), poll_timeout, response); + dlr_socket_mutex.unlock(); + }catch(zmq::error_t& e){ + dlr_socket_mutex.unlock(); + std::cerr<<"ServicesBackend caught "<10) std::cout<<"ServicesBackend::SendNextCommand calling PollAndSend" <<", message type: "<10) std::cout<<"ServicesBackend::SendNextCommand send returned "< Date: Wed, 29 Apr 2026 13:57:05 +0000 Subject: [PATCH 3/4] add 'config_name' variable so that devices can use a different name for fetching configs on LoadConfig alert, make default timestamps precise to ms, make ServicesBackend take timeouts as numbers not pointers and change Services default to 0 so it comes from the config file. 
Only try to join background thread in ServicesBackend if it was started so that we don't die in Finalise --- src/ServiceDiscovery/Services.cpp | 80 ++++++++++++------------ src/ServiceDiscovery/Services.h | 5 +- src/ServiceDiscovery/ServicesBackend.cpp | 9 +-- src/ServiceDiscovery/ServicesBackend.h | 4 +- 4 files changed, 50 insertions(+), 48 deletions(-) diff --git a/src/ServiceDiscovery/Services.cpp b/src/ServiceDiscovery/Services.cpp index 4531092..453eaa8 100644 --- a/src/ServiceDiscovery/Services.cpp +++ b/src/ServiceDiscovery/Services.cpp @@ -40,6 +40,7 @@ bool Services::Init(Store &m_variables, zmq::context_t* context_in, SlowControlC mon_merge_period_ms = 1000; multicast_send_period_ms = 5000; alarm_cooldown_ms = 1000; + config_devicename = ""; m_variables.Get("alerts_send", alerts_send); m_variables.Get("alert_send_port", alert_send_port); @@ -64,7 +65,9 @@ bool Services::Init(Store &m_variables, zmq::context_t* context_in, SlowControlC if(m_verbose) std::cerr<<"device names cannot start with '('"<*)nullptr, &timeout, &err); + bool ok = m_backend_client.SendCommand("W_ALARM", cmd_string, (std::vector*)nullptr, timeout, &err); if(!ok){ if(m_verbose) std::cerr<<"SendAlarm error: "<& resp // Since we don't know what a user-provided query string may be, prepend with a space to ensure this. std::string sanitized_query = std::string{" "}+query; - if(!m_backend_client.SendCommand("W_QUERY", sanitized_query, &responses, &timeout, &err)){ + if(!m_backend_client.SendCommand("W_QUERY", sanitized_query, &responses, timeout, &err)){ if(m_verbose) std::cerr<<"SQLQuery error: "<(std::chrono::system_clock::now().time_since_epoch()).count(); } + timestamp_ms = timestamp%1000; + timestamp_sec = timestamp/1000; struct tm timestruct; gmtime_r(&timestamp_sec, &timestruct); // FIXME error checking?
char timestring[24]; @@ -1168,23 +1170,21 @@ bool Services::LoadConfigAlertFunc(const char* alert, const char* payload){ if(run_mode_config_id!=m_run_mode_config_id || base_config_id!=m_base_config_id){ while(count<5){ - if(!GetCachedDeviceConfig(m_local_config, base_config_id, run_mode_config_id)){ - usleep(100000); - count++; + if(!GetCachedDeviceConfig(m_local_config, base_config_id, run_mode_config_id, config_devicename)){ + usleep(100000); + count++; } else count=99; } if(count==5) return false; - - (*sc_vars)["NewConfig"]->SetValue(1); - m_base_config_id = base_config_id; - m_run_mode_config_id = run_mode_config_id; - } - - - + + (*sc_vars)["NewConfig"]->SetValue(1); + m_base_config_id = base_config_id; + m_run_mode_config_id = run_mode_config_id; + } + return true; - + } std::string Services::LoadConfigSlowControlFunc(const char* control){ @@ -1194,10 +1194,10 @@ std::string Services::LoadConfigSlowControlFunc(const char* control){ tmp.JsonParser(payload); uint64_t base_config_id=0; uint64_t run_mode_config_id=0; - + short count = 0; std::stringstream ret; - + tmp.Get("Base",base_config_id); tmp.Get("RunMode",run_mode_config_id); @@ -1205,7 +1205,7 @@ std::string Services::LoadConfigSlowControlFunc(const char* control){ if(run_mode_config_id!=m_run_mode_config_id || base_config_id!=m_base_config_id){ while(count<5){ - if(!GetCachedDeviceConfig(m_local_config, base_config_id, run_mode_config_id)){ + if(!GetCachedDeviceConfig(m_local_config, base_config_id, run_mode_config_id, config_devicename)){ usleep(100000); count++; } @@ -1223,8 +1223,8 @@ std::string Services::LoadConfigSlowControlFunc(const char* control){ ret <<"Loaded config "< #include -#define SERVICES_DEFAULT_TIMEOUT 1800 +#define SERVICES_DEFAULT_TIMEOUT 0 namespace ToolFramework { enum class LogLevel { Error=0, Warning=1, Message=2, Debug=3, Debug1=4, Debug2=5, Debug3=6 }; struct LogMsg { - LogMsg(const std::string& i_message, LogLevel i_severity=LogLevel::Message, const std::string& 
i_device="", const uint64_t i_timestamp=0) : message{i_message}, severity{i_severity}, device{i_device}, timestamp{i_timestamp} {}; + LogMsg(const std::string& i_message, LogLevel i_severity=LogLevel::Message, const std::string& i_device="", const uint64_t i_timestamp=0) : message{i_message}, severity{i_severity}, device{i_device}, timestamp{i_timestamp}, repeats{0} {}; std::string message; LogLevel severity; std::string device; @@ -136,6 +136,7 @@ namespace ToolFramework { static void BufferThread(Thread_args* args); std::string m_name; + std::string config_devicename; bool m_verbose; zmq::context_t* m_context; ServicesBackend m_backend_client; diff --git a/src/ServiceDiscovery/ServicesBackend.cpp b/src/ServiceDiscovery/ServicesBackend.cpp index 6f75757..de38928 100644 --- a/src/ServiceDiscovery/ServicesBackend.cpp +++ b/src/ServiceDiscovery/ServicesBackend.cpp @@ -435,13 +435,13 @@ bool ServicesBackend::SendMulticast(MulticastType type, std::string command, std return true; } -bool ServicesBackend::SendCommand(const std::string& topic, const std::string& command, std::vector* results, const uint32_t* timeout_ms, std::string* err){ +bool ServicesBackend::SendCommand(const std::string& topic, const std::string& command, std::vector* results, const uint32_t timeout_ms, std::string* err){ // send a command and receive response. // This is a wrapper that ensures we always return within the requested timeout. 
if(m_verbosity>10) std::cout<<"ServicesBackend::SendCommand invoked with command '"< resultsvec; @@ -867,7 +867,8 @@ bool ServicesBackend::Finalise(){ terminator.set_value(); // wait for it to finish up and return Log("ServicesBackend waiting for background thread to rejoin",v_debug,m_verbosity); - background_thread.join(); + // if errors during initialise, it may not be running + if(background_thread.joinable()) background_thread.join(); Log("ServicesBackend Removing services",v_debug,m_verbosity); //if(utilities) utilities->RemoveService("slowcontrol_write"); diff --git a/src/ServiceDiscovery/ServicesBackend.h b/src/ServiceDiscovery/ServicesBackend.h index 0867c7d..40266a1 100644 --- a/src/ServiceDiscovery/ServicesBackend.h +++ b/src/ServiceDiscovery/ServicesBackend.h @@ -64,8 +64,8 @@ class ServicesBackend { bool Finalise(); // interfaces called by clients. These return within timeout. - bool SendCommand(const std::string& topic, const std::string& command, std::vector* results=nullptr, const uint32_t* timeout_ms=nullptr, std::string* err=nullptr); - bool SendCommand(const std::string& topic, const std::string& command, std::string* results=nullptr, const uint32_t* timeout_ms=nullptr, std::string* err=nullptr); + bool SendCommand(const std::string& topic, const std::string& command, std::vector* results=nullptr, const uint32_t timeout_ms=0, std::string* err=nullptr); + bool SendCommand(const std::string& topic, const std::string& command, std::string* results=nullptr, const uint32_t timeout_ms=0, std::string* err=nullptr); // multicasts bool SendMulticast(MulticastType type, std::string command, std::string* err=nullptr); From 82c30971daa8e87e9701164744c80c7c71e3c439 Mon Sep 17 00:00:00 2001 From: marcus o'flaherty Date: Wed, 29 Apr 2026 14:21:39 +0000 Subject: [PATCH 4/4] reduce default services backend timeout from 2000 to 300ms to align with old libDAQInterface default --- src/ServiceDiscovery/ServicesBackend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/ServiceDiscovery/ServicesBackend.cpp b/src/ServiceDiscovery/ServicesBackend.cpp index de38928..ac76756 100644 --- a/src/ServiceDiscovery/ServicesBackend.cpp +++ b/src/ServiceDiscovery/ServicesBackend.cpp @@ -179,7 +179,7 @@ bool ServicesBackend::InitZMQ(){ inpoll_timeout=500; // total timeout on how long we wait for response from a command - command_timeout=2000; + command_timeout=300; // Update with user-specified values. m_variables.Get("clt_pub_port",clt_pub_port);