Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions ntp-admin/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ api_versions!([
// which CRDB panics early during control plane startup because the clocks
// are not synchronized well-enough. We're adding this as part of a
// two-phase rollout to get around #9290 for now.
(3, ADD_DEBUG_ENDPOINT),
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realise the text above this says not to add new versions. If anyone has a better idea of how to surface all of this information, I'm all ears.

(2, ADD_MAX_ERROR_AND_OFFSET),
(1, INITIAL),
]);
Expand All @@ -23,6 +24,16 @@ api_versions!([
pub trait NtpAdminApi {
type Context;

// Declares the `GET /debug` operation of the NTP admin API. The `versions`
// range below is open-ended, starting at `VERSION_ADD_DEBUG_ENDPOINT`, so the
// endpoint is only served from that API version onward.
//
// NOTE(review): these notes are plain `//` comments on purpose; the `///`
// line appears verbatim as the operation summary in the checked-in OpenAPI
// document, so extending it would presumably change the generated spec.
/// Collect read-only diagnostic information
#[endpoint {
method = GET,
path = "/debug",
versions = VERSION_ADD_DEBUG_ENDPOINT..,
}]
async fn debug(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<latest::debug::DebugInfo>, HttpError>;

/// Query for the state of time synchronization
#[endpoint {
method = GET,
Expand Down
48 changes: 48 additions & 0 deletions ntp-admin/src/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use dropshot::HttpError;
use dropshot::HttpResponseOk;
use dropshot::RequestContext;
use ntp_admin_api::*;
use ntp_admin_types::debug::DebugInfo;
use ntp_admin_types::timesync::TimeSync;
use slog::info;
use slog_error_chain::InlineErrorChain;
Expand Down Expand Up @@ -189,6 +190,45 @@ impl NtpAdminImpl {
info!(log, "parse_timesync_result"; "result" => ?result);
result
}

/// Gathers diagnostic information about the NTP zone.
///
/// This is currently a stub: it logs that collection was requested and
/// returns a fixed placeholder string. The notes in the body outline the
/// read-only commands it is expected to run once implemented.
async fn debug_get(ctx: &ServerContext) -> Result<DebugInfo, HttpError> {
    let logger = ctx.log();
    info!(logger, "collecting NTP zone debug info");

    // TODO-K: run the read-only diagnostic commands used by the buildomat
    // deploy.sh script.
    //
    // Candidate commands:
    //
    //   chronyc -n sources
    //   cat /etc/inet/chrony.conf      (TODO-K: is this too much?)
    //
    // TODO-K: determine whether this zone is a boundary NTP zone and, if it
    // is, check connectivity. We probably want to log whether it is a
    // boundary zone or not.
    //
    // Connectivity to the NTP servers: the servers are set as a property
    // value on the chrony setup service. For the dig command, use the value
    // of
    //
    //   svcprop -p config/server svc:/oxide/chrony-setup:default
    //
    // or retrieve it from the chrony config instead. Assuming the external
    // NTP server is ntp.eng.oxide.computer, that gives:
    //
    //   /usr/sbin/dig ntp.eng.oxide.computer @1.1.1.1   (maybe 9.9.9.9 too?)
    //   getent hosts ntp.eng.oxide.computer
    //
    // For the internal NTP zone we may want to use the value of
    //
    //   svcprop -p config/boundary_pool svc:/oxide/chrony-setup:default
    //
    // Say it is boundary_ntp.<some-uuid>.oxide.internal; then we can run:
    //
    //   getent hosts boundary_ntp.<some-uuid>.oxide.internal
    //
    // The deploy.sh job also has some destructive commands (svcadm disable,
    // chronyd -dd*). Those must not be added here.

    // Placeholder payload until the commands above are wired in.
    let placeholder = DebugInfo { data: String::from("DEBUG INFO HERE") };
    Ok(placeholder)
}
}

impl NtpAdminApi for NtpAdminImpl {
Expand All @@ -201,6 +241,14 @@ impl NtpAdminApi for NtpAdminImpl {
let response = Self::timesync_get(ctx).await?;
Ok(HttpResponseOk(response))
}

async fn debug(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<DebugInfo>, HttpError> {
let ctx = rqctx.context();
let response = Self::debug_get(ctx).await?;
Ok(HttpResponseOk(response))
}
}

#[cfg(test)]
Expand Down
5 changes: 5 additions & 0 deletions ntp-admin/types/src/debug.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

pub use ntp_admin_types_versions::latest::debug::*;
1 change: 1 addition & 0 deletions ntp-admin/types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@
//! The API crate (`ntp-admin-api`) uses fixed identifiers from the versions
//! crate directly.
pub mod debug;
pub mod timesync;
12 changes: 12 additions & 0 deletions ntp-admin/types/versions/src/add_debug_endpoint/debug.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

// Payload type for the diagnostic (`debug`) data of the NTP admin API.
//
// NOTE(review): kept as plain `//` comments; the `///` line below matches the
// schema description in the checked-in OpenAPI document, so adding doc
// comments here would presumably alter the generated spec.
/// Diagnostic information
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)]
pub struct DebugInfo {
// Free-form diagnostic text. Presumably the collected output of the
// read-only diagnostic commands - TODO confirm once collection is implemented.
pub data: String,
}
11 changes: 11 additions & 0 deletions ntp-admin/types/versions/src/add_debug_endpoint/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Version `ADD_DEBUG_ENDPOINT` of the NTP Admin API.
//!
//! This version adds a `/debug` endpoint that returns the output of
//! read-only diagnostic commands run inside the NTP zone, intended to aid
//! debugging of time synchronization issues.

pub mod debug;
4 changes: 4 additions & 0 deletions ntp-admin/types/versions/src/latest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@
pub mod timesync {
pub use crate::v2::timesync::TimeSync;
}

// Latest-version view of the debug types; `DebugInfo` is re-exported from the
// `v3` module of this crate.
pub mod debug {
pub use crate::v3::debug::DebugInfo;
}
2 changes: 2 additions & 0 deletions ntp-admin/types/versions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ pub mod latest;
pub mod v1;
#[path = "add_max_error_and_offset/mod.rs"]
pub mod v2;
#[path = "add_debug_endpoint/mod.rs"]
pub mod v3;
1 change: 1 addition & 0 deletions openapi/ntp-admin/ntp-admin-2.0.0-f8d00a.json.gitstub
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
46677b61d8ee9ec2cf1fa2d48c03f00b2099ca06:openapi/ntp-admin/ntp-admin-2.0.0-f8d00a.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,33 @@
"url": "https://oxide.computer",
"email": "api@oxide.computer"
},
"version": "2.0.0"
"version": "3.0.0"
},
"paths": {
"/debug": {
"get": {
"summary": "Collect read-only diagnostic information",
"operationId": "debug",
"responses": {
"200": {
"description": "successful operation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DebugInfo"
}
}
}
},
"4XX": {
"$ref": "#/components/responses/Error"
},
"5XX": {
"$ref": "#/components/responses/Error"
}
}
}
},
"/timesync": {
"get": {
"summary": "Query for the state of time synchronization",
Expand Down Expand Up @@ -37,6 +61,18 @@
},
"components": {
"schemas": {
"DebugInfo": {
"description": "Diagnostic information",
"type": "object",
"properties": {
"data": {
"type": "string"
}
},
"required": [
"data"
]
},
"Error": {
"description": "Error information from a response.",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion openapi/ntp-admin/ntp-admin-latest.json
2 changes: 2 additions & 0 deletions smf/ntp-admin/manifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
<service_fmri value='svc:/oxide/ntp:default' />
</dependency>

<!-- TODO-K: This service should depend on the chrony-setup service -->

<exec_method type='method' name='start'
exec='/opt/oxide/lib/svc/manifest/ntp-admin.sh'
timeout_seconds='0' />
Expand Down
Loading