diff --git a/internal/api/auth_test.go b/internal/api/auth_test.go index c3070b2..0df65c2 100644 --- a/internal/api/auth_test.go +++ b/internal/api/auth_test.go @@ -1,6 +1,7 @@ package api import ( + "context" "encoding/json" "net/http" "net/http/httptest" @@ -119,7 +120,7 @@ func TestValidateAPIKey_Forbidden(t *testing.T) { func TestValidateAPIKey_ServerError(t *testing.T) { origSleep := retrySleepFn - retrySleepFn = func(d time.Duration) {} // no-op for fast tests + retrySleepFn = func(context.Context, time.Duration) error { return nil } // no-op for fast tests defer func() { retrySleepFn = origSleep }() ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -164,7 +165,7 @@ func TestValidateAPIKey_EmptyResults(t *testing.T) { func TestValidateAPIKey_ConnectionError(t *testing.T) { origSleep := retrySleepFn - retrySleepFn = func(d time.Duration) {} // no-op for fast tests + retrySleepFn = func(context.Context, time.Duration) error { return nil } // no-op for fast tests defer func() { retrySleepFn = origSleep }() // Use a non-existent URL to simulate connection failure. diff --git a/internal/api/client.go b/internal/api/client.go index 2a05034..ebfcf7b 100644 --- a/internal/api/client.go +++ b/internal/api/client.go @@ -112,7 +112,11 @@ type bearerAuthTransport struct { } func (t *bearerAuthTransport) RoundTrip(req *http.Request) (*http.Response, error) { - token, err := t.tokenCache.Token() + // Propagate the request context to the token fetch. KLA-448 — the + // previous implementation called Token() with no context, so a + // short --probe-timeout couldn't reach the OAuth endpoint and the + // caller waited through the http.Client's 30s default. + token, err := t.tokenCache.Token(req.Context()) if err != nil { return nil, fmt.Errorf("failed to obtain bearer token: %w", err) } diff --git a/internal/api/client_test.go b/internal/api/client_test.go index d4d9ee7..9ad9d4b 100644 --- a/internal/api/client_test.go +++ b/internal/api/client_test.go @@ -2,6 +2,7 @@ package api import ( "bytes" + "context" "crypto/tls" "net/http" "net/http/httptest" @@ -436,7 +437,7 @@ func TestLoggingTransport_VerboseLogsError(t *testing.T) { defer resetViper() origSleep := retrySleepFn - retrySleepFn = func(d time.Duration) {} + retrySleepFn = func(context.Context, time.Duration) error { return nil } defer func() { retrySleepFn = origSleep }() viper.Set("verbose", true) diff --git a/internal/api/context_propagation_test.go b/internal/api/context_propagation_test.go new file mode 100644 index 0000000..40017e1 --- /dev/null +++ b/internal/api/context_propagation_test.go @@ -0,0 +1,146 @@ +package api + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" +) + +// Pre-KLA-448 these scenarios would all wait through internal timeouts +// (30s OAuth http.Client, uninterruptible backoff sleep). Post-fix the +// context cancels at every layer. + +// TestTokenCache_TokenRespectsContextTimeout drives the OAuth token +// endpoint with an httptest server that responds slowly, then calls +// Token with a 100ms context. The call must return within ~2s (the +// server's slow-respond cap) — pre-fix it would wait up to 30s (the +// http.Client's default Timeout). +func TestTokenCache_TokenRespectsContextTimeout(t *testing.T) { + slow := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // 2s is enough to outlast the 100ms ctx deadline + reasonable + // slack but short enough that the test process won't hang if + // cancellation breaks. + select { + case <-time.After(2 * time.Second): + case <-r.Context().Done(): + } + })) + defer slow.Close() + + prev := SetOAuthTokenURL(slow.URL) + defer SetOAuthTokenURL(prev) + + tc := NewTokenCache("client-id", "client-secret") + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + start := time.Now() + _, err := tc.Token(ctx) + elapsed := time.Since(start) + + if err == nil { + t.Fatal("expected error from cancelled context, got nil") + } + if !errors.Is(err, context.DeadlineExceeded) { + t.Errorf("err chain should contain context.DeadlineExceeded so errors.Is callers can handle it; got: %v", err) + } + if elapsed > 2*time.Second { + t.Errorf("Token returned in %v, want fast cancel; pre-fix this would wait up to 30s (server slow-respond was 2s)", elapsed) + } +} + +// TestTokenCache_TokenRespectsContextCancellation tests the +// Ctrl-C / parent-cancel path. Identical structure to the timeout +// case but the cancellation is explicit. +func TestTokenCache_TokenRespectsContextCancellation(t *testing.T) { + slow := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + select { + case <-time.After(2 * time.Second): + case <-r.Context().Done(): + } + })) + defer slow.Close() + hung := slow // keep variable name for body below + + prev := SetOAuthTokenURL(hung.URL) + defer SetOAuthTokenURL(prev) + + tc := NewTokenCache("client-id", "client-secret") + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _, err := tc.Token(ctx) + if !errors.Is(err, context.Canceled) { + t.Errorf("err chain should contain context.Canceled; got: %v", err) + } +} + +// TestRetryTransport_BackoffRespectsContext verifies the retry loop's +// backoff sleep no longer blocks past a context deadline. Without the +// fix, a 1s retry backoff would run to completion even with a 50ms +// context — making `jc doctor --probe-timeout 100ms` regularly take +// seconds. +func TestRetryTransport_BackoffRespectsContext(t *testing.T) { + var attemptCount atomic.Int32 + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attemptCount.Add(1) + w.WriteHeader(http.StatusServiceUnavailable) // 503 → retryable + })) + defer ts.Close() + + rt := newRetryTransport(http.DefaultTransport) + client := &http.Client{Transport: rt} + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + req, _ := http.NewRequestWithContext(ctx, "GET", ts.URL, nil) + + start := time.Now() + resp, err := client.Do(req) + elapsed := time.Since(start) + if resp != nil { + resp.Body.Close() + } + + if err == nil { + t.Fatal("expected error when context cancels during backoff, got nil response") + } + // Should fail before the 1s backoff that follows the first retry. + if elapsed > 1*time.Second { + t.Errorf("retry loop ran for %v after context expired; expected ctx-cancel during backoff to short-circuit", elapsed) + } +} + +// TestRetryTransport_PreservesContextErrorInChain pins that the +// retry transport's error return is errors.Is-friendly for +// context.DeadlineExceeded. doctor's classifyProbeError depends on +// this for its "timeout" classification. +func TestRetryTransport_PreservesContextErrorInChain(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + defer ts.Close() + + rt := newRetryTransport(http.DefaultTransport) + client := &http.Client{Transport: rt} + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel() + time.Sleep(20 * time.Millisecond) // ensure ctx fires before the request + + req, _ := http.NewRequestWithContext(ctx, "GET", ts.URL, nil) + _, err := client.Do(req) + if !errors.Is(err, context.DeadlineExceeded) { + t.Errorf("err chain should contain context.DeadlineExceeded; got: %v", err) + } +} diff --git a/internal/api/oauth.go b/internal/api/oauth.go index 4aa875c..156646f 100644 --- a/internal/api/oauth.go +++ b/internal/api/oauth.go @@ -1,6 +1,7 @@ package api import ( + "context" "encoding/base64" "encoding/json" "fmt" @@ -58,8 +59,19 @@ func NewTokenCache(clientID, clientSecret string) *TokenCache { } } -// Token returns a valid bearer token, refreshing if expired or not yet fetched. -func (tc *TokenCache) Token() (string, error) { +// Token returns a valid bearer token, refreshing if expired or not yet +// fetched. The context is honored during a fetch — callers with a tight +// deadline (e.g. `jc doctor --probe-timeout 100ms`) get a clean +// context-error return instead of waiting through the http.Client's +// 30s default timeout. KLA-448 closed the context-leak that forced +// jc doctor to wrap its probe in a goroutine. +func (tc *TokenCache) Token(ctx context.Context) (string, error) { + if ctx == nil { + // Defensive — http.NewRequestWithContext panics on nil. Cobra + // invariants give cmd.Context() != nil in production, but tests + // that construct commands without RunE can leave it nil. + ctx = context.Background() + } tc.mu.Lock() defer tc.mu.Unlock() @@ -69,7 +81,7 @@ func (tc *TokenCache) Token() (string, error) { } // Fetch a new token. - token, expiresIn, err := tc.fetchToken() + token, expiresIn, err := tc.fetchToken(ctx) if err != nil { return "", err } @@ -86,13 +98,18 @@ func (tc *TokenCache) ExpiresAt() time.Time { return tc.expiresAt } -// fetchToken exchanges client credentials for a bearer token. -func (tc *TokenCache) fetchToken() (string, int, error) { +// fetchToken exchanges client credentials for a bearer token. The +// context is propagated to the outbound HTTP request so a caller's +// deadline / cancellation reaches the actual socket — pre-KLA-448 +// this used http.NewRequest (no context) and the request would run +// to the http.Client's 30s timeout regardless of what the caller +// asked for. +func (tc *TokenCache) fetchToken(ctx context.Context) (string, int, error) { data := url.Values{} data.Set("grant_type", "client_credentials") data.Set("scope", "api") - req, err := http.NewRequest("POST", oauthTokenURL, strings.NewReader(data.Encode())) + req, err := http.NewRequestWithContext(ctx, "POST", oauthTokenURL, strings.NewReader(data.Encode())) if err != nil { return "", 0, fmt.Errorf("failed to create token request: %w", err) } diff --git a/internal/api/oauth_test.go b/internal/api/oauth_test.go index 1a4608a..1342a14 100644 --- a/internal/api/oauth_test.go +++ b/internal/api/oauth_test.go @@ -1,6 +1,7 @@ package api import ( + "context" "encoding/json" "net/http" "net/http/httptest" @@ -52,7 +53,7 @@ func TestTokenCache_FetchToken_Success(t *testing.T) { defer func() { oauthTokenURL = orig }() tc := NewTokenCache("test-client-id", "test-client-secret") - token, err := tc.Token() + token, err := tc.Token(context.Background()) if err != nil { t.Fatalf("Token() error: %v", err) } @@ -81,13 +82,13 @@ func TestTokenCache_CachesToken(t *testing.T) { tc := NewTokenCache("client-id", "client-secret") // First call should fetch. - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err != nil { t.Fatalf("first Token() error: %v", err) } // Second call should use cache. - token, err := tc.Token() + token, err := tc.Token(context.Background()) if err != nil { t.Fatalf("second Token() error: %v", err) } @@ -125,7 +126,7 @@ func TestTokenCache_RefreshesExpiredToken(t *testing.T) { tc := NewTokenCache("client-id", "client-secret") // First call. - _, _ = tc.Token() + _, _ = tc.Token(context.Background()) if callCount != 1 { t.Fatalf("expected 1 call after first Token(), got %d", callCount) } @@ -134,7 +135,7 @@ func TestTokenCache_RefreshesExpiredToken(t *testing.T) { fakeNow = fakeNow.Add(3631 * time.Second) // Should refresh. - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err != nil { t.Fatalf("Token() error after expiry: %v", err) } @@ -155,7 +156,7 @@ func TestTokenCache_InvalidCredentials(t *testing.T) { defer func() { oauthTokenURL = orig }() tc := NewTokenCache("bad-id", "bad-secret") - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err == nil { t.Fatal("expected error for invalid credentials") } @@ -176,7 +177,7 @@ func TestTokenCache_ForbiddenCredentials(t *testing.T) { defer func() { oauthTokenURL = orig }() tc := NewTokenCache("client-id", "client-secret") - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err == nil { t.Fatal("expected error for forbidden credentials") } @@ -197,7 +198,7 @@ func TestTokenCache_ServerError(t *testing.T) { defer func() { oauthTokenURL = orig }() tc := NewTokenCache("client-id", "client-secret") - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err == nil { t.Fatal("expected error for server error") } @@ -222,7 +223,7 @@ func TestTokenCache_EmptyAccessToken(t *testing.T) { defer func() { oauthTokenURL = orig }() tc := NewTokenCache("client-id", "client-secret") - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err == nil { t.Fatal("expected error for empty access token") } @@ -252,7 +253,7 @@ func TestTokenCache_DefaultExpiresIn(t *testing.T) { defer func() { nowFunc = origNow }() tc := NewTokenCache("client-id", "client-secret") - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err != nil { t.Fatalf("Token() error: %v", err) } @@ -278,7 +279,7 @@ func TestTokenCache_ConnectionError(t *testing.T) { defer func() { oauthTokenURL = orig }() tc := NewTokenCache("client-id", "client-secret") - _, err := tc.Token() + _, err := tc.Token(context.Background()) if err == nil { t.Fatal("expected error for connection failure") } diff --git a/internal/api/retry.go b/internal/api/retry.go index bcb49fe..d936943 100644 --- a/internal/api/retry.go +++ b/internal/api/retry.go @@ -1,6 +1,7 @@ package api import ( + "context" "io" "math" "math/rand/v2" @@ -16,13 +17,29 @@ const ( maxBackoff = 30 * time.Second ) -// retrySleepFn is the default sleep function for retry backoff. -// Tests can override this to avoid real delays. -var retrySleepFn = time.Sleep +// retrySleepFn waits for d, or returns ctx.Err() if the context fires +// first. Pre-KLA-448 this was a plain func(time.Duration) and a +// caller's deadline couldn't cancel the backoff sleep — so a hung +// server + retry loop kept running long past --probe-timeout. +// Overridable for tests that want instant retries. +var retrySleepFn = func(ctx context.Context, d time.Duration) error { + if d <= 0 { + return nil + } + timer := time.NewTimer(d) + defer timer.Stop() + select { + case <-timer.C: + return nil + case <-ctx.Done(): + return ctx.Err() + } +} -// SetRetrySleepFn overrides the retry backoff sleep function. -// Used by cross-package tests to disable delays. Returns the previous function. -func SetRetrySleepFn(fn func(time.Duration)) func(time.Duration) { +// SetRetrySleepFn overrides the retry backoff function. +// Used by cross-package tests to disable real delays. Returns the +// previous function so the caller can restore it on cleanup. +func SetRetrySleepFn(fn func(context.Context, time.Duration) error) func(context.Context, time.Duration) error { prev := retrySleepFn retrySleepFn = fn return prev @@ -36,7 +53,7 @@ func SetRetrySleepFn(fn func(time.Duration)) func(time.Duration) { type retryTransport struct { base http.RoundTripper maxRetries int - sleepFn func(time.Duration) // overridable for testing + sleepFn func(context.Context, time.Duration) error // overridable for testing } func newRetryTransport(base http.RoundTripper) *retryTransport { @@ -62,7 +79,9 @@ func (t *retryTransport) RoundTrip(req *http.Request) (*http.Response, error) { // Connection-level error — retry. if err != nil { if attempt < t.maxRetries { - t.backoff(attempt) + if berr := t.backoff(req.Context(), attempt); berr != nil { + return nil, berr + } continue } return nil, err @@ -76,7 +95,9 @@ func (t *retryTransport) RoundTrip(req *http.Request) (*http.Response, error) { // Retryable status code — drain body and retry. if attempt < t.maxRetries { drainAndClose(resp) - t.backoff(attempt) + if berr := t.backoff(req.Context(), attempt); berr != nil { + return nil, berr + } continue } @@ -87,16 +108,20 @@ func (t *retryTransport) RoundTrip(req *http.Request) (*http.Response, error) { return resp, err } -// backoff sleeps for an exponentially increasing duration with jitter. +// backoff waits for an exponentially increasing duration with jitter, +// or returns ctx.Err() if the context fires first. KLA-448 — pre-fix +// the wait was uninterruptible, so a caller's --probe-timeout could +// not cancel a retry loop in progress. +// // Formula: min(2^attempt * 1s + jitter, 30s) -func (t *retryTransport) backoff(attempt int) { +func (t *retryTransport) backoff(ctx context.Context, attempt int) error { base := time.Duration(math.Pow(2, float64(attempt))) * time.Second jitter := time.Duration(rand.Int64N(int64(time.Second))) wait := base + jitter if wait > maxBackoff { wait = maxBackoff } - t.sleepFn(wait) + return t.sleepFn(ctx, wait) } // isRetryableStatus returns true for HTTP status codes that should trigger a retry. diff --git a/internal/api/retry_test.go b/internal/api/retry_test.go index b41097d..6e6e0a6 100644 --- a/internal/api/retry_test.go +++ b/internal/api/retry_test.go @@ -46,7 +46,7 @@ func TestRetryTransport_RetriesOnServerError(t *testing.T) { defer ts.Close() rt := newRetryTransport(http.DefaultTransport) - rt.sleepFn = func(d time.Duration) {} // no-op sleep for fast tests + rt.sleepFn = func(context.Context, time.Duration) error { return nil } // no-op sleep for fast tests client := &http.Client{Transport: &authTransport{apiKey: "test", base: &loggingTransport{base: rt, apiKey: "test"}}} @@ -78,7 +78,7 @@ func TestRetryTransport_RetriesOn429(t *testing.T) { defer ts.Close() rt := newRetryTransport(http.DefaultTransport) - rt.sleepFn = func(d time.Duration) {} + rt.sleepFn = func(context.Context, time.Duration) error { return nil } client := &http.Client{Transport: rt} resp, err := client.Get(ts.URL + "/test") @@ -117,7 +117,7 @@ func TestRetryTransport_RetriesOnBadGateway(t *testing.T) { defer ts.Close() rt := newRetryTransport(http.DefaultTransport) - rt.sleepFn = func(d time.Duration) {} + rt.sleepFn = func(context.Context, time.Duration) error { return nil } client := &http.Client{Transport: rt} resp, err := client.Get(ts.URL + "/test") @@ -152,7 +152,7 @@ func TestRetryTransport_NoRetryOn4xx(t *testing.T) { defer ts.Close() rt := newRetryTransport(http.DefaultTransport) - rt.sleepFn = func(d time.Duration) {} + rt.sleepFn = func(context.Context, time.Duration) error { return nil } client := &http.Client{Transport: rt} resp, err := client.Get(ts.URL + "/test") @@ -178,7 +178,7 @@ func TestRetryTransport_MaxRetriesExceeded(t *testing.T) { defer ts.Close() rt := newRetryTransport(http.DefaultTransport) - rt.sleepFn = func(d time.Duration) {} + rt.sleepFn = func(context.Context, time.Duration) error { return nil } client := &http.Client{Transport: rt} resp, err := client.Get(ts.URL + "/test") @@ -206,8 +206,9 @@ func TestRetryTransport_BackoffDurations(t *testing.T) { defer ts.Close() rt := newRetryTransport(http.DefaultTransport) - rt.sleepFn = func(d time.Duration) { + rt.sleepFn = func(_ context.Context, d time.Duration) error { sleepDurations = append(sleepDurations, d) + return nil } client := &http.Client{Transport: rt} @@ -245,7 +246,7 @@ func TestRetryTransport_ContextCancellation(t *testing.T) { cancel() // Cancel immediately. rt := newRetryTransport(http.DefaultTransport) - rt.sleepFn = func(d time.Duration) {} + rt.sleepFn = func(context.Context, time.Duration) error { return nil } req, _ := http.NewRequestWithContext(ctx, http.MethodGet, ts.URL+"/test", nil) _, err := rt.RoundTrip(req) diff --git a/internal/cmd/auth.go b/internal/cmd/auth.go index 76af220..2814259 100644 --- a/internal/cmd/auth.go +++ b/internal/cmd/auth.go @@ -267,7 +267,7 @@ func runAuthLoginServiceAccount(cmd *cobra.Command, profileFlag string, input In existingSecret := config.ClientSecret() if existingID != "" && existingSecret != "" { tc := api.NewTokenCache(existingID, existingSecret) - if _, err := tc.Token(); err == nil { + if _, err := tc.Token(cmd.Context()); err == nil { fmt.Fprintf(cmd.OutOrStdout(), "Already authenticated as service account (profile: %s)\n", profile) return nil } @@ -303,7 +303,7 @@ func runAuthLoginServiceAccount(cmd *cobra.Command, profileFlag string, input In // Validate by obtaining a token. fmt.Fprintf(cmd.ErrOrStderr(), "Obtaining bearer token...") tc := api.NewTokenCache(clientID, clientSecret) - _, err = tc.Token() + _, err = tc.Token(cmd.Context()) if err != nil { fmt.Fprintln(cmd.ErrOrStderr()) return fmt.Errorf("authentication failed: %w", err) @@ -423,7 +423,7 @@ func runAuthStatus(cmd *cobra.Command, args []string) error { if clientID != "" && clientSecret != "" { tc := api.NewTokenCache(clientID, clientSecret) - _, tokenErr := tc.Token() + _, tokenErr := tc.Token(cmd.Context()) if tokenErr == nil { status.Authenticated = true status.ClientID = clientID diff --git a/internal/cmd/doctor.go b/internal/cmd/doctor.go index 99320da..0a88a3e 100644 --- a/internal/cmd/doctor.go +++ b/internal/cmd/doctor.go @@ -327,22 +327,20 @@ func collectMCP() mcpSection { // jc uses for every other API call — so it exercises whichever auth // method the active profile is configured for (api_key OR // service_account OAuth). The status field distinguishes "ok" from -// "auth_failed" from "unreachable" so the operator can tell "DNS/TLS -// works but the key is wrong" from "the host is unreachable" — that's -// the whole reason this command exists. +// "auth_failed" from "unreachable" from "timeout" so the operator can +// tell "DNS/TLS works but the key is wrong" from "the host is +// unreachable" — that's the whole reason this command exists. // // We don't HEAD the API root: JumpCloud's edge returns 404 there, // indistinguishable from a real outage. `GET /v2/usergroups?limit=1` // is a few KB and hits a real handler. // -// Bugbot on PR #42 caught the original hand-rolled HTTP probe which -// only sent `x-api-key` — a valid service_account profile would -// report `auth_failed` even though every other jc command worked. -// Routing through api.NewV2Client fixes that without leaking the -// client-construction error into our return shape. -// -// Never returns a non-nil error to the caller; failures are encoded -// in the report so JSON consumers always get a parseable response. +// Post-KLA-448 simplification: the api package now propagates context +// through the OAuth token-fetch + retry backoff, so a short +// --probe-timeout cleanly cancels every layer. No more goroutine +// wrapper — just call client.ListAll(ctx, ...) directly and trust +// the context. classifyProbeError handles DeadlineExceeded / +// Canceled → "timeout" classification. func runAPIProbe(parentCtx context.Context, timeout time.Duration) *apiProbe { if timeout <= 0 { timeout = 5 * time.Second @@ -361,48 +359,19 @@ func runAPIProbe(parentCtx context.Context, timeout time.Duration) *apiProbe { } } - // Run ListAll in a goroutine so the probe itself respects - // --probe-timeout even when the underlying call doesn't. Bugbot - // caught that for service_account profiles, the OAuth token-fetch - // hop uses TokenCache.fetchToken() which has its own 30s http.Client - // and doesn't honor the probe context — so a hung OAuth endpoint - // could make `jc doctor --probe-timeout 100ms` run 30+ seconds. - // - // The leaked goroutine dies with the process (jc doctor exits as - // soon as we print); we don't try to cancel the underlying HTTP call, - // just return early so the operator sees the timeout they asked for. - type result struct { - err error - } - resultCh := make(chan result, 1) - go func() { - _, listErr := client.ListAll(ctx, "/usergroups", api.V2ListOptions{Limit: 1}) - resultCh <- result{err: listErr} - }() - - select { - case r := <-resultCh: - latency := time.Since(start) - probe := classifyProbeError(r.err) - probe.LatencyMS = latency.Milliseconds() - return probe - case <-ctx.Done(): - latency := time.Since(start) - // Distinguish "my --probe-timeout fired" from "the parent - // context fired" (global --timeout, signal cancel, etc.). - // Pre-fix, the error always blamed --probe-timeout — Bugbot - // flagged that operators would raise the wrong flag while the - // global deadline still applied. - errMsg := fmt.Sprintf("probe deadline (%s) exceeded; OAuth token-fetch or upstream HTTP may not honor the context — increase --probe-timeout or use --no-probe", timeout) - if parentCtx.Err() != nil { - errMsg = fmt.Sprintf("parent context deadline expired before --probe-timeout (%s); raise the global --timeout or use --no-probe", timeout) - } - return &apiProbe{ - Status: "timeout", - LatencyMS: latency.Milliseconds(), - Error: errMsg, - } + _, err = client.ListAll(ctx, "/usergroups", api.V2ListOptions{Limit: 1}) + latency := time.Since(start) + probe := classifyProbeError(err) + probe.LatencyMS = latency.Milliseconds() + + // When the timeout classification fires AND the parent context + // fired first, point the operator at the global --timeout rather + // than --probe-timeout. (Bugbot PR #42 #11 — operator was being + // told to raise the wrong flag.) + if probe.Status == "timeout" && parentCtx.Err() != nil { + probe.Error = fmt.Sprintf("parent context deadline expired before --probe-timeout (%s); raise the global --timeout or use --no-probe", timeout) } + return probe } // classifyProbeError turns a (client.ListAll) result into the diff --git a/internal/cmd/setup.go b/internal/cmd/setup.go index 9ca9901..40a2f04 100644 --- a/internal/cmd/setup.go +++ b/internal/cmd/setup.go @@ -283,7 +283,7 @@ func (wiz *setupWizard) authServiceAccount(profile string) (*api.Organization, e // Obtain token. fmt.Fprintf(wiz.w, "Obtaining bearer token...") tc := api.NewTokenCache(clientID, clientSecret) - _, err = tc.Token() + _, err = tc.Token(wiz.cmd.Context()) if err != nil { fmt.Fprintln(wiz.w) return nil, fmt.Errorf("authentication failed: %w", err) diff --git a/internal/mcp/apps_test.go b/internal/mcp/apps_test.go index 24ee0b9..5286f60 100644 --- a/internal/mcp/apps_test.go +++ b/internal/mcp/apps_test.go @@ -299,7 +299,7 @@ func TestDashboardTool_Filtering(t *testing.T) { func TestDashboardTool_PartialFailure(t *testing.T) { setupToolTest(t) - origSleep := api.SetRetrySleepFn(func(time.Duration) {}) + origSleep := api.SetRetrySleepFn(func(context.Context, time.Duration) error { return nil }) t.Cleanup(func() { api.SetRetrySleepFn(origSleep) }) origNow := nowFunc @@ -376,7 +376,7 @@ func TestDashboardTool_PartialFailure(t *testing.T) { func TestDashboardTool_TotalFailure(t *testing.T) { setupToolTest(t) - origSleep := api.SetRetrySleepFn(func(time.Duration) {}) + origSleep := api.SetRetrySleepFn(func(context.Context, time.Duration) error { return nil }) t.Cleanup(func() { api.SetRetrySleepFn(origSleep) }) origNow := nowFunc