Skip to content

Commit 9408c41

Browse files
robstolarz and claude committed
feat: replace tmate with native SSH debug step for --ssh-after-step
Replace the `mxschmitt/action-tmate@v3` GitHub Action with a simple shell script that pauses the workflow by polling for a magic file. After starting the run, the CLI waits for the sandbox, polls logs for a `::depot-ssh-ready::` marker, then connects via the existing PTY infrastructure.

- Inject a shell-based debug step instead of the tmate action
- Wait for sandbox + log marker before connecting
- Fall back to printing SSH info when no TTY is available
- Fix job key matching for inline workflows (e.g. `_inline_0.yaml:e2e`)
- Improve `printSSHInfo` to show the `depot ci ssh` command

Closes DEP-3862

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d675591 commit 9408c41

2 files changed

Lines changed: 116 additions & 18 deletions

File tree

pkg/cmd/ci/run.go

Lines changed: 102 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package ci
22

33
import (
4+
"context"
45
"crypto/sha256"
56
"encoding/json"
67
"fmt"
@@ -9,6 +10,7 @@ import (
910
"path/filepath"
1011
"regexp"
1112
"strings"
13+
"time"
1214

1315
"github.com/depot/cli/pkg/api"
1416
"github.com/depot/cli/pkg/config"
@@ -49,7 +51,7 @@ This command is in beta and subject to change.`,
4951
# Run a job and connect to its terminal via SSH
5052
depot ci run --workflow .depot/workflows/ci.yml --job build --ssh
5153
52-
# Debug with tmate after a specific step
54+
# Debug with SSH after a specific step (pauses workflow until you continue)
5355
depot ci run --workflow .depot/workflows/ci.yml --job build --ssh-after-step 3`,
5456
RunE: func(cmd *cobra.Command, args []string) error {
5557
if workflowPath == "" {
@@ -166,10 +168,10 @@ This command is in beta and subject to change.`,
166168
}
167169
}
168170

169-
// Insert tmate debug step if requested
171+
// Insert debug pause step if requested
170172
if sshAfterStep > 0 {
171173
jobName := jobNames[0]
172-
if err := injectTmateStep(jobs, jobName, sshAfterStep, patch != nil); err != nil {
174+
if err := injectDebugStep(jobs, jobName, sshAfterStep, patch != nil); err != nil {
173175
return err
174176
}
175177
}
@@ -184,7 +186,7 @@ This command is in beta and subject to change.`,
184186
fmt.Printf("Checking out commit: %s\n", patch.mergeBase)
185187
}
186188
if sshAfterStep > 0 {
187-
fmt.Printf("Inserting tmate step after step %d\n", sshAfterStep)
189+
fmt.Printf("Inserting debug step after step %d\n", sshAfterStep)
188190
}
189191
fmt.Println()
190192

@@ -213,12 +215,35 @@ This command is in beta and subject to change.`,
213215
fmt.Printf("Run: %s\n", resp.RunId)
214216
fmt.Println()
215217

216-
if ssh {
217-
fmt.Printf("Waiting for job to start and connecting via SSH...\n")
218+
if sshAfterStep > 0 || ssh {
219+
if sshAfterStep > 0 {
220+
fmt.Printf("Waiting for debug step to activate...\n")
221+
} else {
222+
fmt.Printf("Waiting for job to start...\n")
223+
}
218224
sandboxID, sessionID, err := waitForSandbox(ctx, tokenVal, orgID, resp.RunId, jobNames[0], "")
219225
if err != nil {
220226
return err
221227
}
228+
229+
// When --ssh-after-step is used, wait for the debug step to
230+
// actually be running before connecting, so the user lands in
231+
// the sandbox after step N has completed.
232+
if sshAfterStep > 0 {
233+
fmt.Fprintf(os.Stderr, "Waiting for step %d to complete...\n", sshAfterStep)
234+
if err := waitForLogMarker(ctx, tokenVal, orgID, resp.RunId, jobNames[0], "::depot-ssh-ready::"); err != nil {
235+
fmt.Fprintf(os.Stderr, "Warning: could not confirm debug step is active: %v\n", err)
236+
fmt.Fprintf(os.Stderr, "Connecting anyway...\n")
237+
}
238+
}
239+
240+
if sshAfterStep > 0 {
241+
fmt.Fprintf(os.Stderr, "Run 'touch /tmp/depot-continue' to resume the workflow. (Your session will not end.)\n")
242+
}
243+
fmt.Fprintf(os.Stderr, "Connecting to sandbox %s...\n", sandboxID)
244+
if !helpers.IsTerminal() {
245+
return printSSHInfo(sandboxID, sessionID, "")
246+
}
222247
return pty.Run(ctx, pty.SessionOptions{
223248
Token: tokenVal,
224249
SandboxID: sandboxID,
@@ -237,7 +262,7 @@ This command is in beta and subject to change.`,
237262
cmd.Flags().StringVar(&token, "token", "", "Depot API token")
238263
cmd.Flags().StringVar(&workflowPath, "workflow", "", "Path to workflow YAML file")
239264
cmd.Flags().StringSliceVar(&jobNames, "job", nil, "Job name(s) to run (repeatable; omit to run all)")
240-
cmd.Flags().IntVar(&sshAfterStep, "ssh-after-step", 0, "1-based step index to insert a tmate debug step after (requires single --job)")
265+
cmd.Flags().IntVar(&sshAfterStep, "ssh-after-step", 0, "1-based step index to pause and connect via SSH after (requires single --job)")
241266
cmd.Flags().BoolVar(&ssh, "ssh", false, "Start the run and connect to the job's sandbox via interactive terminal (requires single --job)")
242267

243268
cmd.AddCommand(NewCmdRunList())
@@ -382,7 +407,7 @@ echo "Patch applied successfully"`, cacheKey, cacheBaseURL),
382407
job["steps"] = newSteps
383408
}
384409

385-
func injectTmateStep(jobs map[string]interface{}, jobName string, afterStep int, patchInjected bool) error {
410+
func injectDebugStep(jobs map[string]interface{}, jobName string, afterStep int, patchInjected bool) error {
386411
jobRaw, ok := jobs[jobName]
387412
if !ok {
388413
return fmt.Errorf("job %q not found", jobName)
@@ -400,11 +425,12 @@ func injectTmateStep(jobs map[string]interface{}, jobName string, afterStep int,
400425
return fmt.Errorf("job %q steps is not a list", jobName)
401426
}
402427

403-
tmateStep := map[string]interface{}{
404-
"uses": "mxschmitt/action-tmate@v3",
405-
"with": map[string]interface{}{
406-
"limit-access-to-actor": "false",
407-
},
428+
debugStep := map[string]interface{}{
429+
"name": "Depot SSH Debug",
430+
"run": "echo '::depot-ssh-ready::'\n" +
431+
"echo 'SSH session active. Run: touch /tmp/depot-continue to resume workflow.'\n" +
432+
"while [ ! -f /tmp/depot-continue ]; do sleep 5; done\n" +
433+
"echo 'Continuing workflow...'",
408434
}
409435

410436
insertAt := afterStep
@@ -434,7 +460,7 @@ func injectTmateStep(jobs map[string]interface{}, jobName string, afterStep int,
434460

435461
newSteps := make([]interface{}, 0, len(steps)+1)
436462
newSteps = append(newSteps, steps[:insertAt]...)
437-
newSteps = append(newSteps, tmateStep)
463+
newSteps = append(newSteps, debugStep)
438464
newSteps = append(newSteps, steps[insertAt:]...)
439465
job["steps"] = newSteps
440466

@@ -478,6 +504,68 @@ func formatStatus(s civ1.CIRunStatus) string {
478504
}
479505
}
480506

507+
// waitForLogMarker polls the job attempt logs until a line containing marker
508+
// appears. This is used to detect when the injected debug step is running.
509+
func waitForLogMarker(ctx context.Context, token, orgID, runID, jobKey, marker string) error {
510+
const pollInterval = 3 * time.Second
511+
const timeout = 10 * time.Minute
512+
513+
deadline := time.Now().Add(timeout)
514+
515+
for {
516+
if time.Now().After(deadline) {
517+
return fmt.Errorf("timed out waiting for log marker (waited %s)", timeout)
518+
}
519+
520+
// Resolve the latest attempt ID for the job.
521+
resp, err := api.CIGetRunStatus(ctx, token, orgID, runID)
522+
if err != nil {
523+
// Transient error, keep polling.
524+
select {
525+
case <-ctx.Done():
526+
return ctx.Err()
527+
case <-time.After(pollInterval):
528+
}
529+
continue
530+
}
531+
532+
targetJob, err := findJob(resp, jobKey, "")
533+
if err != nil {
534+
select {
535+
case <-ctx.Done():
536+
return ctx.Err()
537+
case <-time.After(pollInterval):
538+
}
539+
continue
540+
}
541+
542+
attempt := latestAttempt(targetJob)
543+
if attempt == nil {
544+
select {
545+
case <-ctx.Done():
546+
return ctx.Err()
547+
case <-time.After(pollInterval):
548+
}
549+
continue
550+
}
551+
552+
lines, err := api.CIGetJobAttemptLogs(ctx, token, orgID, attempt.AttemptId)
553+
if err == nil {
554+
for _, line := range lines {
555+
if strings.Contains(line.Body, marker) {
556+
return nil
557+
}
558+
}
559+
}
560+
561+
select {
562+
case <-ctx.Done():
563+
return ctx.Err()
564+
case <-time.After(pollInterval):
565+
}
566+
}
567+
}
568+
481569
func NewCmdRunList() *cobra.Command {
482570
var (
483571
orgID string

pkg/cmd/ci/ssh.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,16 @@ func findJob(resp *civ1.GetRunStatusResponse, jobKey, originalID string) (*civ1.
203203
return nil, &retryableJobError{msg: fmt.Sprintf("run %s has no jobs yet", resp.RunId)}
204204
}
205205

206-
// Match by job key (--job flag).
206+
// Match by job key (--job flag): exact match or short name (after colon).
207+
// Job keys from inline workflows look like "_inline_0.yaml:e2e" — the
208+
// user passes just "e2e", so match on the suffix after the colon too.
207209
if jobKey != "" {
208210
for _, j := range allJobs {
209-
if j.JobKey == jobKey {
211+
short := j.JobKey
212+
if i := strings.IndexByte(short, ':'); i >= 0 {
213+
short = short[i+1:]
214+
}
215+
if j.JobKey == jobKey || short == jobKey {
210216
return j, nil
211217
}
212218
}
@@ -262,8 +268,12 @@ func printSSHInfo(sandboxID, sessionID, output string) error {
262268

263269
fmt.Printf("Host: api.depot.dev\n")
264270
fmt.Printf("User: %s\n", sandboxID)
265-
fmt.Printf("Password: Use your Depot API token ($DEPOT_TOKEN)\n")
271+
fmt.Printf("Password: Your Depot API token ($DEPOT_TOKEN)\n")
272+
fmt.Println()
273+
fmt.Printf("Connect interactively:\n")
274+
fmt.Printf(" depot ci ssh %s\n", sandboxID)
266275
fmt.Println()
267-
fmt.Printf("Connect: ssh %s@api.depot.dev\n", sandboxID)
276+
fmt.Printf("Or via SSH directly:\n")
277+
fmt.Printf(" ssh %s@api.depot.dev\n", sandboxID)
268278
return nil
269279
}

0 commit comments

Comments
 (0)