11package ci
22
33import (
4+ "context"
45 "crypto/sha256"
56 "encoding/json"
67 "fmt"
@@ -9,6 +10,7 @@ import (
910 "path/filepath"
1011 "regexp"
1112 "strings"
13+ "time"
1214
1315 "github.com/depot/cli/pkg/api"
1416 "github.com/depot/cli/pkg/config"
@@ -49,7 +51,7 @@ This command is in beta and subject to change.`,
4951 # Run a job and connect to its terminal via SSH
5052 depot ci run --workflow .depot/workflows/ci.yml --job build --ssh
5153
52- # Debug with tmate after a specific step
54+ # Debug with SSH after a specific step (pauses workflow until you continue)
5355 depot ci run --workflow .depot/workflows/ci.yml --job build --ssh-after-step 3` ,
5456 RunE : func (cmd * cobra.Command , args []string ) error {
5557 if workflowPath == "" {
@@ -166,10 +168,10 @@ This command is in beta and subject to change.`,
166168 }
167169 }
168170
169- // Insert tmate debug step if requested
171+ // Insert debug pause step if requested
170172 if sshAfterStep > 0 {
171173 jobName := jobNames [0 ]
172- if err := injectTmateStep (jobs , jobName , sshAfterStep , patch != nil ); err != nil {
174+ if err := injectDebugStep (jobs , jobName , sshAfterStep , patch != nil ); err != nil {
173175 return err
174176 }
175177 }
@@ -184,7 +186,7 @@ This command is in beta and subject to change.`,
184186 fmt .Printf ("Checking out commit: %s\n " , patch .mergeBase )
185187 }
186188 if sshAfterStep > 0 {
187- fmt .Printf ("Inserting tmate step after step %d\n " , sshAfterStep )
189+ fmt .Printf ("Inserting debug step after step %d\n " , sshAfterStep )
188190 }
189191 fmt .Println ()
190192
@@ -213,12 +215,35 @@ This command is in beta and subject to change.`,
213215 fmt .Printf ("Run: %s\n " , resp .RunId )
214216 fmt .Println ()
215217
216- if ssh {
217- fmt .Printf ("Waiting for job to start and connecting via SSH...\n " )
218+ if sshAfterStep > 0 || ssh {
219+ if sshAfterStep > 0 {
220+ fmt .Printf ("Waiting for debug step to activate...\n " )
221+ } else {
222+ fmt .Printf ("Waiting for job to start...\n " )
223+ }
218224 sandboxID , sessionID , err := waitForSandbox (ctx , tokenVal , orgID , resp .RunId , jobNames [0 ], "" )
219225 if err != nil {
220226 return err
221227 }
228+
229+ // When --ssh-after-step is used, wait for the debug step to
230+ // actually be running before connecting, so the user lands in
231+ // the sandbox after step N has completed.
232+ if sshAfterStep > 0 {
233+ fmt .Fprintf (os .Stderr , "Waiting for step %d to complete...\n " , sshAfterStep )
234+ if err := waitForLogMarker (ctx , tokenVal , orgID , resp .RunId , jobNames [0 ], "::depot-ssh-ready::" ); err != nil {
235+ fmt .Fprintf (os .Stderr , "Warning: could not confirm debug step is active: %v\n " , err )
236+ fmt .Fprintf (os .Stderr , "Connecting anyway...\n " )
237+ }
238+ }
239+
240+ if sshAfterStep > 0 {
241+ fmt .Fprintf (os .Stderr , "Run 'touch /tmp/depot-continue' to resume the workflow. (Your session will not end.)\n " )
242+ }
243+ fmt .Fprintf (os .Stderr , "Connecting to sandbox %s...\n " , sandboxID )
244+ if ! helpers .IsTerminal () {
245+ return printSSHInfo (sandboxID , sessionID , "" )
246+ }
222247 return pty .Run (ctx , pty.SessionOptions {
223248 Token : tokenVal ,
224249 SandboxID : sandboxID ,
@@ -237,7 +262,7 @@ This command is in beta and subject to change.`,
237262 cmd .Flags ().StringVar (& token , "token" , "" , "Depot API token" )
238263 cmd .Flags ().StringVar (& workflowPath , "workflow" , "" , "Path to workflow YAML file" )
239264 cmd .Flags ().StringSliceVar (& jobNames , "job" , nil , "Job name(s) to run (repeatable; omit to run all)" )
240- cmd .Flags ().IntVar (& sshAfterStep , "ssh-after-step" , 0 , "1-based step index to insert a tmate debug step after (requires single --job)" )
265+ cmd .Flags ().IntVar (& sshAfterStep , "ssh-after-step" , 0 , "1-based step index to pause and connect via SSH after (requires single --job)" )
241266 cmd .Flags ().BoolVar (& ssh , "ssh" , false , "Start the run and connect to the job's sandbox via interactive terminal (requires single --job)" )
242267
243268 cmd .AddCommand (NewCmdRunList ())
@@ -382,7 +407,7 @@ echo "Patch applied successfully"`, cacheKey, cacheBaseURL),
382407 job ["steps" ] = newSteps
383408}
384409
385- func injectTmateStep (jobs map [string ]interface {}, jobName string , afterStep int , patchInjected bool ) error {
410+ func injectDebugStep (jobs map [string ]interface {}, jobName string , afterStep int , patchInjected bool ) error {
386411 jobRaw , ok := jobs [jobName ]
387412 if ! ok {
388413 return fmt .Errorf ("job %q not found" , jobName )
@@ -400,11 +425,12 @@ func injectTmateStep(jobs map[string]interface{}, jobName string, afterStep int,
400425 return fmt .Errorf ("job %q steps is not a list" , jobName )
401426 }
402427
403- tmateStep := map [string ]interface {}{
404- "uses" : "mxschmitt/action-tmate@v3" ,
405- "with" : map [string ]interface {}{
406- "limit-access-to-actor" : "false" ,
407- },
428+ debugStep := map [string ]interface {}{
429+ "name" : "Depot SSH Debug" ,
430+ "run" : "echo '::depot-ssh-ready::'\n " +
431+ "echo 'SSH session active. Run: touch /tmp/depot-continue to resume workflow.'\n " +
432+ "while [ ! -f /tmp/depot-continue ]; do sleep 5; done\n " +
433+ "echo 'Continuing workflow...'" ,
408434 }
409435
410436 insertAt := afterStep
@@ -434,7 +460,7 @@ func injectTmateStep(jobs map[string]interface{}, jobName string, afterStep int,
434460
435461 newSteps := make ([]interface {}, 0 , len (steps )+ 1 )
436462 newSteps = append (newSteps , steps [:insertAt ]... )
437- newSteps = append (newSteps , tmateStep )
463+ newSteps = append (newSteps , debugStep )
438464 newSteps = append (newSteps , steps [insertAt :]... )
439465 job ["steps" ] = newSteps
440466
@@ -478,6 +504,68 @@ func formatStatus(s civ1.CIRunStatus) string {
478504 }
479505}
480506
507+ // waitForLogMarker polls the job attempt logs until a line containing marker
508+ // appears. This is used to detect when the injected debug step is running.
509+ func waitForLogMarker (ctx context.Context , token , orgID , runID , jobKey , marker string ) error {
510+ const pollInterval = 3 * time .Second
511+ const timeout = 10 * time .Minute
512+
513+ deadline := time .Now ().Add (timeout )
514+
515+ for {
516+ if time .Now ().After (deadline ) {
517+ return fmt .Errorf ("timed out waiting for log marker (waited %s)" , timeout )
518+ }
519+
520+ // Resolve the latest attempt ID for the job.
521+ resp , err := api .CIGetRunStatus (ctx , token , orgID , runID )
522+ if err != nil {
523+ // Transient error, keep polling.
524+ select {
525+ case <- ctx .Done ():
526+ return ctx .Err ()
527+ case <- time .After (pollInterval ):
528+ }
529+ continue
530+ }
531+
532+ targetJob , err := findJob (resp , jobKey , "" )
533+ if err != nil {
534+ select {
535+ case <- ctx .Done ():
536+ return ctx .Err ()
537+ case <- time .After (pollInterval ):
538+ }
539+ continue
540+ }
541+
542+ attempt := latestAttempt (targetJob )
543+ if attempt == nil {
544+ select {
545+ case <- ctx .Done ():
546+ return ctx .Err ()
547+ case <- time .After (pollInterval ):
548+ }
549+ continue
550+ }
551+
552+ lines , err := api .CIGetJobAttemptLogs (ctx , token , orgID , attempt .AttemptId )
553+ if err == nil {
554+ for _ , line := range lines {
555+ if strings .Contains (line .Body , marker ) {
556+ return nil
557+ }
558+ }
559+ }
560+
561+ select {
562+ case <- ctx .Done ():
563+ return ctx .Err ()
564+ case <- time .After (pollInterval ):
565+ }
566+ }
567+ }
568+
481569func NewCmdRunList () * cobra.Command {
482570 var (
483571 orgID string
0 commit comments