codex/codex-cli/src/approvals.ts at 135580d940e8ea133f451cd0bb5c1fae956c537c · blocksuser/codex · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
import type { ParseEntry, ControlOperator } from "shell-quote";

import {
  identify_files_added,
  identify_files_needed,
} from "./utils/agent/apply-patch";
import * as path from "path";
import { parse } from "shell-quote";

/**
 * Result of assessing a command for automatic approval, as returned by
 * `canAutoApprove()`. The `type` field discriminates between the three
 * possible outcomes; `applyPatch` may accompany any of them.
 */
export type SafetyAssessment = {
  /**
   * If set, this approval is for an apply_patch call and these are the
   * arguments.
   */
  applyPatch?: ApplyPatchCommand;
} & (
  | {
      /** The command may be run without prompting the user. */
      type: "auto-approve";
      /**
       * This must be true if the command is not on the "known safe" list, but
       * was auto-approved due to `full-auto` mode.
       */
      runInSandbox: boolean;
      /** Short description of why the command was approved. */
      reason: string;
      /** Category label for the command (e.g. "Searching", "Editing"). */
      group: string;
    }
  | {
      /** No automatic decision could be made; defer to the user. */
      type: "ask-user";
    }
  /**
   * Reserved for a case where we are certain the command is unsafe and should
   * not be presented as an option to the user.
   */
  | {
      type: "reject";
      /** Explanation of why the command was rejected. */
      reason: string;
    }
);

// TODO: This should also contain the paths that will be affected.
/**
 * Arguments of an apply_patch invocation, attached to a `SafetyAssessment`
 * via its `applyPatch` field.
 */
export type ApplyPatchCommand = {
  /**
   * The patch argument given to apply_patch. When the call was written as a
   * bash heredoc, this is the text extracted by `tryParseApplyPatch()`.
   */
  patch: string;
};

/**
 * How much the agent is allowed to do without asking the user first. The
 * values are listed from most to least restrictive.
 */
export type ApprovalPolicy =
  /**
   * Under this policy, only "known safe" commands as defined by
   * `isSafeCommand()` that only read files will be auto-approved.
   */
  | "suggest"

  /**
   * In addition to commands that are auto-approved according to the rules for
   * "suggest", commands that write files within the user's approved list of
   * writable paths will also be auto-approved.
   *
   * NOTE(review): in this file the only write path that is checked against the
   * writable roots is apply_patch (see `canAutoApproveApplyPatch()`).
   */
  | "auto-edit"

  /**
   * All commands are auto-approved, but are expected to be run in a sandbox
   * where network access is disabled and writes are limited to a specific set
   * of paths.
   */
  | "full-auto"

  /**
   * All commands are auto-approved WITHOUT any sandboxing or safety checks.
   * This is EXTREMELY DANGEROUS and should only be used in trusted environments
   * where the execution environment itself provides appropriate security.
   */
  | "dangerous-auto";

/**
 * Decides whether `command` may run without asking the user, must be shown to
 * the user for approval, or has to be rejected outright.
 *
 * The checks run in this order:
 *   1. A direct `apply_patch` call is delegated to the apply_patch rules (or
 *      rejected when its argument list is malformed).
 *   2. A command on the "known safe" list is approved to run unsandboxed.
 *   3. A `bash -lc <script>` call is inspected: an apply_patch heredoc goes to
 *      the apply_patch rules, and a script made up only of known-safe commands
 *      joined by allow-listed operators is approved to run unsandboxed.
 *   4. Anything else is decided purely by `policy`.
 *
 * Note `env` must be the same `env` that will be used to spawn the process.
 *
 * @param command argv-style command to assess.
 * @param workdir directory relative patch paths are resolved against.
 * @param policy the active approval policy.
 * @param writableRoots absolute directories patches may write into.
 * @param env environment handed to the shell parser.
 */
export function canAutoApprove(
  command: ReadonlyArray<string>,
  workdir: string | undefined,
  policy: ApprovalPolicy,
  writableRoots: ReadonlyArray<string>,
  env: NodeJS.ProcessEnv = process.env,
): SafetyAssessment {
  // Verdict for a command nothing above could vouch for: it depends only on
  // how permissive the policy is.
  const verdictForUnvettedCommand = (): SafetyAssessment => {
    if (policy === "dangerous-auto") {
      // In dangerous-auto, we run all commands without sandboxing or
      // prompting. This is EXTREMELY DANGEROUS and should only be used in
      // trusted environments.
      return {
        type: "auto-approve",
        reason: "Dangerous auto mode",
        group: "Running commands",
        runInSandbox: false,
      };
    }
    if (policy === "full-auto") {
      // Still automatic, but the command must be confined to the sandbox.
      return {
        type: "auto-approve",
        reason: "Full auto mode",
        group: "Running commands",
        runInSandbox: true,
      };
    }
    // "suggest" and "auto-edit": we cannot reason about the command, so ask.
    return { type: "ask-user" };
  };

  const [program, flag, script] = command;

  if (program === "apply_patch") {
    const patchArg = command[1];
    if (command.length !== 2 || typeof patchArg !== "string") {
      return {
        type: "reject",
        reason: "Invalid apply_patch command",
      };
    }
    return canAutoApproveApplyPatch(patchArg, workdir, writableRoots, policy);
  }

  const knownSafe = isSafeCommand(command);
  if (knownSafe != null) {
    return {
      type: "auto-approve",
      reason: knownSafe.reason,
      group: knownSafe.group,
      runInSandbox: false,
    };
  }

  if (
    command.length === 3 &&
    program === "bash" &&
    flag === "-lc" &&
    typeof script === "string"
  ) {
    const patchFromScript = tryParseApplyPatch(script);
    if (patchFromScript != null) {
      return canAutoApproveApplyPatch(
        patchFromScript,
        workdir,
        writableRoots,
        policy,
      );
    }

    let parsedScript;
    try {
      parsedScript = parse(script, env);
    } catch (_err) {
      // In practice, there seem to be syntactically valid shell commands that
      // shell-quote cannot parse, so a parse failure is not grounds for
      // rejection; fall back to the policy-only verdict instead.
      return verdictForUnvettedCommand();
    }

    // The parsed script is a mix of words and operator entries. The whole
    // expression is auto-approvable only when *every* command segment is
    // known-safe and *every* operator is on the allow-list.
    const safeExpression = isEntireShellExpressionSafe(parsedScript);
    if (safeExpression != null) {
      return {
        type: "auto-approve",
        reason: safeExpression.reason,
        group: safeExpression.group,
        runInSandbox: false,
      };
    }
  }

  return verdictForUnvettedCommand();
}

/**
 * Applies the approval policy to an apply_patch call. Every returned
 * assessment carries the patch in its `applyPatch` field.
 *
 * - "dangerous-auto": approved immediately, with no path checks and no sandbox.
 * - "suggest": always deferred to the user.
 * - "auto-edit" / "full-auto": approved unsandboxed when every path the patch
 *   touches is under a writable root; otherwise "full-auto" approves it inside
 *   the sandbox and "auto-edit" asks the user.
 *
 * @param applyPatchArg the patch text.
 * @param workdir directory relative patch paths are resolved against.
 * @param writableRoots absolute directories the patch may write into.
 * @param policy the active approval policy.
 */
function canAutoApproveApplyPatch(
  applyPatchArg: string,
  workdir: string | undefined,
  writableRoots: ReadonlyArray<string>,
  policy: ApprovalPolicy,
): SafetyAssessment {
  const applyPatch: ApplyPatchCommand = { patch: applyPatchArg };

  if (policy === "dangerous-auto") {
    // Immediately auto-approve without any path checks.
    return {
      type: "auto-approve",
      reason: "Dangerous auto mode",
      group: "Editing",
      runInSandbox: false,
      applyPatch,
    };
  }

  if (policy === "suggest") {
    return {
      type: "ask-user",
      applyPatch,
    };
  }

  // Only "auto-edit" and "full-auto" reach this point; both approve a patch
  // that stays inside the writable roots.
  const staysInWritableRoots = isWritePatchConstrainedToWritablePaths(
    applyPatchArg,
    workdir,
    writableRoots,
  );
  if (staysInWritableRoots) {
    return {
      type: "auto-approve",
      reason: "apply_patch command is constrained to writable paths",
      group: "Editing",
      runInSandbox: false,
      applyPatch,
    };
  }

  if (policy === "full-auto") {
    return {
      type: "auto-approve",
      reason: "Full auto mode",
      group: "Editing",
      runInSandbox: true,
      applyPatch,
    };
  }

  return {
    type: "ask-user",
    applyPatch,
  };
}

/**
 * Reports whether every file the patch touches lies under one of
 * `writableRoots`.
 *
 * All items in `writableRoots` must be absolute paths.
 *
 * @param applyPatchArg the patch text.
 * @param workdir directory relative patch paths are resolved against.
 * @param writableRoots absolute directories the patch may write into.
 */
function isWritePatchConstrainedToWritablePaths(
  applyPatchArg: string,
  workdir: string | undefined,
  writableRoots: ReadonlyArray<string>,
): boolean {
  // `identify_files_needed()` lists the files the patch will modify or
  // delete, so they should already exist on disk. They could be canonicalized
  // further via fs.realpath(), but that does not seem necessary and may even
  // cause false negatives (assuming writes are allowed in directories that
  // are symlinked from a writable path).
  const existingFiles = identify_files_needed(applyPatchArg);
  if (!allPathsConstrainedTowritablePaths(existingFiles, workdir, writableRoots)) {
    return false;
  }

  // `identify_files_added()` lists the files the patch will create. They
  // should NOT exist on disk yet, so fs.realpath() would fail on them.
  const newFiles = identify_files_added(applyPatchArg);
  return allPathsConstrainedTowritablePaths(newFiles, workdir, writableRoots);
}

/**
 * Reports whether each of `candidatePaths` lies under one of `writableRoots`.
 * An empty list of candidates yields `true`.
 */
function allPathsConstrainedTowritablePaths(
  candidatePaths: ReadonlyArray<string>,
  workdir: string | undefined,
  writableRoots: ReadonlyArray<string>,
): boolean {
  for (const candidate of candidatePaths) {
    if (!isPathConstrainedTowritablePaths(candidate, workdir, writableRoots)) {
      return false;
    }
  }
  return true;
}

/**
 * Reports whether `candidatePath` lies under at least one of `writableRoots`.
 * A relative `candidatePath` is first made absolute via
 * `resolvePathAgainstWorkdir()`.
 */
function isPathConstrainedTowritablePaths(
  candidatePath: string,
  workdir: string | undefined,
  writableRoots: ReadonlyArray<string>,
): boolean {
  const absoluteCandidate = resolvePathAgainstWorkdir(candidatePath, workdir);

  for (const root of writableRoots) {
    if (pathContains(root, absoluteCandidate)) {
      return true;
    }
  }
  return false;
}

/**
 * Turns `candidatePath` into a normalized absolute path.
 *
 * An absolute input is returned normalized. A relative input is resolved
 * against `workdir` when one is given, and against `process.cwd()` otherwise.
 *
 * @param candidatePath the path to resolve; may contain `.` or `..` segments.
 * @param workdir base directory for relative paths, if any.
 * @returns the normalized absolute path.
 */
export function resolvePathAgainstWorkdir(
  candidatePath: string,
  workdir: string | undefined,
): string {
  // Collapse `.` / `..` segments up front so later containment checks see the
  // path's real shape.
  const cleaned = path.normalize(candidatePath);

  if (path.isAbsolute(cleaned)) {
    return cleaned;
  }

  return workdir == null
    ? path.resolve(cleaned)
    : path.resolve(workdir, cleaned);
}

/**
 * Reports whether `child` lies strictly inside the directory `parent`.
 *
 * Both `parent` and `child` must be absolute paths. A path is not considered
 * to contain itself, so `pathContains(p, p)` is `false`.
 *
 * @param parent absolute path of the candidate ancestor directory.
 * @param child absolute path to test.
 * @returns `true` when `child` is a descendant of `parent`.
 */
function pathContains(parent: string, child: string): boolean {
  const relative = path.relative(parent, child);

  // An empty result means `child` is `parent` itself.
  if (relative === "") {
    return false;
  }

  // An absolute result means there is no relative route at all (e.g. the two
  // paths are on different Windows drives).
  if (path.isAbsolute(relative)) {
    return false;
  }

  // The route leaves `parent` only when its first segment is exactly `..`.
  // A plain `startsWith("..")` test would also reject legitimate children
  // whose name merely begins with two dots, such as `..cache/file`.
  const leavesParent =
    relative === ".." || relative.startsWith(".." + path.sep);
  return !leavesParent;
}

/**
 * Extracts the patch argument from a bash script that invokes `apply_patch`.
 *
 * `bashArg` might be something like "apply_patch << 'EOF' *** Begin...".
 * When the script is an `apply_patch` call, the returned string is the
 * argument to apply_patch with the heredoc wrapper removed; if no heredoc is
 * recognized, it is everything after `apply_patch`, trimmed.
 *
 * @param bashArg the script passed to `bash -lc`.
 * @returns the patch text, or `null` when the script does not start with an
 *   `apply_patch` invocation.
 */
function tryParseApplyPatch(bashArg: string): string | null {
  const prefix = "apply_patch";
  if (!bashArg.startsWith(prefix)) {
    return null;
  }

  const heredoc = bashArg.slice(prefix.length);

  // `apply_patch` must be a complete token: it has to be followed by
  // whitespace, a heredoc operator, or the end of the script. Without this
  // check, unrelated commands that merely share the prefix (for example
  // `apply_patch_foo` or `apply_patcher`) would be treated as patches.
  if (heredoc !== "" && !/^(?:\s|<<)/.test(heredoc)) {
    return null;
  }

  // Matches `<< DELIM`, optionally quoted, then captures the body up to the
  // line that begins with the same delimiter.
  const heredocMatch = heredoc.match(
    /^\s*<<\s*['"]?(\w+)['"]?\n([\s\S]*?)\n\1/,
  );
  if (heredocMatch != null && typeof heredocMatch[2] === "string") {
    return heredocMatch[2].trim();
  } else {
    return heredoc.trim();
  }
}

/** Describes why a command was classified as "known safe". */
export type SafeCommandReason = {
  /** Short description of what the command does. */
  reason: string;
  /** Category label for the command (e.g. "Searching", "Reading files"). */
  group: string;
};

/**
 * If this is a "known safe" command, returns the (reason, group); otherwise,
 * returns null.
 *
 * Classification is driven by the program name in `command[0]`. A few
 * programs are only safe for some invocations and get extra checks:
 *   - `find` and `rg` are refused when any argument is an option that can run
 *     another program or (for `find`) delete or write files.
 *   - `git` is limited to the subcommands `status`, `branch`, `log`, `diff`
 *     and `show`.
 *   - `cargo` is limited to `check`.
 *   - `sed` is limited to `sed -n <range>p [FILE]`.
 *
 * @param command argv-style command; `command[0]` is the program name.
 * @returns the reason/group describing the command, or `null` when the
 *   command is not known to be safe.
 */
export function isSafeCommand(
  command: ReadonlyArray<string>,
): SafeCommandReason | null {
  const [cmd0, cmd1, cmd2, cmd3] = command;

  switch (cmd0) {
    case "cd":
      return {
        reason: "Change directory",
        group: "Navigating",
      };
    case "ls":
      return {
        reason: "List directory",
        group: "Searching",
      };
    case "pwd":
      return {
        reason: "Print working directory",
        group: "Navigating",
      };
    case "true":
      return {
        reason: "No-op (true)",
        group: "Utility",
      };
    case "echo":
      return { reason: "Echo string", group: "Printing" };
    case "cat":
      return {
        reason: "View file contents",
        group: "Reading files",
      };
    case "nl":
      return {
        reason: "View file with line numbers",
        group: "Reading files",
      };
    case "rg": {
      // Certain options to `rg` make it spawn other programs, so those
      // invocations cannot be auto-approved.
      if (command.some(isUnsafeRipgrepArg)) {
        break;
      } else {
        return {
          reason: "Ripgrep search",
          group: "Searching",
        };
      }
    }
    case "find": {
      // Certain options to `find` allow executing arbitrary processes, so we
      // cannot auto-approve them.
      if (
        command.some((arg: string) => UNSAFE_OPTIONS_FOR_FIND_COMMAND.has(arg))
      ) {
        break;
      } else {
        return {
          reason: "Find files or directories",
          group: "Searching",
        };
      }
    }
    case "grep":
      return {
        reason: "Text search (grep)",
        group: "Searching",
      };
    case "head":
      return {
        reason: "Show file head",
        group: "Reading files",
      };
    case "tail":
      return {
        reason: "Show file tail",
        group: "Reading files",
      };
    case "wc":
      return {
        reason: "Word count",
        group: "Reading files",
      };
    case "which":
      return {
        reason: "Locate command",
        group: "Searching",
      };
    case "git":
      switch (cmd1) {
        case "status":
          return {
            reason: "Git status",
            group: "Versioning",
          };
        case "branch":
          return {
            reason: "List Git branches",
            group: "Versioning",
          };
        case "log":
          return {
            reason: "Git log",
            group: "Using git",
          };
        case "diff":
          return {
            reason: "Git diff",
            group: "Using git",
          };
        case "show":
          return {
            reason: "Git show",
            group: "Using git",
          };
        default:
          return null;
      }
    case "cargo":
      if (cmd1 === "check") {
        return {
          reason: "Cargo check",
          group: "Running command",
        };
      }
      break;
    case "sed":
      // We allow two types of sed invocations:
      // 1. `sed -n 1,200p FILE`
      // 2. `sed -n 1,200p` because the file is passed via stdin, e.g.,
      //    `nl -ba README.md | sed -n '1,200p'`
      if (
        cmd1 === "-n" &&
        isValidSedNArg(cmd2) &&
        (command.length === 3 ||
          (typeof cmd3 === "string" && command.length === 4))
      ) {
        return {
          reason: "Sed print subset",
          group: "Reading files",
        };
      }
      break;
    default:
      return null;
  }

  return null;
}

/**
 * Reports whether `arg` is a sed print command of the form `Np` or `M,Np`,
 * where M and N are decimal line numbers.
 */
function isValidSedNArg(arg: string | undefined): boolean {
  return arg != null && /^(\d+,)?\d+p$/.test(arg);
}

/**
 * Reports whether `arg` is an `rg` option that makes ripgrep run another
 * program. Options that take a value are matched both as a separate argument
 * (`--pre cmd`) and in the `--pre=cmd` form.
 */
function isUnsafeRipgrepArg(arg: string): boolean {
  if (UNSAFE_RG_OPTIONS_WITHOUT_VALUE.has(arg)) {
    return true;
  }
  return UNSAFE_RG_OPTIONS_WITH_VALUE.some(
    (option) => arg === option || arg.startsWith(`${option}=`),
  );
}

/** `rg` options whose value names a program that ripgrep will execute. */
const UNSAFE_RG_OPTIONS_WITH_VALUE: ReadonlyArray<string> = [
  // Runs the given command on each file before searching it.
  "--pre",
  // Runs the given command to obtain the local hostname.
  "--hostname-bin",
];

/** `rg` flags that make ripgrep call out to external tools. */
const UNSAFE_RG_OPTIONS_WITHOUT_VALUE: ReadonlySet<string> = new Set([
  // Searches compressed files by invoking external decompression programs.
  "--search-zip",
  "-z",
]);

/** `find` options that disqualify an invocation from auto-approval. */
const UNSAFE_OPTIONS_FOR_FIND_COMMAND: ReadonlySet<string> = new Set([
  // Options that can execute arbitrary commands.
  "-exec",
  "-execdir",
  "-ok",
  "-okdir",
  // Option that deletes matching files.
  "-delete",
  // Options that write pathnames to a file.
  "-fls",
  "-fprint",
  "-fprint0",
  "-fprintf",
]);

// ---------------- Helper utilities for complex shell expressions -----------------

// Allow-list of bash operators that may join commands in an auto-approved
// script. It is deliberately conservative: only operators that do not cause
// side effects on their own are included. Redirections (>, >>, <, etc.) and
// command substitution `$()` are intentionally left out, and so is anything
// related to grouping. Reference:
// https://github.com/substack/node-shell-quote#parsecmd-opts
const SAFE_SHELL_OPERATORS: ReadonlySet<string> = new Set<string>([
  // Conditional chaining: logical AND / logical OR.
  "&&",
  "||",
  // Pipe between two commands.
  "|",
  // Plain command separator.
  ";",
]);

/**
 * Checks whether a parsed shell expression is made up exclusively of
 * known-safe commands (per `isSafeCommand`) joined by operators from
 * `SAFE_SHELL_OPERATORS`.
 *
 * The token stream is cut into command segments at each operator entry. The
 * expression is refused as soon as a segment is not known-safe, an operator
 * is not allow-listed, a word is one of `(`, `)`, `{`, `}`, or a token is of
 * an unrecognized kind.
 *
 * @param parts tokens produced by `shell-quote`'s `parse()`.
 * @returns the reason/group of the *first* command segment when the whole
 *   expression is safe, so callers can surface a meaningful description;
 *   otherwise `null`. Also `null` when there are no tokens or no command
 *   segment at all.
 */
function isEntireShellExpressionSafe(
  parts: ReadonlyArray<ParseEntry>,
): SafeCommandReason | null {
  if (parts.length === 0) {
    return null;
  }

  try {
    // Grouping tokens are refused outright rather than interpreted, which
    // keeps the logic simple (subshells could introduce unexpected scope
    // changes).
    const groupingTokens = ["(", ")", "{", "}"];

    // Verdicts of the segments vetted so far, in order of appearance.
    const verdicts: Array<SafeCommandReason> = [];
    // Words of the segment currently being collected.
    let words: Array<string> = [];

    // Vets the segment collected so far and starts a new one. An empty
    // segment (e.g. before a leading operator) has nothing to vet and passes.
    const closeSegment = (): boolean => {
      if (words.length > 0) {
        const verdict = isSafeCommand(words);
        if (verdict == null) {
          return false;
        }
        verdicts.push(verdict);
        words = [];
      }
      return true;
    };

    for (const token of parts) {
      if (typeof token !== "string") {
        // Anything that is neither a word nor an operator entry is unknown.
        if (!isParseEntryWithOp(token)) {
          return null;
        }
        // An operator ends the current segment; both the segment and the
        // operator itself must pass.
        if (!closeSegment() || !SAFE_SHELL_OPERATORS.has(token.op)) {
          return null;
        }
        continue;
      }

      if (groupingTokens.includes(token)) {
        return null;
      }
      words.push(token);
    }

    // Vet whatever follows the last operator.
    if (!closeSegment()) {
      return null;
    }

    const [firstVerdict] = verdicts;
    return firstVerdict ?? null;
  } catch (_err) {
    // If there's any kind of failure, just bail out and return null.
    return null;
  }
}

// Runtime type guard: tells whether a `ParseEntry` is one of the variants
// that carry a string `op` field. Keeping the check in one function makes the
// narrowing reusable and spares call sites from inline type assertions.
function isParseEntryWithOp(
  entry: ParseEntry,
): entry is { op: ControlOperator } | { op: "glob"; pattern: string } {
  // Strings (plain words) and null are ruled out first, so the `in` check
  // below only ever runs on a real object.
  if (typeof entry !== "object" || entry == null) {
    return false;
  }
  if (!("op" in entry)) {
    return false;
  }
  return typeof (entry as { op?: unknown }).op === "string";
}