diff --git a/.codex/agents/pr-review-api-contracts.toml b/.codex/agents/pr-review-api-contracts.toml new file mode 100644 index 0000000..3d8a031 --- /dev/null +++ b/.codex/agents/pr-review-api-contracts.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-api-contracts" +description = "Read-only AgentTower PR review agent for CLI, socket API, and integration contracts." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. +Focus on CLI text/JSON/TSV contract drift, socket request/response envelopes, +closed-set error codes, backward compatibility, environment variable behavior, +and host/container integration assumptions. +""" diff --git a/.codex/agents/pr-review-architecture.toml b/.codex/agents/pr-review-architecture.toml new file mode 100644 index 0000000..6d6b3b0 --- /dev/null +++ b/.codex/agents/pr-review-architecture.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-architecture" +description = "Read-only AgentTower PR review agent for architecture, design boundaries, and pattern consistency." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. +Focus on architecture regressions, misplaced responsibilities, broken layering, +Spec Kit/OpenSpec drift, and AgentTower-specific daemon/container/tmux design +boundaries. Prefer concrete findings over style-only commentary. +""" diff --git a/.codex/agents/pr-review-coordinator.toml b/.codex/agents/pr-review-coordinator.toml new file mode 100644 index 0000000..8325f8e --- /dev/null +++ b/.codex/agents/pr-review-coordinator.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-coordinator" +description = "Read-only AgentTower PR review coordinator that aggregates expert review findings." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. 
+Coordinate the review panel, select up to five technology-specific passes based +on the PR diff, deduplicate findings, order findings by severity, and prefer no +findings over weak or speculative findings. +""" diff --git a/.codex/agents/pr-review-data-schema.toml b/.codex/agents/pr-review-data-schema.toml new file mode 100644 index 0000000..153aad2 --- /dev/null +++ b/.codex/agents/pr-review-data-schema.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-data-schema" +description = "Read-only AgentTower PR review agent for data, SQLite schema, and migrations." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. +Focus on SQLite schema changes, migration idempotence, compatibility with prior +versions, indexes, constraints, JSON shape stability, offset persistence, +event/audit rows, and data lifecycle semantics. +""" diff --git a/.codex/agents/pr-review-maintainability.toml b/.codex/agents/pr-review-maintainability.toml new file mode 100644 index 0000000..db553bf --- /dev/null +++ b/.codex/agents/pr-review-maintainability.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-maintainability" +description = "Read-only AgentTower PR review agent for maintainability and refactoring risk." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. +Focus on unnecessary coupling, duplicated logic, unclear boundaries, +hard-to-test code, brittle fixtures, hidden global state, and refactors that +could mask behavior changes. +""" diff --git a/.codex/agents/pr-review-observability.toml b/.codex/agents/pr-review-observability.toml new file mode 100644 index 0000000..3645b92 --- /dev/null +++ b/.codex/agents/pr-review-observability.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-observability" +description = "Read-only AgentTower PR review agent for observability and operations." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. 
+Focus on diagnostics, lifecycle logging, JSONL event separation, actionable +error messages, config doctor behavior, degraded-state reporting, auditability, +and operator recovery paths. +""" diff --git a/.codex/agents/pr-review-performance.toml b/.codex/agents/pr-review-performance.toml new file mode 100644 index 0000000..26cbf07 --- /dev/null +++ b/.codex/agents/pr-review-performance.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-performance" +description = "Read-only AgentTower PR review agent for performance and scalability risks." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. +Focus on inefficient scans, unbounded loops, excessive subprocess calls, +large-output behavior, SQLite query shape, log-reader throughput, debounce +behavior, and avoidable work in CLI/daemon hot paths. +""" diff --git a/.codex/agents/pr-review-reliability.toml b/.codex/agents/pr-review-reliability.toml new file mode 100644 index 0000000..c191c6d --- /dev/null +++ b/.codex/agents/pr-review-reliability.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-reliability" +description = "Read-only AgentTower PR review agent for reliability, concurrency, and recovery." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. +Focus on daemon lifecycle, restart recovery, stale state, idempotency, +concurrent scans, queue/routing races, transaction boundaries, post-commit side +effects, partial failure isolation, and timeout behavior. +""" diff --git a/.codex/agents/pr-review-security.toml b/.codex/agents/pr-review-security.toml new file mode 100644 index 0000000..c1ab4f7 --- /dev/null +++ b/.codex/agents/pr-review-security.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-security" +description = "Read-only AgentTower PR review agent for security and privilege-boundary risks." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. 
+Focus on shell interpolation, unsafe tmux input delivery, socket permission and +SO_PEERCRED behavior, path traversal, symlink handling, secret leakage, log text +execution, Docker/tmux trust boundaries, and unsafe degradation. +""" diff --git a/.codex/agents/pr-review-testing.toml b/.codex/agents/pr-review-testing.toml new file mode 100644 index 0000000..319ca62 --- /dev/null +++ b/.codex/agents/pr-review-testing.toml @@ -0,0 +1,10 @@ +name = "agenttower-pr-review-testing" +description = "Read-only AgentTower PR review agent for tests, coverage, and QA gaps." +sandbox_mode = "read-only" + +instructions = """ +Review only. Do not edit files. +Focus on missing tests for changed behavior, weak assertions, untested degraded +paths, race coverage, CLI output contract coverage, schema migration coverage, +and whether focused validation matches the risk of the PR. +""" diff --git a/.codex/config.toml b/.codex/config.toml new file mode 100644 index 0000000..df445fb --- /dev/null +++ b/.codex/config.toml @@ -0,0 +1,3 @@ +[agents] +max_threads = 15 +max_depth = 1 diff --git a/.github/codex/prompts/pr-review.md b/.github/codex/prompts/pr-review.md new file mode 100644 index 0000000..26fdf48 --- /dev/null +++ b/.github/codex/prompts/pr-review.md @@ -0,0 +1,176 @@ +# AgentTower Codex PR Review + +You are reviewing an AgentTower pull request in GitHub Actions. + +This review is **read-only**: + +- Do not edit, create, delete, stage, commit, or push files. +- Do not run formatters or generators that mutate the checkout. +- Do not write secrets or request secrets. +- Do not print environment variables that could contain secrets. +- You may inspect files, git history, diffs, and run read-only commands. +- If running tests, choose focused tests relevant to the PR and avoid any command + that mutates source files. Temporary test artifacts are acceptable only when + produced by the test runner. 
+ +## Repository Review Skill + +Before reviewing, inspect the local AgentTower review guidance if present: + +1. `.codex/skills/agenttower-pr-review/SKILL.md` +2. `.agents/skills/agenttower-pr-review/SKILL.md` +3. `.codex/skills/agenttower-review/SKILL.md` +4. `.agents/skills/agenttower-review/SKILL.md` + +Reuse those local checks. Do not duplicate or supersede them with stale generic +instructions. If a local skill is missing, continue with this prompt. + +## PR Context + +The workflow checks out the PR merge ref and fetches: + +- Base branch: `origin/${PR_BASE_REF}` +- PR head ref: `refs/remotes/pull/${PR_NUMBER}/head` + +Environment variables available to you: + +- `PR_NUMBER` +- `PR_BASE_REF` +- `PR_BASE_SHA` +- `PR_HEAD_REF` +- `PR_HEAD_SHA` +- `PR_HEAD_REPO` +- `PR_TITLE` + +Review the PR diff against its base. Prefer: + +```bash +git diff --stat "origin/${PR_BASE_REF}...refs/remotes/pull/${PR_NUMBER}/head" +git diff --unified=0 "origin/${PR_BASE_REF}...refs/remotes/pull/${PR_NUMBER}/head" +``` + +If the fetched PR head ref is unavailable, fall back to the checked-out merge +commit and explain the fallback briefly. + +## Mandatory Expert Panel + +First determine the review panel before reviewing files. + +The standard panel is mandatory and contains exactly these 10 agents/passes: + +1. master review coordinator +2. software pattern architecture expert +3. optimization/performance expert +4. security expert +5. QA/testing expert +6. reliability/concurrency expert +7. data/schema/migration expert +8. API/contracts/integration expert +9. observability/operations expert +10. maintainability/refactoring expert + +Then dynamically add up to 5 technology-specific expert agents/passes based on +the PR contents. Examples include Python packaging, SQLite, tmux, Docker, +GitHub Actions, shell scripting, pytest, JSONL/event pipelines, or SonarQube. 
+ +If Codex subagents are available in this runner, spawn/use the agents for the +standard panel and selected technology-specific experts. If subagent spawning is +not available, still execute each expert pass explicitly yourself and state in +the final output: `Subagent spawning unavailable; expert passes executed inline.` + +Each expert pass must be read-only and should focus on concrete defects, not +style-only preferences. + +## Review Focus + +Lead with correctness and release risk. Check for: + +- behavior regressions introduced by the diff +- security and privilege boundary issues +- unsafe terminal input, shell interpolation, or prompt/log execution paths +- daemon transaction boundaries and post-commit side effects +- SQLite migration/versioning mistakes and backward compatibility +- CLI text/JSON contract drift +- socket protocol compatibility, peer-uid behavior, and permission handling +- Docker/tmux/container identity assumptions +- lifecycle log versus JSONL event-surface separation +- event reader, offset, debounce, and restart correctness +- queue/routing/arbitration race conditions when relevant +- missing or weak tests for changed behavior +- SonarQube quality-gate risks and Copilot-style review issues +- maintainability risks that hide defects or make future changes unsafe +- operational risks: degraded paths, recovery, diagnostics, idempotence + +Avoid style-only comments unless the style problem hides a concrete defect or +maintenance risk. + +Prefer no findings over speculative findings. + +## Suggested Review Procedure + +1. Read this prompt and the local review skill files. +2. Determine the mandatory panel and dynamic technology-specific passes. +3. Inspect PR metadata and changed files: + + ```bash + git status --short --branch + git diff --stat "origin/${PR_BASE_REF}...refs/remotes/pull/${PR_NUMBER}/head" + git diff --name-only "origin/${PR_BASE_REF}...refs/remotes/pull/${PR_NUMBER}/head" + ``` + +4. 
Read the relevant diffs and surrounding source. +5. Trace changed behavior into tests, contracts, specs, and docs. +6. Run focused read-only validation when practical. +7. Aggregate findings from all expert passes. + +## Final Output Format + +Post a concise Markdown review comment. + +Start with a short review-panel line: + +```text +Review panel: standard 10 passes + {N} technology-specific passes ({names}). +Subagent spawning: {available|unavailable}. +``` + +Then lead with concrete findings ordered by severity: + +```markdown +## Findings + +- [P1] Title + - File/line: `path/to/file.py:123` + - Issue: What is wrong and why it matters. + - Suggested fix: Concrete fix direction. +``` + +Severity guidance: + +- `P0`: blocks merge; data loss, severe security issue, broken build/release. +- `P1`: high-confidence correctness, security, migration, or major regression. +- `P2`: real bug or missing coverage with moderate blast radius. +- `P3`: low-risk maintainability/test gap worth addressing. + +If no actionable issues are found: + +```markdown +## Findings + +No actionable issues found. +``` + +After findings, include: + +```markdown +## Residual Risk / Test Gaps + +- ... + +## Merge Readiness + +Ready / Not ready, with one concise reason. +``` + +Use file/line references whenever possible. If an exact changed line is not +available, cite the nearest stable function, test, or contract section. 
diff --git a/.github/workflows/codex-pr-review.yml b/.github/workflows/codex-pr-review.yml
new file mode 100644
index 0000000..69eda78
--- /dev/null
+++ b/.github/workflows/codex-pr-review.yml
@@ -0,0 +1,161 @@
+name: Codex PR review
+
+# Runs a read-only Codex review for non-draft pull requests and posts the
+# result as a PR comment. Can also be started manually for a given PR number.
+on:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - ready_for_review
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: Pull request number to review
+        required: true
+        type: number
+
+# Contents stay read-only; the write scopes cover posting the review comment.
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write
+
+jobs:
+  codex-pr-review:
+    name: Codex PR review
+    runs-on: ubuntu-latest
+    if: github.event_name == 'workflow_dispatch' || github.event.pull_request.draft == false
+
+    steps:
+      # Resolves the PR number for either trigger, re-reads the PR from the API,
+      # and publishes its refs/SHAs as step outputs. Sets skip=true for drafts.
+      - name: Resolve PR metadata
+        id: pr
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // workflow_dispatch inputs arrive in the event payload. core.getInput()
+            // only reads this step's own `with:` inputs, so it cannot see them.
+            const number = context.eventName === 'workflow_dispatch'
+              ? Number((context.payload.inputs || {}).pr_number)
+              : context.payload.pull_request.number;
+
+            if (!Number.isInteger(number) || number <= 0) {
+              throw new Error(`Invalid PR number: ${number}`);
+            }
+
+            const { data: pull } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: number,
+            });
+
+            if (pull.draft) {
+              core.setOutput('skip', 'true');
+              core.notice(`Skipping draft PR #${number}.`);
+              return;
+            }
+
+            core.setOutput('skip', 'false');
+            core.setOutput('number', String(number));
+            core.setOutput('base_ref', pull.base.ref);
+            core.setOutput('base_sha', pull.base.sha);
+            core.setOutput('head_ref', pull.head.ref);
+            core.setOutput('head_sha', pull.head.sha);
+            // head.repo is null once the source fork has been deleted.
+            core.setOutput('head_repo', pull.head.repo ? pull.head.repo.full_name : '');
+            core.setOutput('title', pull.title || '');
+
+      - name: Check out PR merge ref
+        if: steps.pr.outputs.skip != 'true'
+        uses: actions/checkout@v4
+        with:
+          ref: refs/pull/${{ steps.pr.outputs.number }}/merge
+          fetch-depth: 0
+          persist-credentials: false
+
+      # Creates the origin/<base> and pull/<n>/head tracking refs that the
+      # review prompt diffs against.
+      - name: Fetch PR base and head refs
+        if: steps.pr.outputs.skip != 'true'
+        env:
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          PR_BASE_REF: ${{ steps.pr.outputs.base_ref }}
+        run: |
+          set -euo pipefail
+          git fetch --no-tags origin \
+            "+refs/heads/${PR_BASE_REF}:refs/remotes/origin/${PR_BASE_REF}" \
+            "+refs/pull/${PR_NUMBER}/head:refs/remotes/pull/${PR_NUMBER}/head"
+
+      - name: Run Codex review
+        if: steps.pr.outputs.skip != 'true'
+        id: run_codex
+        uses: openai/codex-action@v1
+        env:
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          PR_BASE_REF: ${{ steps.pr.outputs.base_ref }}
+          PR_BASE_SHA: ${{ steps.pr.outputs.base_sha }}
+          PR_HEAD_REF: ${{ steps.pr.outputs.head_ref }}
+          PR_HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+          PR_HEAD_REPO: ${{ steps.pr.outputs.head_repo }}
+          PR_TITLE: ${{ steps.pr.outputs.title }}
+        with:
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          prompt-file: .github/codex/prompts/pr-review.md
+          output-file: codex-output.md
+          safety-strategy: drop-sudo
+          sandbox: workspace-write
+
+      # Runs even when the review step failed so the PR still gets a comment, but
+      # only once metadata resolution succeeded: skip is exactly 'false' then, and
+      # empty when that step failed and no PR number is available.
+      - name: Post Codex review comment
+        if: always() && steps.pr.outputs.skip == 'false'
+        uses: actions/github-script@v7
+        env:
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          CODEX_FINAL_MESSAGE: ${{ steps.run_codex.outputs.final-message }}
+        with:
+          script: |
+            const fs = require('fs');
+
+            // Prefer the action's final message; fall back to the output file.
+            let body = (process.env.CODEX_FINAL_MESSAGE || '').trim();
+            if (!body && fs.existsSync('codex-output.md')) {
+              body = fs.readFileSync('codex-output.md', 'utf8').trim();
+            }
+
+            if (!body) {
+              body = 'Codex PR review did not produce a final message. See the workflow logs and uploaded artifact for details.';
+            }
+
+            const maxCommentBytes = 60000;
+            const encoded = Buffer.from(body, 'utf8');
+            if (encoded.length > maxCommentBytes) {
+              // Back the cut up to a character boundary. UTF-8 continuation bytes
+              // look like 0b10xxxxxx; cutting in front of one splits a character.
+              let cut = maxCommentBytes;
+              while (cut > 0 && (encoded[cut] & 0xc0) === 0x80) {
+                cut -= 1;
+              }
+              body = encoded.subarray(0, cut).toString('utf8')
+                + '\n\n[Truncated: see the uploaded `codex-output.md` artifact for the full review.]';
+            }
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: Number(process.env.PR_NUMBER),
+              body,
+            });
+
+      - name: Upload Codex review artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: codex-pr-review-${{ steps.pr.outputs.number || github.run_id }}
+          path: codex-output.md
+          if-no-files-found: warn
+          retention-days: 14
diff --git a/.gitignore b/.gitignore
index cd86772..d782fbd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,6 @@ build/
 
 # E2E + smoke test scratch dirs
 .tmp/
+
+# Host/bench-local Codex driver config (operator-specific, never committed)
+.codex/speckit-claude-driver.json