From 9dc4af415e5e74a95ca041754176e523382e0dc8 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Mon, 8 Jun 2026 20:46:25 -0700 Subject: [PATCH 01/16] Improve EEGPrep autoreview skill --- .agents/skills/oc-autoreview-adapted/SKILL.md | 190 +-- .../oc-autoreview-adapted/agents/openai.yaml | 4 +- .../oc-autoreview-adapted/scripts/autoreview | 1064 +++++++++++++---- .../scripts/test-review-harness.ps1 | 51 + .../scripts/test-review-harness.py | 62 +- 5 files changed, 974 insertions(+), 397 deletions(-) create mode 100644 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md index a0810028..763a9f4b 100644 --- a/.agents/skills/oc-autoreview-adapted/SKILL.md +++ b/.agents/skills/oc-autoreview-adapted/SKILL.md @@ -1,166 +1,108 @@ --- name: oc-autoreview-adapted -description: Run an autonomous EEGPrep-focused structured autoreview on local changes, branches, commits, or PRs using the bundled Codex helper. Use when the user asks for autoreview, OC autoreview, closeout review, second-pass review, final review before commit/push/PR, or when non-trivial EEGPrep code changes need a high-signal correctness, EEGLAB parity, GUI/session, tests, and repo-instruction check. +description: Run autonomous EEGPrep-focused structured autoreview on dirty changes, branches, commits, PR stacks, or the whole EEGPrep-owned codebase; verify and fix real findings from first principles using AGENTS.md, EEGLAB parity, GUI/console, tests, docs, and security constraints. --- # OC Autoreview Adapted -Run the bundled structured review helper as an autonomous closeout check for -EEGPrep. This skill adapts the OpenClaw autoreview principles to this project: -one frozen diff bundle, one structured JSON result, validated changed-file -findings, read-only inspection, heartbeat progress, optional parallel tests, and -repeat-until-clean behavior. +Use the bundled helper for high-signal closeout review or whole-codebase bug hunts. It builds one bounded review bundle, runs one or more read-only reviewer engines, validates structured JSON, prints heartbeats for long runs, and exits nonzero when actionable findings remain. ## Contract -- Run the helper for real unless the user explicitly asks for a plan or manual - review only. -- Treat review output as advisory. Verify every accepted finding by reading the - real code path and adjacent files before fixing or reporting it. -- Keep going until the helper exits cleanly with no accepted/actionable findings - or until you consciously reject a remaining finding with a concrete reason. -- If a review-triggered fix changes code, rerun focused tests and rerun the - helper on the same target. -- Do not run nested review tools from inside a review. The helper builds one - bundle, calls Codex in read-only mode, validates the result, and exits. -- Do not push, stage, commit, or open a PR just to run autoreview. Do those only - when the user requested that action. -- Be patient. The helper prints heartbeat lines such as - `review still running: codex elapsed=... pid=...`; those are healthy progress. - -## Helper - -Use the repo-local helper: +- Run it for real unless the user asked only for a plan. +- Treat output as advisory. Verify every accepted finding in the real code path before fixing or reporting it. +- Accept concrete bugs, regressions, EEGLAB parity breaks, unsafe I/O/security risks, missing tests tied to behavior, and maintainability issues that cause real future defects. +- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, and subjective MATLAB/Python style comments. +- If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason. +- Do not invoke nested review tools from inside review. The helper already runs one structured review path. +- Do not push/stage/commit/open PR unless the user requested that separately. -```bash -.agents/skills/oc-autoreview-adapted/scripts/autoreview --help -``` - -The helper: - -- defaults to Codex with read-only sandboxing and web search enabled; -- chooses dirty local changes first in `--mode auto`; -- otherwise uses the current PR base when discoverable, then `origin/develop`; -- accepts `--mode local`, `--mode branch --base origin/develop`, and - `--mode commit --commit HEAD`; -- includes root/scoped `AGENTS.md` instructions in the review bundle; -- validates structured JSON against an EEGPrep-specific schema; -- filters findings to changed files only; -- exits nonzero when accepted/actionable findings remain; -- supports `--prompt`, `--prompt-file`, `--dataset`, `--json-output`, - `--output`, `--parallel-tests`, `--require-finding`, `--expect-findings`, - `--no-web-search`, `--model`, and `--thinking`. - -The smoke harness creates a temporary EEG-style fixture repo: - -```bash -.agents/skills/oc-autoreview-adapted/scripts/test-review-harness --dry-run -``` +## Commands -Run the full harness only when it is acceptable to spend a real Codex review: +Set paths once: ```bash -.agents/skills/oc-autoreview-adapted/scripts/test-review-harness --fixture buggy +export AUTOREVIEW=".agents/skills/oc-autoreview-adapted/scripts/autoreview" +export AUTOREVIEW_HARNESS=".agents/skills/oc-autoreview-adapted/scripts/test-review-harness" ``` -## Pick Target - -Use the smallest target that covers the request. - Dirty local work: ```bash -.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode local +"$AUTOREVIEW" --mode local ``` -Branch or PR work: +Branch or stacked PR work: ```bash -.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode branch --base origin/develop +base=$(gh pr view --json baseRefName --jq .baseRefName 2>/dev/null || echo develop) +"$AUTOREVIEW" --mode branch --base "origin/$base" ``` -If an open PR exists, prefer its actual base: +Single committed change: ```bash -base=$(gh pr view --json baseRefName --jq .baseRefName) -.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode branch --base "origin/$base" +"$AUTOREVIEW" --mode commit --commit HEAD ``` -Committed single change: +Whole EEGPrep-owned codebase audit: ```bash -.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode commit --commit HEAD +"$AUTOREVIEW" --mode codebase --thinking codex=xhigh ``` -Do not force local mode after committing. A clean local review only proves there -is no dirty patch. +The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files and excludes vendored EEGLAB/reference sample data by default; the reviewer may inspect files read-only and report real bugs anywhere in scope. + +## Useful Options -## Parallel Closeout +- `--engine codex|claude|droid|copilot`; default is Codex. +- `--reviewers codex,claude` or `--panel` for a multi-reviewer pass. +- `--model codex=gpt-5.1 --thinking codex=xhigh`; Claude also accepts `max`. +- `--stream-engine-output` to see compact live engine activity. +- `--parallel-tests "uv run pytest tests/test_file.py"` to run tests while review runs. +- `--prompt` / `--prompt-file` / `--dataset` to add evidence. +- `--json-output /tmp/review.json` and `--output /tmp/review.txt` for artifacts. +- `--mode uncommitted` is an alias for `local`; use branch/commit modes after committing. +- `--skip-fetch` avoids fetching before branch diffs. +- `--heartbeat-seconds 60` controls long-run heartbeat cadence. -It is OK to run focused tests concurrently with review after formatting-sensitive -work is done: +Smoke check: ```bash -.agents/skills/oc-autoreview-adapted/scripts/autoreview \ - --parallel-tests "uv run pytest tests/test_pop_select.py" +"$AUTOREVIEW_HARNESS" --dry-run +"$AUTOREVIEW_HARNESS" --fixture buggy --engine codex ``` -If tests or review findings lead to edits, rerun the affected tests and rerun -autoreview. Stop when the final helper run exits 0 with no accepted/actionable -findings. Do not run another review only for cleaner wording. +On Windows, use: + +```powershell +python .agents\skills\oc-autoreview-adapted\scripts\autoreview --help +.agents\skills\oc-autoreview-adapted\scripts\test-review-harness.ps1 -Fixture buggy -Engine codex +``` ## EEGPrep Review Surface -The helper prompt asks Codex to prioritize: - -- correctness bugs, import/runtime failures, wrong numerical results, and broken - common workflows; -- EEGLAB parity in APIs, `pop_*` wrappers, history commands, GUI behavior, event - semantics, and expected data structures; -- EEG dict fields including `data`, `nbchan`, `pnts`, `trials`, `srate`, - `xmin`, `xmax`, `times`, `chanlocs`, `event`, `urevent`, `epoch`, `history`, - `icaact`, `icawinv`, `icasphere`, `icaweights`, and `icachansind`; -- MATLAB/Python indexing boundaries, especially 1-based EEGLAB latencies and - user-facing indices versus 0-based Python arrays; -- channel-major shape assumptions: continuous `(nbchan, pnts)` and epoched - `(nbchan, pnts, trials)`; +Prioritize: + +- correctness, runtime/import failures, bad numerical results, broken common workflows; +- EEGLAB parity in APIs, `pop_*` wrappers, history commands, GUI layout/behavior, events, and data structures; +- EEG dict invariants: `data`, `nbchan`, `pnts`, `trials`, `srate`, `xmin`, `xmax`, `times`, `chanlocs`, `event`, `urevent`, `epoch`, `history`, ICA fields; +- 1-based EEGLAB user indices/latencies versus 0-based Python array indices; +- channel-major continuous `(nbchan, pnts)` and epoched `(nbchan, pnts, trials)` data; - GUI plus `eegprep-console` synchronization through `EEGPrepSession`; -- `return_com=True`, `(EEG, com)` returns, history strings, and session update - paths for user-facing `pop_*` functions; -- runtime independence from `src/eegprep/eeglab/`; -- packaged Markdown help resources for GUI Help or `pophelp`; -- missing tests tied to changed behavior; -- concrete security, path, file I/O, and dependency risks; -- realistic EEG-size performance regressions. - -## Triage Findings - -Accept findings only when they are concrete and introduced or exposed by the -reviewed change. Reject: - -- pre-existing issues outside the diff; -- generic linter/formatter comments; -- broad refactors and speculative abstractions; -- unlikely edge cases that would complicate the code without protecting real - workflows; -- subjective MATLAB-vs-Python style preferences that do not break EEGPrep's - parity contract. - -For each accepted finding, fix the smallest ownership boundary that addresses -the bug. For each rejected finding, record the reason briefly in the final -report. Add an inline code comment only when it documents a real invariant that -future reviewers need to know. - -## Final Report - -Include: - -- review command used; -- tests/proof run; -- findings accepted, fixed, or rejected, briefly why; -- the clean result from the final helper run, or the exact remaining risk if a - finding was consciously left open. - -If the final helper run exits 0 and prints -`autoreview clean: no accepted/actionable findings reported`, report that run as -clean and stop. +- `return_com=True`, `(EEG, com)` returns, history replay, and session update paths; +- runtime independence from `src/eegprep/eeglab`; +- packaged Markdown help for GUI Help / `pophelp`; +- realistic EEG-size performance and concrete security/path/I/O risks. + +## Loop + +1. Format first if formatting can change line locations. +2. Run autoreview on the smallest sufficient target. +3. Verify each finding against code and AGENTS.md. +4. Fix accepted findings at the right ownership boundary. +5. Run focused tests, then broader tests if risk warrants. +6. Rerun the same autoreview target. +7. Final response: command used, tests run, findings fixed/rejected, and final clean result or remaining risk. + +If the helper prints `autoreview clean: no accepted/actionable findings reported` and exits 0, report that as clean and stop. diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml index 08ca5ab1..7c1c3100 100644 --- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml +++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "OC Autoreview Adapted" - short_description: "EEGPrep-focused autoreview workflow" - default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep changes before closeout." + short_description: "EEGPrep autoreview for diffs or codebase audits" + default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real findings." diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview index 751fb963..5fdee362 100755 --- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview +++ b/.agents/skills/oc-autoreview-adapted/scripts/autoreview @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -from __future__ import annotations - -# Adapted for EEGPrep from OpenClaw's MIT-licensed autoreview helper. +# Adapted for EEGPrep from OpenClaw's MIT-licensed autoreview helper: +# https://github.com/openclaw/agent-skills/tree/main/skills/autoreview +# # Original copyright (c) 2026 openclaw. # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -21,35 +21,31 @@ from __future__ import annotations # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import annotations import argparse +import concurrent.futures +import copy import json import os +import queue import subprocess import sys import tempfile import textwrap +import threading import time from pathlib import Path -from typing import Any +from typing import Any, Callable +ENGINES = ("codex", "claude", "droid", "copilot") DEFAULT_BASE = "origin/develop" TRUNK_BRANCHES = {"develop", "main", "master"} -REPORT_KEYS = { - "findings", - "overall_correctness", - "overall_explanation", - "overall_confidence", -} -FINDING_KEYS = { - "title", - "body", - "priority", - "confidence", - "category", - "code_location", -} +CODEBASE_EXCLUDED_PREFIXES = ( + "src/eegprep/eeglab/", + "sample_data/", +) CATEGORIES = { "bug", "security", @@ -62,6 +58,13 @@ CATEGORIES = { "docs_help", "performance", } +THINKING_LEVELS_BY_ENGINE = { + "codex": {"low", "medium", "high", "xhigh"}, + "claude": {"low", "medium", "high", "xhigh", "max"}, + "droid": set(), + "copilot": set(), +} + SCHEMA: dict[str, Any] = { "type": "object", @@ -91,7 +94,10 @@ SCHEMA: dict[str, Any] = { "body": {"type": "string", "minLength": 1, "maxLength": 2400}, "priority": {"type": "string", "enum": ["P0", "P1", "P2", "P3"]}, "confidence": {"type": "number", "minimum": 0, "maximum": 1}, - "category": {"type": "string", "enum": sorted(CATEGORIES)}, + "category": { + "type": "string", + "enum": sorted(CATEGORIES), + }, "code_location": { "type": "object", "additionalProperties": False, @@ -115,11 +121,7 @@ SCHEMA: dict[str, Any] = { def run( - args: list[str], - cwd: Path, - *, - input_text: str | None = None, - check: bool = True, + args: list[str], cwd: Path, *, input_text: str | None = None, check: bool = True ) -> subprocess.CompletedProcess[str]: result = subprocess.run( args, @@ -130,11 +132,8 @@ def run( stderr=subprocess.PIPE, ) if check and result.returncode != 0: - command = " ".join(args) - raise SystemExit( - f"command failed ({result.returncode}): {command}\n" - f"{result.stderr or result.stdout}" - ) + cmd = " ".join(args) + raise SystemExit(f"command failed ({result.returncode}): {cmd}\n{result.stderr or result.stdout}") return result @@ -142,15 +141,26 @@ def run_with_heartbeat( args: list[str], cwd: Path, *, - input_text: str, + input_text: str | None = None, label: str, - heartbeat_seconds: int, + heartbeat_seconds: int = 60, + stream_output: bool = False, + stream_display: Callable[[str, str], str | None] | None = None, ) -> subprocess.CompletedProcess[str]: + if stream_output: + return run_with_stream( + args, + cwd, + input_text=input_text, + label=label, + heartbeat_seconds=heartbeat_seconds, + stream_display=stream_display, + ) started = time.monotonic() proc = subprocess.Popen( args, cwd=cwd, - stdin=subprocess.PIPE, + stdin=subprocess.PIPE if input_text is not None else None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, @@ -166,11 +176,87 @@ def run_with_heartbeat( except subprocess.TimeoutExpired: first_communicate = False elapsed = int(time.monotonic() - started) - print( - f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", - file=sys.stderr, - flush=True, - ) + print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True) + + +def run_with_stream( + args: list[str], + cwd: Path, + *, + input_text: str | None, + label: str, + heartbeat_seconds: int, + stream_display: Callable[[str, str], str | None] | None, +) -> subprocess.CompletedProcess[str]: + started = time.monotonic() + proc = subprocess.Popen( + args, + cwd=cwd, + stdin=subprocess.PIPE if input_text is not None else None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + ) + events: queue.Queue[tuple[str, str | None]] = queue.Queue() + stdout_parts: list[str] = [] + stderr_parts: list[str] = [] + + def read_stream(name: str, stream: Any) -> None: + try: + for line in iter(stream.readline, ""): + events.put((name, line)) + finally: + events.put((name, None)) + + def write_stdin() -> None: + if proc.stdin is None or input_text is None: + return + try: + proc.stdin.write(input_text) + proc.stdin.close() + except BrokenPipeError: + return + + threads = [ + threading.Thread(target=read_stream, args=("stdout", proc.stdout), daemon=True), + threading.Thread(target=read_stream, args=("stderr", proc.stderr), daemon=True), + ] + for thread in threads: + thread.start() + stdin_thread = threading.Thread(target=write_stdin, daemon=True) + stdin_thread.start() + + open_streams = 2 + while open_streams: + try: + name, line = events.get(timeout=heartbeat_seconds) + except queue.Empty: + elapsed = int(time.monotonic() - started) + print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True) + continue + if line is None: + open_streams -= 1 + continue + if name == "stdout": + stdout_parts.append(line) + else: + stderr_parts.append(line) + display = stream_display(name, line) if stream_display else line + if display: + target = sys.stdout if name == "stdout" else sys.stderr + target.write(display) + target.flush() + + for thread in threads: + thread.join() + stdin_thread.join(timeout=1) + returncode = proc.wait() + return subprocess.CompletedProcess(args, returncode, "".join(stdout_parts), "".join(stderr_parts)) + + +def git(repo: Path, *args: str, check: bool = True) -> str: + return run([resolve_command("git", repo), *args], repo, check=check).stdout def repo_root() -> Path: @@ -200,13 +286,8 @@ def discover_repo_root(start: Path) -> Path | None: current = current.parent -def git(repo: Path, *args: str, check: bool = True) -> str: - return run([resolve_command("git", repo), *args], repo, check=check).stdout - - def current_branch(repo: Path) -> str: - branch = git(repo, "branch", "--show-current", check=False).strip() - return branch or "detached" + return git(repo, "branch", "--show-current", check=False).strip() or "detached" def is_dirty(repo: Path) -> bool: @@ -214,17 +295,19 @@ def is_dirty(repo: Path) -> bool: def choose_target(repo: Path, mode: str, base_ref: str | None) -> tuple[str, str | None]: - normalized = "local" if mode == "uncommitted" else mode + mode = "local" if mode == "uncommitted" else mode branch = current_branch(repo) - if normalized == "local" or (normalized == "auto" and is_dirty(repo)): + if mode == "local" or (mode == "auto" and is_dirty(repo)): return "local", None - if normalized == "commit": + if mode == "codebase": + return "codebase", None + if mode == "commit": return "commit", None - if normalized == "branch" or (normalized == "auto" and branch not in TRUNK_BRANCHES): + if mode == "branch" or (mode == "auto" and branch not in TRUNK_BRANCHES): return "branch", base_ref or detect_pr_base(repo) or DEFAULT_BASE raise SystemExit( "no review target: clean trunk checkout and no forced mode. " - "Pass --mode branch --base or --mode commit --commit ." + "Pass --mode codebase, --mode branch --base , or --mode commit --commit ." ) @@ -232,24 +315,16 @@ def detect_pr_base(repo: Path) -> str | None: gh_bin = find_command("gh", repo) if not gh_bin: return None - result = run( - [gh_bin, "pr", "view", "--json", "baseRefName", "--jq", ".baseRefName"], - repo, - check=False, - ) + result = run([gh_bin, "pr", "view", "--json", "baseRefName", "--jq", ".baseRefName"], repo, check=False) base = result.stdout.strip() - if result.returncode != 0 or not base: - return None - return f"origin/{base}" + return f"origin/{base}" if result.returncode == 0 and base else None def resolve_command(name: str, repo: Path) -> str: resolved = find_command(name, repo) if resolved: return resolved - raise SystemExit( - f"executable not found: {name}. Install it or pass an explicit trusted path." - ) + raise SystemExit(f"executable not found: {name}. Install it or pass an explicit trusted path when supported.") def find_command(name: str, repo: Path) -> str | None: @@ -276,18 +351,18 @@ def find_command(name: str, repo: Path) -> str | None: return None +def is_within(path: Path, root: Path) -> bool: + return path == root or path.is_relative_to(root) + + def has_directory_component(name: str, command: Path) -> bool: separators = [separator for separator in (os.sep, os.altsep) if separator] - return command.is_absolute() or bool(command.drive) or any( - separator in name for separator in separators - ) + return command.is_absolute() or bool(command.drive) or any(separator in name for separator in separators) def first_executable_candidate(path: Path, *, reject_root: Path | None = None) -> str | None: if os.name == "nt" and not path.suffix: - extensions = [ - ext for ext in os.environ.get("PATHEXT", ".COM;.EXE;.BAT;.CMD").split(";") if ext - ] + extensions = [ext for ext in os.environ.get("PATHEXT", ".COM;.EXE;.BAT;.CMD").split(";") if ext] candidates = [path.with_suffix(ext.lower()) for ext in extensions] candidates.extend(path.with_suffix(ext.upper()) for ext in extensions) candidates.append(path) @@ -305,11 +380,7 @@ def first_executable_candidate(path: Path, *, reject_root: Path | None = None) - return None -def is_within(path: Path, root: Path) -> bool: - return path == root or path.is_relative_to(root) - - -def bounded(text: str, limit: int = 200_000) -> str: +def bounded(text: str, limit: int = 180_000) -> str: if len(text) <= limit: return text return text[:limit] + f"\n\n[truncated at {limit} characters]\n" @@ -322,14 +393,15 @@ def bounded_field(text: str, limit: int) -> str: return text[: max(0, limit - len(suffix))] + suffix -def read_text(path: Path, limit: int = 50_000) -> str: +def read_text(path: Path, limit: int = 40_000) -> str: try: data = path.read_bytes() except OSError as exc: return f"[unreadable: {exc}]" if b"\0" in data: return "[binary file omitted]" - return bounded(data.decode("utf-8", errors="replace"), limit) + text = data.decode("utf-8", errors="replace") + return bounded(text, limit) def local_bundle(repo: Path) -> str: @@ -343,13 +415,12 @@ def local_bundle(repo: Path) -> str: git(repo, "diff", "--stat"), bounded(git(repo, "diff", "--patch", "--find-renames")), ] - untracked = [ - line for line in git(repo, "ls-files", "--others", "--exclude-standard").splitlines() if line - ] + untracked = [line for line in git(repo, "ls-files", "--others", "--exclude-standard").splitlines() if line] if untracked: parts.append("# Untracked Files") for rel in untracked: - parts.append(f"## {rel}\n{read_text(repo / rel)}") + path = repo / rel + parts.append(f"## {rel}\n{read_text(path)}") return "\n\n".join(parts) @@ -366,6 +437,37 @@ def branch_bundle(repo: Path, base_ref: str, *, skip_fetch: bool) -> str: ) +def codebase_paths(repo: Path, scopes: list[str] | None = None) -> set[str]: + prefixes = [scope.strip().rstrip("/") for scope in scopes or [] if scope.strip()] + paths: set[str] = set() + for rel in git(repo, "ls-files").splitlines(): + if not rel or any(rel.startswith(prefix) for prefix in CODEBASE_EXCLUDED_PREFIXES): + continue + if prefixes and not any(rel == prefix or rel.startswith(f"{prefix}/") for prefix in prefixes): + continue + paths.add(rel) + return paths + + +def codebase_bundle(repo: Path, paths: set[str]) -> str: + grouped: dict[str, int] = {} + for rel in paths: + top = rel.split("/", 1)[0] + grouped[top] = grouped.get(top, 0) + 1 + inventory = "\n".join(f"- {path}" for path in sorted(paths)) + summary = "\n".join(f"- {name}: {count} tracked files" for name, count in sorted(grouped.items())) + return "\n\n".join( + [ + "# Codebase Audit", + "This is a repository-wide audit, not a diff review. The reviewer may inspect in-scope files with read-only tools.", + "# Scope Summary", + summary or "[no tracked files]", + "# In-Scope Tracked Files", + bounded(inventory, 120_000), + ] + ) + + def commit_bundle(repo: Path, commit_ref: str) -> str: return "\n\n".join( [ @@ -379,6 +481,8 @@ def commit_bundle(repo: Path, commit_ref: str) -> str: def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: str) -> set[str]: names: set[str] = set() + if target == "codebase": + return codebase_paths(repo) if target == "local": sources = [ git(repo, "diff", "--name-only", "--cached"), @@ -386,8 +490,7 @@ def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: st git(repo, "ls-files", "--others", "--exclude-standard"), ] elif target == "branch": - if target_ref is None: - raise SystemExit("internal error: branch target missing base ref") + assert target_ref sources = [git(repo, "diff", "--name-only", f"{target_ref}...HEAD")] else: sources = [git(repo, "show", "--name-only", "--format=", commit_ref)] @@ -399,9 +502,28 @@ def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: st return names -def instruction_paths(repo: Path, changed_paths: set[str]) -> list[Path]: +def load_extra_prompt(args: argparse.Namespace) -> str: + chunks: list[str] = [] + for value in args.prompt or []: + chunks.append(value) + for path in args.prompt_file or []: + chunks.append(Path(path).read_text()) + return "\n\n".join(chunks) + + +def load_datasets(args: argparse.Namespace) -> str: + chunks: list[str] = [] + for spec in args.dataset or []: + path = Path(spec) + if path.is_dir(): + raise SystemExit(f"--dataset must be a file, got directory: {path}") + chunks.append(f"# Dataset: {path}\n{read_text(path)}") + return "\n\n".join(chunks) + + +def instruction_paths(repo: Path, scope_paths: set[str]) -> list[Path]: paths = {repo / "AGENTS.md"} - for rel in changed_paths: + for rel in scope_paths: rel_path = Path(rel) if rel_path.is_absolute() or ".." in rel_path.parts: continue @@ -416,106 +538,76 @@ def instruction_paths(repo: Path, changed_paths: set[str]) -> list[Path]: return sorted(path for path in paths if path.exists()) -def instruction_bundle(repo: Path, changed_paths: set[str]) -> str: - paths = instruction_paths(repo, changed_paths) +def instruction_bundle(repo: Path, scope_paths: set[str]) -> str: + paths = instruction_paths(repo, scope_paths) if not paths: return "# Repository Instructions\n[no AGENTS.md files found]" parts = ["# Repository Instructions"] for path in paths: rel = path.relative_to(repo) - parts.append(f"## {rel}\n{read_text(path)}") + parts.append(f"## {rel}\n{read_text(path, limit=80_000)}") return "\n\n".join(parts) -def load_extra_prompt(args: argparse.Namespace) -> str: - chunks: list[str] = [] - for value in args.prompt or []: - chunks.append(value) - for path in args.prompt_file or []: - chunks.append(Path(path).read_text()) - return "\n\n".join(chunks) - - -def load_datasets(args: argparse.Namespace) -> str: - chunks: list[str] = [] - for spec in args.dataset or []: - path = Path(spec) - if path.is_dir(): - raise SystemExit(f"--dataset must be a file, got directory: {path}") - chunks.append(f"# Dataset: {path}\n{read_text(path)}") - return "\n\n".join(chunks) - - def build_prompt( repo: Path, target: str, target_ref: str | None, - changed_paths: set[str], + scope_paths: set[str], instructions: str, bundle: str, extra_prompt: str, datasets: str, ) -> str: target_line = f"{target} {target_ref}" if target_ref else target - changed = "\n".join(f"- {path}" for path in sorted(changed_paths)) or "[no changed paths]" + scope_label = "Changed Paths" if target != "codebase" else "Review Scope" + scope_list = "\n".join(f"- {path}" for path in sorted(scope_paths)) or "[no paths]" + scope_rule = ( + "Report only actionable defects introduced or exposed by this change." + if target != "codebase" + else ( + "Report concrete actionable defects in the in-scope EEGPrep-owned codebase. " + "Do not require diff provenance, but do avoid stale vendored/reference code and vague wishlist items." + ) + ) return textwrap.dedent( f""" - You are a senior EEGPrep code reviewer. Review the provided git change bundle only. - Be autonomous: inspect files as needed, reason through the changed behavior, and return a - structured result without asking follow-up questions. + You are a senior EEGPrep code reviewer. Review the provided bundle and inspect files as needed. Hard rules: - Return exactly one JSON object and nothing else. Do not wrap it in Markdown. - The JSON object must match this schema exactly: {json.dumps(SCHEMA, indent=2)} - Do not modify files. - - Do not invoke nested reviewers or review tools. Forbidden commands include: - codex review, autoreview, oracle review, and any reviewer-panel workflow. - - You may use read-only tools and web search to inspect source files, dependency docs, - EEGLAB reference behavior, current APIs, and security implications. - - Shell commands, if available, must be read-only inspection commands. Do not run tests, - formatters, package installs, generators, git mutation commands, or commands that write files. - - Report only actionable defects introduced or exposed by this change. - - Prefer high-signal findings over style feedback. False positives waste maintainer time. + - Do not invoke nested reviewers or review tools. + - Forbidden nested review commands include: codex review, autoreview, claude review, oracle review. + - You may use read-only tools and web search to inspect files, dependency contracts, upstream docs, current behavior, and security implications. + - Shell commands, if available, must be read-only inspection commands. Do not run tests, formatters, package installs, generators, network mutation commands, git mutation commands, or commands that write files. + - {scope_rule} + - Prefer high-signal findings over style feedback. + - Include security findings only for concrete risks: injection, secret leaks, authz/authn bypass, path traversal, unsafe deserialization, unsafe filesystem/shell use, privacy leaks, and credential handling. + - Do not reject legitimate functionality merely because it touches shell, filesystem, network, auth, or sensitive data. Report a security finding only when the patch creates a concrete exploitable risk, removes an important safety check, or lacks validation at a trust boundary. - For each finding, use the smallest file/line location that demonstrates the issue. - If there are no actionable findings, return an empty findings array and mark the patch correct. EEGPrep review priorities: - Correctness bugs, import/runtime failures, wrong numerical results, and broken common workflows. - - EEGLAB parity regressions in public API behavior, pop_* wrappers, history commands, - GUI layout/behavior, event semantics, and expected data structures. - - EEG dict semantics for data, nbchan, pnts, trials, srate, xmin, xmax, times, - chanlocs, event, urevent, epoch, history, icaact, icawinv, icasphere, - icaweights, and icachansind. - - MATLAB/Python boundary mistakes, especially 1-based EEGLAB event latencies and - user-facing indices versus 0-based Python arrays. - - Channel-major data shape assumptions: continuous data is usually (nbchan, pnts), - and epoched data is usually (nbchan, pnts, trials). - - GUI plus eegprep-console session sync: EEG, ALLEEG, CURRENTSET, LASTCOM, ALLCOM, - STUDY, and CURRENTSTUDY must stay synchronized through EEGPrepSession helpers. - - User-facing pop_* contracts: return_com=True, (EEG, com) returns, history strings, - and GUI/session update paths. - - Runtime code must not depend on src/eegprep/eeglab existing. Use it only as a - development reference. - - User-facing GUI Help or pophelp behavior needs packaged Markdown help resources. - - Tests should cover realistic regressions. Suggest exact missing tests only when the - gap is tied to changed behavior. - - Security findings must be concrete: path traversal, unsafe shell/filesystem use, - unsafe deserialization, credential/privacy leaks, or trust-boundary validation loss. - - Performance findings must be realistic for EEG data sizes. - - Do not flag: - - Pre-existing issues outside the reviewed change. - - Generic linter/formatter comments. - - Broad refactors or speculative abstractions. - - Unlikely edge cases that would complicate the code without protecting real workflows. - - Subjective MATLAB-vs-Python style preferences unless they break EEGPrep's parity contract. + - EEGLAB parity in APIs, pop_* wrappers, history commands, GUI behavior, event semantics, and expected data structures. + - EEG dict fields: data, nbchan, pnts, trials, srate, xmin, xmax, times, chanlocs, event, urevent, epoch, history, icaact, icawinv, icasphere, icaweights, icachansind. + - MATLAB/Python indexing boundaries: EEGLAB event latencies and user-facing indices are usually 1-based; Python arrays are 0-based. + - Channel-major data shape: continuous (nbchan, pnts), epoched (nbchan, pnts, trials). + - GUI plus eegprep-console session sync through EEGPrepSession. + - User-facing pop_* contracts: return_com=True, (EEG, com) returns, history strings, and GUI/session update paths. + - Runtime code must not depend on src/eegprep/eeglab existing; use it only as a development reference. + - GUI Help/pophelp needs EEGPrep-owned packaged Markdown resources. + - Missing tests only when tied to a concrete changed or audited behavior. + - Realistic EEG-size performance regressions. Review target: {target_line} Repository: {repo} - # Changed Paths - {changed} + # {scope_label} + {scope_list} {extra_prompt} @@ -537,6 +629,10 @@ def write_json_temp(data: dict[str, Any]) -> Path: def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if not args.tools: + raise SystemExit( + "--no-tools is not supported by the Codex engine; use --engine claude --no-tools for a no-tools run" + ) schema_path = write_json_temp(SCHEMA) output_path = Path(tempfile.NamedTemporaryFile("w", suffix=".json", delete=False).name) cmd = [resolve_command(args.codex_bin, repo), "--ask-for-approval", "never"] @@ -546,9 +642,11 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str: cmd.extend(["--model", args.model]) if args.thinking: cmd.extend(["-c", f'model_reasoning_effort="{args.thinking}"']) + cmd.append("exec") + if args.stream_engine_output: + cmd.append("--json") cmd.extend( [ - "exec", "--ephemeral", "-C", str(repo), @@ -567,6 +665,8 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str: input_text=prompt, label="codex", heartbeat_seconds=args.heartbeat_seconds, + stream_output=args.stream_engine_output, + stream_display=CodexStreamDisplay() if args.stream_engine_output else None, ) try: output = output_path.read_text() @@ -578,6 +678,258 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str: return output or result.stdout +def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str: + cmd = [ + resolve_command(args.claude_bin, repo), + "--print", + "--no-session-persistence", + "--output-format", + "stream-json" if args.stream_engine_output else "json", + "--json-schema", + json.dumps(SCHEMA), + ] + if args.tools: + cmd.extend(["--allowedTools", claude_allowed_tools(args)]) + else: + cmd.extend(["--tools", ""]) + if args.stream_engine_output: + cmd.append("--verbose") + if args.model: + cmd.extend(["--model", args.model]) + if args.thinking: + cmd.extend(["--effort", args.thinking]) + result = run_with_heartbeat( + cmd, + repo, + input_text=prompt, + label="claude", + heartbeat_seconds=args.heartbeat_seconds, + stream_output=args.stream_engine_output, + stream_display=ClaudeStreamDisplay() if args.stream_engine_output else None, + ) + if result.returncode != 0: + raise SystemExit(f"claude engine failed ({result.returncode})\n{result.stderr or result.stdout}") + return result.stdout + + +def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if args.thinking: + raise SystemExit("--thinking is not supported by the droid engine") + prompt_path = Path(tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False).name) + prompt_path.write_text(prompt) + cmd = [ + resolve_command(args.droid_bin, repo), + "exec", + "--cwd", + str(repo), + "--output-format", + "json", + "-f", + str(prompt_path), + ] + if args.model: + cmd.extend(["--model", args.model]) + if not args.tools: + cmd.extend(["--disabled-tools", "*"]) + result = run_with_heartbeat( + cmd, + repo, + label="droid", + heartbeat_seconds=args.heartbeat_seconds, + stream_output=args.stream_engine_output, + ) + prompt_path.unlink(missing_ok=True) + if result.returncode != 0: + raise SystemExit(f"droid engine failed ({result.returncode})\n{result.stderr or result.stdout}") + return result.stdout + + +def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if args.thinking: + raise SystemExit("--thinking is not supported by the copilot engine") + if not args.tools: + raise SystemExit( + "--no-tools is not supported by the copilot engine; copilot requires a read-only file view tool to load the review bundle without exposing it in argv" + ) + with tempfile.TemporaryDirectory(prefix="autoreview-copilot.") as tempdir: + prompt_path = Path(tempdir) / "prompt.txt" + prompt_path.write_text(prompt) + os.chmod(prompt_path, 0o600) + cmd = [ + resolve_command(args.copilot_bin, repo), + "-C", + tempdir, + "-p", + "Read ./prompt.txt and follow it exactly. Return only the requested JSON object.", + "--output-format", + "json", + "--stream", + "on" if args.stream_engine_output else "off", + "--no-ask-user", + "--disable-builtin-mcps", + ] + if args.model: + cmd.extend(["--model", args.model]) + cmd.extend( + [ + "--available-tools=read_agent,rg,view,web_fetch", + "--allow-tool=read_agent", + "--allow-tool=rg", + "--allow-tool=view", + "--allow-tool=web_fetch", + ] + ) + if args.web_search: + cmd.append("--allow-all-urls") + result = run_with_heartbeat( + cmd, + Path(tempdir), + label="copilot", + heartbeat_seconds=args.heartbeat_seconds, + stream_output=args.stream_engine_output, + ) + if result.returncode != 0: + raise SystemExit(f"copilot engine failed ({result.returncode})\n{result.stderr or result.stdout}") + return result.stdout + + +class CodexStreamDisplay: + def __init__(self, *, activity_seconds: int = 20) -> None: + self.activity_seconds = activity_seconds + self.hidden_events = 0 + self.last_visible = time.monotonic() + + def __call__(self, name: str, line: str) -> str | None: + if name != "stdout": + return line + try: + event = json.loads(line) + except json.JSONDecodeError: + return self.visible(line) + event_type = event.get("type") + if event_type == "thread.started": + return self.visible(f"codex thread: {event.get('thread_id', '')}\n") + if event_type == "turn.started": + return self.visible("codex turn started\n") + if event_type == "turn.completed": + usage = event.get("usage") + message = format_codex_usage(usage) + "\n" if isinstance(usage, dict) else "codex turn completed\n" + return self.visible(self.flush_hidden() + message) + item = event.get("item") + if isinstance(item, dict) and item.get("type") == "agent_message" and isinstance(item.get("text"), str): + return self.visible(self.flush_hidden() + item["text"].rstrip() + "\n") + return self.hidden_activity() + + def hidden_activity(self) -> str | None: + self.hidden_events += 1 + if time.monotonic() - self.last_visible < self.activity_seconds: + return None + return self.visible(self.flush_hidden()) + + def flush_hidden(self) -> str: + if not self.hidden_events: + return "" + count = self.hidden_events + self.hidden_events = 0 + return f"codex activity: {count} hidden tool/status events\n" + + def visible(self, text: str) -> str: + self.last_visible = time.monotonic() + return text + + +class ClaudeStreamDisplay: + def __init__(self, *, activity_seconds: int = 20) -> None: + self.activity_seconds = activity_seconds + self.hidden_events = 0 + self.last_visible = time.monotonic() + self.started = False + + def __call__(self, name: str, line: str) -> str | None: + if name != "stdout": + return line + try: + event = json.loads(line) + except json.JSONDecodeError: + return self.visible(line) + event_type = event.get("type") + if event_type == "system" and not self.started: + self.started = True + return self.visible("claude turn started\n") + if event_type == "assistant": + return self.assistant_message(event) + if event_type == "result": + return self.visible(self.flush_hidden() + self.result_summary(event)) + return self.hidden_activity() + + def assistant_message(self, event: dict[str, Any]) -> str | None: + message = event.get("message") + if not isinstance(message, dict): + return self.hidden_activity() + chunks: list[str] = [] + for item in message.get("content", []): + if not isinstance(item, dict): + continue + if item.get("type") == "text" and isinstance(item.get("text"), str): + chunks.append(item["text"].rstrip()) + if chunks: + return self.visible(self.flush_hidden() + "\n".join(chunks) + "\n") + return self.hidden_activity() + + def result_summary(self, event: dict[str, Any]) -> str: + usage = event.get("usage") + fields: list[str] = [] + if isinstance(usage, dict): + for key in ( + "input_tokens", + "cache_read_input_tokens", + "cache_creation_input_tokens", + "output_tokens", + ): + value = usage.get(key) + if isinstance(value, int): + fields.append(f"{key}={value}") + cost = event.get("total_cost_usd") + if isinstance(cost, (int, float)) and not isinstance(cost, bool): + fields.append(f"cost_usd={cost:.6f}") + return "claude usage: " + " ".join(fields) + "\n" if fields else "claude turn completed\n" + + def hidden_activity(self) -> str | None: + self.hidden_events += 1 + if time.monotonic() - self.last_visible < self.activity_seconds: + return None + return self.visible(self.flush_hidden()) + + def flush_hidden(self) -> str: + if not self.hidden_events: + return "" + count = self.hidden_events + self.hidden_events = 0 + return f"claude activity: {count} hidden tool/status events\n" + + def visible(self, text: str) -> str: + self.last_visible = time.monotonic() + return text + + +def format_codex_usage(usage: dict[str, Any]) -> str: + fields = [ + "input_tokens", + "cached_input_tokens", + "output_tokens", + "reasoning_output_tokens", + ] + parts = [f"{field}={usage[field]}" for field in fields if isinstance(usage.get(field), int)] + return "codex usage: " + " ".join(parts) if parts else "codex usage: unavailable" + + +def claude_allowed_tools(args: argparse.Namespace) -> str: + tools = [tool.strip() for tool in args.claude_allowed_tools.split(",") if tool.strip()] + if not args.web_search: + tools = [tool for tool in tools if tool not in {"WebSearch", "WebFetch"}] + return ",".join(tools) + + def extract_json(text: str) -> dict[str, Any]: stripped = text.strip() if not stripped: @@ -585,9 +937,9 @@ def extract_json(text: str) -> dict[str, Any]: try: parsed = json.loads(stripped) except json.JSONDecodeError as exc: - candidate = parse_json_candidate(stripped) - if isinstance(candidate, dict) and "findings" in candidate: - return candidate + fenced_report = parse_json_candidate(stripped) + if isinstance(fenced_report, dict) and "findings" in fenced_report: + return fenced_report jsonl_report = extract_json_from_jsonl(stripped) if jsonl_report: return jsonl_report @@ -656,12 +1008,9 @@ def parse_json_candidate(text: str) -> Any | None: return parsed -def validate_report( - report: dict[str, Any], - changed_paths: set[str], - required: list[str], -) -> None: - extra_top = set(report) - REPORT_KEYS +def validate_report(report: dict[str, Any], repo: Path, changed_paths: set[str], required: list[str]) -> None: + allowed_top = {"findings", "overall_correctness", "overall_explanation", "overall_confidence"} + extra_top = set(report) - allowed_top if extra_top: raise SystemExit(f"review JSON has unexpected top-level keys: {sorted(extra_top)}") for key in SCHEMA["required"]: @@ -677,21 +1026,47 @@ def validate_report( raise SystemExit("review JSON overall_explanation is too long") if not number_in_range(report.get("overall_confidence")): raise SystemExit("review JSON overall_confidence must be numeric") - + finding_text = "" kept_findings: list[dict[str, Any]] = [] ignored_findings: list[tuple[int, dict[str, Any], str, int]] = [] - finding_text = "" for index, finding in enumerate(report["findings"]): - validate_finding(index, finding) - location = finding["code_location"] - rel = str(location["file_path"]).strip() - line = int(location["line"]) + if not isinstance(finding, dict): + raise SystemExit(f"finding {index} must be an object") + allowed_finding = {"title", "body", "priority", "confidence", "category", "code_location"} + extra_finding = set(finding) - allowed_finding + if extra_finding: + raise SystemExit(f"finding {index} has unexpected keys: {sorted(extra_finding)}") + for key in allowed_finding: + if key not in finding: + raise SystemExit(f"finding {index} missing required key: {key}") + title = finding.get("title") + if not isinstance(title, str) or not title or len(title) > 140: + raise SystemExit(f"finding {index} has invalid title") + body = finding.get("body") + if not isinstance(body, str) or not body or len(body) > 2400: + raise SystemExit(f"finding {index} has invalid body") + priority = finding.get("priority") + if priority not in {"P0", "P1", "P2", "P3"}: + raise SystemExit(f"finding {index} has invalid priority: {priority}") + if not number_in_range(finding.get("confidence")): + raise SystemExit(f"finding {index} has invalid confidence") + category = finding.get("category") + if category not in CATEGORIES: + raise SystemExit(f"finding {index} has invalid category: {category}") + location = finding.get("code_location") + if not isinstance(location, dict): + raise SystemExit(f"finding {index} missing code_location") + rel = str(location.get("file_path", "")).strip() + line = location.get("line") + if not rel or not isinstance(line, int) or line < 1: + raise SystemExit(f"finding {index} has invalid location: {location}") + if Path(rel).is_absolute() or ".." in Path(rel).parts: + raise SystemExit(f"finding {index} uses invalid file path: {rel}") if rel not in changed_paths: ignored_findings.append((index, finding, rel, line)) continue kept_findings.append(finding) finding_text += "\n" + json.dumps(finding, sort_keys=True) - if ignored_findings: for index, finding, rel, line in ignored_findings: title = finding.get("title", "") @@ -706,118 +1081,317 @@ def validate_report( explanation = report["overall_explanation"].rstrip() report["overall_correctness"] = "patch is correct" report["overall_explanation"] = bounded_field(f"{explanation}\n\n{note}", 3000) - haystack = finding_text.lower() for needle in required: if needle.lower() not in haystack: raise SystemExit(f"required finding text not found: {needle}") -def validate_finding(index: int, finding: Any) -> None: - if not isinstance(finding, dict): - raise SystemExit(f"finding {index} must be an object") - extra_finding = set(finding) - FINDING_KEYS - if extra_finding: - raise SystemExit(f"finding {index} has unexpected keys: {sorted(extra_finding)}") - for key in FINDING_KEYS: - if key not in finding: - raise SystemExit(f"finding {index} missing required key: {key}") - title = finding.get("title") - if not isinstance(title, str) or not title or len(title) > 140: - raise SystemExit(f"finding {index} has invalid title") - body = finding.get("body") - if not isinstance(body, str) or not body or len(body) > 2400: - raise SystemExit(f"finding {index} has invalid body") - priority = finding.get("priority") - if priority not in {"P0", "P1", "P2", "P3"}: - raise SystemExit(f"finding {index} has invalid priority: {priority}") - if not number_in_range(finding.get("confidence")): - raise SystemExit(f"finding {index} has invalid confidence") - category = finding.get("category") - if category not in CATEGORIES: - raise SystemExit(f"finding {index} has invalid category: {category}") - location = finding.get("code_location") - if not isinstance(location, dict): - raise SystemExit(f"finding {index} missing code_location") - rel = str(location.get("file_path", "")).strip() - line = location.get("line") - if not rel or not isinstance(line, int) or line < 1: - raise SystemExit(f"finding {index} has invalid location: {location}") - path = Path(rel) - if path.is_absolute() or ".." in path.parts: - raise SystemExit(f"finding {index} uses invalid file path: {rel}") - - def number_in_range(value: Any) -> bool: return isinstance(value, (int, float)) and not isinstance(value, bool) and 0 <= value <= 1 -def print_report(report: dict[str, Any]) -> None: +def print_report(report: dict[str, Any], *, label: str = "autoreview") -> None: findings = report["findings"] if findings: - print(f"autoreview findings: {len(findings)}") + print(f"{label} findings: {len(findings)}") elif report["overall_correctness"] == "patch is incorrect": - print("autoreview verdict: patch is incorrect without discrete findings") + print(f"{label} verdict: patch is incorrect without discrete findings") else: - print("autoreview clean: no accepted/actionable findings reported") + print(f"{label} clean: no accepted/actionable findings reported") for finding in findings: loc = finding["code_location"] - print(f"[{finding['priority']}] {finding['title']} ({finding['category']})") + print(f"[{finding['priority']}] {finding['title']}") print(f"{loc['file_path']}:{loc['line']}") - print(finding["body"]) + print(f"{finding['body']}") print() print(f"overall: {report['overall_correctness']} ({report['overall_confidence']})") print(report["overall_explanation"]) -def start_parallel_tests(command: str, repo: Path) -> tuple[subprocess.Popen[Any], float]: +def start_parallel_tests(command: str, repo: Path, shell_kind: str) -> tuple[subprocess.Popen, float]: print(f"tests: {command}") - return subprocess.Popen(command, cwd=repo, shell=True), time.time() - - -def finish_parallel_tests(proc: subprocess.Popen[Any], started: float) -> int: + if shell_kind == "default" or shell_kind == "cmd": + return subprocess.Popen(command, cwd=repo, shell=True), time.time() + if shell_kind == "powershell": + powershell = resolve_command("powershell", repo) + return subprocess.Popen( + [powershell, "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command], + cwd=repo, + ), time.time() + if shell_kind == "pwsh": + pwsh = resolve_command("pwsh", repo) + return subprocess.Popen( + [pwsh, "-NoProfile", "-Command", command], + cwd=repo, + ), time.time() + raise SystemExit(f"invalid --parallel-tests-shell/AUTOREVIEW_PARALLEL_TESTS_SHELL: {shell_kind}") + + +def finish_parallel_tests(proc: subprocess.Popen, started: float) -> int: proc.wait() print(f"tests exit: {proc.returncode} after {int(time.time() - started)}s") return int(proc.returncode or 0) -def parse_args(argv: list[str]) -> argparse.Namespace: - parser = argparse.ArgumentParser(description="EEGPrep bundle-driven autonomous code review.") - parser.add_argument("--mode", choices=["auto", "local", "uncommitted", "branch", "commit"], default="auto") +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Bundle-driven AI code review.") + parser.add_argument( + "--mode", + choices=["auto", "local", "uncommitted", "branch", "commit", "codebase"], + default="auto", + ) parser.add_argument("--base") parser.add_argument("--commit", default="HEAD") - parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex")) - parser.add_argument("--model", default=os.environ.get("AUTOREVIEW_MODEL")) + parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch origin before branch diffs.") + parser.add_argument("--engine", choices=ENGINES, default=os.environ.get("AUTOREVIEW_ENGINE", "codex")) + parser.add_argument("--reviewers", help="Comma-separated review panel, e.g. codex,claude or codex:gpt-5:high.") + parser.add_argument( + "--panel", + action="store_true", + help="Run a Codex/Claude review panel unless --engine changes the first reviewer.", + ) + parser.add_argument("--model", action="append", help="Model for all reviewers or engine=model. Repeatable.") parser.add_argument( "--thinking", - choices=["low", "medium", "high", "xhigh"], - default=os.environ.get("AUTOREVIEW_THINKING", "high"), + action="append", + help="Thinking/effort for all reviewers or engine=level. Repeatable. Codex: low, medium, high, xhigh. Claude: low, medium, high, xhigh, max.", + ) + parser.add_argument( + "--allow-partial-panel", action="store_true", help="Continue panel output when one reviewer fails." + ) + parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex")) + parser.add_argument("--claude-bin", default=os.environ.get("CLAUDE_BIN", "claude")) + parser.add_argument("--droid-bin", default=os.environ.get("DROID_BIN", "droid")) + parser.add_argument("--copilot-bin", default=os.environ.get("COPILOT_BIN", "copilot")) + parser.add_argument( + "--no-tools", + dest="tools", + action="store_false", + default=True, + help="Disable tools for engines that support it. Codex and copilot reject no-tools review.", ) parser.add_argument("--no-web-search", dest="web_search", action="store_false", default=True) + parser.add_argument( + "--claude-allowed-tools", + default=os.environ.get( + "AUTOREVIEW_CLAUDE_TOOLS", + "Read,Grep,Glob,WebSearch,WebFetch", + ), + ) parser.add_argument("--prompt", action="append", help="Additional review instruction text.") parser.add_argument("--prompt-file", action="append", help="Additional review instruction file.") - parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the bundle.") + parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.") parser.add_argument("--output", help="Write human output to a file as well as stdout.") parser.add_argument("--json-output", help="Write validated structured review JSON.") - parser.add_argument("--parallel-tests", help="Run a focused test command concurrently with review.") - parser.add_argument("--require-finding", action="append", default=[], help="Require finding text to contain this substring.") - parser.add_argument("--expect-findings", action="store_true", help="Treat findings as success for harness checks.") - parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch origin before branch diffs.") parser.add_argument("--heartbeat-seconds", type=int, default=60) - parser.add_argument("--dry-run", action="store_true", help="Resolve target and bundle context without calling Codex.") - return parser.parse_args(argv) + parser.add_argument( + "--stream-engine-output", + action="store_true", + default=os.environ.get("AUTOREVIEW_STREAM_ENGINE_OUTPUT") == "1", + help="Stream review engine output while preserving buffered output for validation. Codex output is filtered to hide tool/file chatter.", + ) + parser.add_argument( + "--parallel-tests", help="Run a test command concurrently with review; failure fails the helper." + ) + parser.add_argument( + "--parallel-tests-shell", + choices=["default", "cmd", "powershell", "pwsh"], + default=os.environ.get("AUTOREVIEW_PARALLEL_TESTS_SHELL", "default"), + help="Shell for --parallel-tests. Default preserves Python shell=True platform behavior; use powershell or pwsh for PowerShell-specific commands.", + ) + parser.add_argument( + "--require-finding", action="append", default=[], help="Require finding text to contain this substring." + ) + parser.add_argument( + "--expect-findings", action="store_true", help="Treat findings as success; for harness acceptance tests." + ) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args() + if args.engine not in ENGINES: + raise SystemExit(f"invalid --engine/AUTOREVIEW_ENGINE: {args.engine}") + return args + + +def run_engine(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if args.engine == "codex": + return run_codex(args, repo, prompt) + if args.engine == "claude": + return run_claude(args, repo, prompt) + if args.engine == "droid": + return run_droid(args, repo, prompt) + if args.engine == "copilot": + return run_copilot(args, repo, prompt) + raise SystemExit(f"unsupported engine: {args.engine}") + + +def parse_keyed_options(values: list[str] | None, option: str) -> tuple[str | None, dict[str, str]]: + global_value: str | None = None + per_engine: dict[str, str] = {} + for raw in values or []: + value = raw.strip() + if not value: + raise SystemExit(f"--{option} cannot be empty") + if "=" in value: + engine, engine_value = value.split("=", 1) + engine = engine.strip() + engine_value = engine_value.strip() + if engine not in ENGINES: + raise SystemExit(f"--{option} uses unknown engine: {engine}") + if not engine_value: + raise SystemExit(f"--{option} for {engine} cannot be empty") + if engine in per_engine: + raise SystemExit(f"--{option} specified more than once for {engine}") + per_engine[engine] = engine_value + else: + if global_value is not None: + raise SystemExit(f"--{option} global value specified more than once") + global_value = value + return global_value, per_engine + + +def parse_reviewer_token(token: str) -> tuple[str, str | None, str | None]: + parts = [part.strip() for part in token.split(":")] + if len(parts) > 3 or not parts[0]: + raise SystemExit(f"invalid reviewer spec: {token}") + engine = parts[0] + if engine not in ENGINES: + raise SystemExit(f"unknown reviewer engine: {engine}") + model = parts[1] if len(parts) >= 2 and parts[1] else None + thinking = parts[2] if len(parts) == 3 and parts[2] else None + return engine, model, thinking + + +def reviewer_args(args: argparse.Namespace) -> list[argparse.Namespace]: + global_model, model_by_engine = parse_keyed_options(args.model, "model") + global_thinking, thinking_by_engine = parse_keyed_options(args.thinking, "thinking") + reviewers: list[tuple[str, str | None, str | None]] = [] + if args.reviewers: + tokens = [token.strip() for token in args.reviewers.split(",") if token.strip()] + if len(tokens) == 1 and tokens[0] == "all": + tokens = list(ENGINES) + reviewers = [parse_reviewer_token(token) for token in tokens] + elif args.panel: + engines = [args.engine] + for engine in ("codex", "claude"): + if engine not in engines: + engines.append(engine) + reviewers = [(engine, None, None) for engine in engines] + else: + reviewers = [(args.engine, None, None)] + + seen: set[str] = set() + result: list[argparse.Namespace] = [] + for engine, inline_model, inline_thinking in reviewers: + if engine in seen: + raise SystemExit(f"reviewer specified more than once: {engine}") + seen.add(engine) + model = inline_model or model_by_engine.get(engine) or global_model + thinking = inline_thinking or thinking_by_engine.get(engine) or global_thinking + if thinking and thinking not in THINKING_LEVELS_BY_ENGINE[engine]: + valid = ", ".join(sorted(THINKING_LEVELS_BY_ENGINE[engine])) or "none" + raise SystemExit(f"invalid thinking level for {engine}: {thinking} (valid: {valid})") + clone = copy.copy(args) + clone.engine = engine + clone.model = model + clone.thinking = thinking + result.append(clone) + return result -def main(argv: list[str]) -> int: - args = parse_args(argv) +def reviewer_label(args: argparse.Namespace) -> str: + parts = [args.engine] + if args.model: + parts.append(f"model={args.model}") + if args.thinking: + parts.append(f"thinking={args.thinking}") + return " ".join(parts) + + +def run_reviewer( + args: argparse.Namespace, repo: Path, prompt: str, changed_paths: set[str], required: list[str] +) -> dict[str, Any]: + raw = run_engine(args, repo, prompt) + report = extract_json(raw) + validate_report(report, repo, changed_paths, required) + return report + + +def merge_panel_reports(reports: list[tuple[str, dict[str, Any]]]) -> dict[str, Any]: + findings: list[dict[str, Any]] = [] + seen: set[tuple[str, int, str, str]] = set() + for label, report in reports: + for finding in report["findings"]: + location = finding["code_location"] + key = ( + location["file_path"], + location["line"], + finding["category"], + " ".join(finding["title"].lower().split()), + ) + if key in seen: + continue + seen.add(key) + merged = copy.deepcopy(finding) + merged["body"] = bounded_field(f"Reviewer: {label}\n\n{merged['body']}", 2400) + findings.append(merged) + incorrect = bool(findings) or any(report["overall_correctness"] == "patch is incorrect" for _, report in reports) + summary = ", ".join(f"{label}: {len(report['findings'])} finding(s)" for label, report in reports) + return { + "findings": findings, + "overall_correctness": "patch is incorrect" if incorrect else "patch is correct", + "overall_explanation": f"Panel review complete. {summary}.", + "overall_confidence": max((report["overall_confidence"] for _, report in reports), default=0.5), + } + + +def run_panel( + args: argparse.Namespace, reviewers: list[argparse.Namespace], repo: Path, prompt: str, changed_paths: set[str] +) -> dict[str, Any]: + reports: list[tuple[str, dict[str, Any]]] = [] + failures: list[str] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=len(reviewers)) as executor: + future_by_label = { + executor.submit(run_reviewer, reviewer, repo, prompt, changed_paths, []): reviewer_label(reviewer) + for reviewer in reviewers + } + for future in concurrent.futures.as_completed(future_by_label): + label = future_by_label[future] + try: + reports.append((label, future.result())) + except SystemExit as exc: + failures.append(f"{label}: {exc}") + except Exception as exc: + failures.append(f"{label}: {exc}") + if failures and not args.allow_partial_panel: + raise SystemExit("autoreview panel failed\n" + "\n".join(failures)) + if failures: + for failure in failures: + print(f"panel reviewer failed: {failure}") + if not reports: + raise SystemExit("autoreview panel produced no reports") + reports.sort(key=lambda item: item[0]) + report = merge_panel_reports(reports) + validate_report(report, repo, changed_paths, args.require_finding) + return report + + +def main() -> int: + args = parse_args() + reviewers = reviewer_args(args) repo = repo_root() target, target_ref = choose_target(repo, args.mode, args.base) print(f"autoreview target: {target}") print(f"branch: {current_branch(repo)}") - print("engine: codex") - if args.model: - print(f"model: {args.model}") - print(f"thinking: {args.thinking}") + if len(reviewers) == 1 and not args.reviewers and not args.panel: + print(f"engine: {reviewers[0].engine}") + if reviewers[0].model: + print(f"model: {reviewers[0].model}") + if reviewers[0].thinking: + print(f"thinking: {reviewers[0].thinking}") + else: + print(f"reviewers: {', '.join(reviewer_label(reviewer) for reviewer in reviewers)}") + print(f"tools: {'on' if args.tools else 'off'}") print(f"web_search: {'on' if args.web_search else 'off'}") display_ref = args.commit if target == "commit" else target_ref if display_ref: @@ -826,13 +1400,17 @@ def main(argv: list[str]) -> int: if target == "local": bundle = local_bundle(repo) elif target == "branch": - if target_ref is None: - raise SystemExit("internal error: branch target missing base ref") + assert target_ref bundle = branch_bundle(repo, target_ref, skip_fetch=args.skip_fetch) + elif target == "codebase": + changed_paths = codebase_paths(repo) + bundle = codebase_bundle(repo, changed_paths) else: bundle = commit_bundle(repo, args.commit) target_ref = args.commit - changed_paths = review_paths(repo, target, target_ref, args.commit) + changed_paths = review_paths(repo, target, target_ref, args.commit) + if target != "codebase": + changed_paths = review_paths(repo, target, target_ref, args.commit) instructions = instruction_bundle(repo, changed_paths) prompt = build_prompt( repo, @@ -844,28 +1422,32 @@ def main(argv: list[str]) -> int: load_extra_prompt(args), load_datasets(args), ) - print(f"changed paths: {len(changed_paths)}") + print(f"scope paths: {len(changed_paths)}") print(f"bundle: {len(prompt)} chars") if args.dry_run: return 0 - tests_proc: tuple[subprocess.Popen[Any], float] | None = None + tests_proc: tuple[subprocess.Popen, float] | None = None if args.parallel_tests: - tests_proc = start_parallel_tests(args.parallel_tests, repo) + tests_proc = start_parallel_tests(args.parallel_tests, repo, args.parallel_tests_shell) try: - raw = run_codex(args, repo, prompt) - report = extract_json(raw) - validate_report(report, changed_paths, args.require_finding) + if len(reviewers) == 1: + report = run_reviewer(reviewers[0], repo, prompt, changed_paths, args.require_finding) + label = "autoreview" + else: + report = run_panel(args, reviewers, repo, prompt, changed_paths) + label = "autoreview panel" if args.json_output: Path(args.json_output).write_text(json.dumps(report, indent=2) + "\n") + if args.output: original_stdout = sys.stdout with Path(args.output).open("w") as handle: sys.stdout = Tee(original_stdout, handle) - print_report(report) + print_report(report, label=label) sys.stdout = original_stdout else: - print_report(report) + print_report(report, label=label) finally: tests_status = finish_parallel_tests(*tests_proc) if tests_proc else 0 @@ -892,4 +1474,4 @@ class Tee: if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) + raise SystemExit(main()) diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 new file mode 100644 index 00000000..51007a64 --- /dev/null +++ b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 @@ -0,0 +1,51 @@ +[CmdletBinding()] +param( + [ValidateSet('buggy', 'benign')] + [string] $Fixture, + + [ValidateSet('codex', 'claude', 'droid', 'copilot')] + [string[]] $Engine, + + [switch] $DryRun, + + [Alias('h')] + [switch] $Help +) + +$ErrorActionPreference = 'Stop' + +$Harness = Join-Path $PSScriptRoot 'test-review-harness.py' +$ForwardedArgs = @() + +if ($Help) { + $ForwardedArgs += '--help' +} + +if ($PSBoundParameters.ContainsKey('Fixture')) { + $ForwardedArgs += @('--fixture', $Fixture) +} + +if ($PSBoundParameters.ContainsKey('Engine')) { + foreach ($SelectedEngine in $Engine) { + $ForwardedArgs += @('--engine', $SelectedEngine) + } +} + +if ($DryRun) { + $ForwardedArgs += '--dry-run' +} + +$PyLauncher = Get-Command py -ErrorAction SilentlyContinue +if ($null -ne $PyLauncher) { + & $PyLauncher.Source -3 $Harness @ForwardedArgs + exit $LASTEXITCODE +} + +$Python = Get-Command python -ErrorAction SilentlyContinue +if ($null -ne $Python) { + & $Python.Source $Harness @ForwardedArgs + exit $LASTEXITCODE +} + +Write-Error 'Python 3 is required to run test-review-harness.' +exit 127 diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py index 8a7540d5..8035c42f 100755 --- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py +++ b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py @@ -12,6 +12,8 @@ from pathlib import Path +ENGINES = ("codex", "claude", "droid", "copilot") + SAFE_INITIAL = """import numpy as np @@ -63,20 +65,21 @@ def trim_eeg(eeg, start_sample, stop_sample): "Review normally and report only concrete defects introduced by the patch." ) BENIGN_PROMPT = ( - "Calibration fixture: this EEG change intentionally validates sample bounds " - "and preserves channel-major data. Do not flag it unless there is a concrete bug." + "Calibration fixture: this EEG change validates 1-based sample bounds and " + "preserves channel-major data. Do not flag it unless there is a concrete bug." ) def parse_args(argv: list[str]) -> argparse.Namespace: parser = argparse.ArgumentParser( - description=( - "Create a temporary EEG-style git repo and run the adapted autoreview helper " - "against a buggy or benign patch." - ) + prog="test-review-harness", + description="Create a temporary EEG-style repo and run the adapted autoreview helper.", ) parser.add_argument("--fixture", choices=("buggy", "benign"), default="buggy") - parser.add_argument("--dry-run", action="store_true", help="Only verify helper target selection.") + parser.add_argument("--engine", action="append", choices=ENGINES, dest="engines") + parser.add_argument( + "--dry-run", action="store_true", help="Verify target/bundle setup without spending a model call." + ) return parser.parse_args(argv) @@ -98,38 +101,37 @@ def create_fixture_repo(repo: Path, fixture: str) -> None: write_fixture_file(repo, BUGGY_CHANGED if fixture == "buggy" else BENIGN_CHANGED) -def run_review(repo: Path, script_dir: Path, fixture: str, *, dry_run: bool) -> None: +def run_reviews(repo: Path, script_dir: Path, fixture: str, engines: list[str], *, dry_run: bool) -> None: autoreview = script_dir / "autoreview" - command = [ - sys.executable, - str(autoreview), - "--mode", - "local", - "--prompt", - BUGGY_PROMPT if fixture == "buggy" else BENIGN_PROMPT, - ] - if fixture == "buggy": - command.extend(["--require-finding", "channel", "--expect-findings"]) - if dry_run: - command.append("--dry-run") - run(command, repo) + for engine in engines: + print(f"== {engine} ==", flush=True) + command = [ + sys.executable, + str(autoreview), + "--mode", + "local", + "--engine", + engine, + "--prompt", + BUGGY_PROMPT if fixture == "buggy" else BENIGN_PROMPT, + ] + if fixture == "buggy": + command.extend(["--require-finding", "channel", "--expect-findings"]) + if dry_run: + command.append("--dry-run") + run(command, repo) def cleanup_repo(repo: Path) -> None: - def make_writable_and_retry( - function: Callable[[str], object], - path: str, - _exc_info: object, - ) -> None: + def make_writable_and_retry(function: Callable[[str], object], path: str, _exc_info: object) -> None: try: os.chmod(path, stat.S_IREAD | stat.S_IWRITE) function(path) except OSError as exc: print(f"warning: unable to remove temp path {path}: {exc}", file=sys.stderr) - if not repo.exists(): - return - shutil.rmtree(repo, onerror=make_writable_and_retry) + if repo.exists(): + shutil.rmtree(repo, onerror=make_writable_and_retry) def main(argv: list[str]) -> int: @@ -138,7 +140,7 @@ def main(argv: list[str]) -> int: repo = Path(tempfile.mkdtemp(prefix="eegprep-autoreview-fixture.")) try: create_fixture_repo(repo, args.fixture) - run_review(repo, script_dir, args.fixture, dry_run=args.dry_run) + run_reviews(repo, script_dir, args.fixture, args.engines or ["codex"], dry_run=args.dry_run) except subprocess.CalledProcessError as exc: return int(exc.returncode or 1) finally: From 732d6e27ed9ce067e618c2097712c6c20eacaa0c Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Mon, 8 Jun 2026 20:49:17 -0700 Subject: [PATCH 02/16] Strengthen autoreview architecture checks --- .agents/skills/oc-autoreview-adapted/SKILL.md | 14 +++++++++++++- .../oc-autoreview-adapted/agents/openai.yaml | 4 ++-- .../oc-autoreview-adapted/scripts/autoreview | 11 +++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md index 763a9f4b..02891340 100644 --- a/.agents/skills/oc-autoreview-adapted/SKILL.md +++ b/.agents/skills/oc-autoreview-adapted/SKILL.md @@ -12,7 +12,8 @@ Use the bundled helper for high-signal closeout review or whole-codebase bug hun - Run it for real unless the user asked only for a plan. - Treat output as advisory. Verify every accepted finding in the real code path before fixing or reporting it. - Accept concrete bugs, regressions, EEGLAB parity breaks, unsafe I/O/security risks, missing tests tied to behavior, and maintainability issues that cause real future defects. -- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, and subjective MATLAB/Python style comments. +- Accept structural findings when the code becomes harder to ship: spaghetti branching, wrong ownership layer, duplicate canonical helpers, non-atomic state updates, file sprawl, weak data boundaries, or indirection that hides EEG invariants. +- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, subjective MATLAB/Python style comments, and "cleaner someday" feedback without a concrete failure mode. - If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason. - Do not invoke nested review tools from inside review. The helper already runs one structured review path. - Do not push/stage/commit/open PR unless the user requested that separately. @@ -95,6 +96,17 @@ Prioritize: - packaged Markdown help for GUI Help / `pophelp`; - realistic EEG-size performance and concrete security/path/I/O risks. +## Architecture Bar + +Use this only for code that will make EEGPrep less reliable or maintainable, not for taste. + +- Look for a simpler "code judo" move that preserves behavior while deleting branches, modes, helper layers, or special cases. +- Flag spaghetti growth: ad-hoc conditionals in busy flows, scattered feature checks, one-off booleans, nullable modes, and partial updates. +- Keep logic in the canonical layer: signal processing in `sigprocfunc`, user wrappers in `popfunc`, GUI/session coordination in `guifunc`/`adminfunc`, plugin code in its plugin package, CLI orchestration outside core math. +- Prefer existing helpers/contracts over near-duplicates; remove thin wrappers or generic magic that hide simple EEG data shapes. +- Treat file-size growth past roughly 1000 lines as a warning in diff review; in whole-codebase audits, flag large modules only with a concrete bug-prone coupling or focused split. +- Prefer fixes that remove concepts, collapse duplicate branches, clarify data boundaries, or make state/session/history updates atomic. + ## Loop 1. Format first if formatting can change line locations. diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml index 7c1c3100..7b94c64a 100644 --- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml +++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "OC Autoreview Adapted" - short_description: "EEGPrep autoreview for diffs or codebase audits" - default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real findings." + short_description: "EEGPrep autoreview for bugs, parity, and architecture" + default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real bugs, parity breaks, and structural issues." diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview index 5fdee362..e623ea05 100755 --- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview +++ b/.agents/skills/oc-autoreview-adapted/scripts/autoreview @@ -52,6 +52,7 @@ CATEGORIES = { "regression", "test_gap", "maintainability", + "architecture", "eeglab_parity", "data_structure", "gui_session", @@ -590,6 +591,16 @@ def build_prompt( - For each finding, use the smallest file/line location that demonstrates the issue. - If there are no actionable findings, return an empty findings array and mark the patch correct. + Structural quality bar: + - Do not approve code merely because it works. Flag architecture that makes EEGPrep harder to ship reliably. + - Look for a simpler "code judo" move: preserving behavior while deleting branches, modes, helper layers, or special cases. + - Flag spaghetti growth: ad-hoc conditionals inserted into busy flows, one-off booleans, nullable modes, scattered feature checks, and partial state updates. + - Flag logic in the wrong layer: GUI/session code doing signal processing, pop_* wrappers owning low-level math, CLI/console code bypassing session/history contracts, or runtime paths depending on vendored EEGLAB. + - Prefer canonical helpers and existing contracts over bespoke near-duplicates. Report duplicate helpers when a clear canonical home already exists. + - Be skeptical of thin wrappers, identity abstractions, cast-heavy/loosely-shaped boundaries, or generic magic that hides simple EEG data invariants. + - Treat file sprawl as a smell. In diff review, flag changes that push a file past roughly 1000 lines without a strong reason. In codebase audit, flag oversized modules only when there is a concrete bug-prone coupling or a focused decomposition path. + - Prefer remedies that remove concepts, collapse duplicate branches, make state updates atomic, clarify typed/data boundaries, or move logic to the owning module. + EEGPrep review priorities: - Correctness bugs, import/runtime failures, wrong numerical results, and broken common workflows. - EEGLAB parity in APIs, pop_* wrappers, history commands, GUI behavior, event semantics, and expected data structures. From a649906a9a58db14436c837cf4c3214b58755a26 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Tue, 9 Jun 2026 04:08:18 -0700 Subject: [PATCH 03/16] Add scoped autoreview campaign workflow --- .agents/skills/oc-autoreview-adapted/SKILL.md | 40 ++++++++++ .../oc-autoreview-adapted/agents/openai.yaml | 4 +- .../oc-autoreview-adapted/scripts/autoreview | 74 +++++++++++++++++-- 3 files changed, 108 insertions(+), 10 deletions(-) diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md index 02891340..ff382e81 100644 --- a/.agents/skills/oc-autoreview-adapted/SKILL.md +++ b/.agents/skills/oc-autoreview-adapted/SKILL.md @@ -17,6 +17,7 @@ Use the bundled helper for high-signal closeout review or whole-codebase bug hun - If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason. - Do not invoke nested review tools from inside review. The helper already runs one structured review path. - Do not push/stage/commit/open PR unless the user requested that separately. +- If the user asks to review the whole codebase and wants fixes, default to the campaign workflow below: branch/worktree per codebase area, review/fix/test/rerun, then open a PR before moving to the next area. ## Commands @@ -54,6 +55,44 @@ Whole EEGPrep-owned codebase audit: The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files and excludes vendored EEGLAB/reference sample data by default; the reviewer may inspect files read-only and report real bugs anywhere in scope. +Scoped codebase audit: + +```bash +"$AUTOREVIEW" --mode codebase \ + --path src/eegprep/functions/popfunc \ + --path tests/test_pop_utils.py \ + --thinking codex=xhigh +``` + +Use `--scope-file scopes.txt` when a slice has many paths. + +## Default Whole-Codebase Campaign + +When asked to review the whole codebase hands-off, do not make one giant PR. Split work into PR-sized areas, usually: + +- `popfunc`: `src/eegprep/functions/popfunc`, matching pop tests/help. +- `sigproc`: `src/eegprep/functions/sigprocfunc`, numerical/parity tests. +- `gui-session`: `src/eegprep/functions/guifunc`, `adminfunc`, console/session tests. +- `plugins`: `src/eegprep/plugins`, bundled plugin tests/resources. +- `io-bids-study`: file I/O, BIDS, STUDY, dataset/session persistence. +- `cli-docs-tools`: CLI, docs, skills, tools, workflows. + +For each area: + +1. Create a fresh worktree/branch from the requested base, e.g. `autoreview/popfunc`. +2. Run `autoreview --mode codebase --path ... --thinking codex=xhigh`. +3. Verify each finding from first principles. Fix real issues even when the fix touches a related helper outside the initial path scope; keep the PR conceptually tied to that area. +4. Run focused tests, lint/type checks when relevant, then rerun the same scoped autoreview until clean or until remaining findings are rejected with reasons. +5. Commit, push, and open a PR before starting the next area. + +PR body must list every finding reviewed: + +- **Fixed:** finding, root cause, files changed, tests run. +- **Rejected:** finding, why it is not real or not worth changing. +- **Follow-up:** only when real but intentionally outside this PR's area. + +Do not auto-merge. The human reviews each PR normally. + ## Useful Options - `--engine codex|claude|droid|copilot`; default is Codex. @@ -62,6 +101,7 @@ The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files and - `--stream-engine-output` to see compact live engine activity. - `--parallel-tests "uv run pytest tests/test_file.py"` to run tests while review runs. - `--prompt` / `--prompt-file` / `--dataset` to add evidence. +- `--path` / `--scope-file` to constrain a codebase, branch, local, or commit review to a PR-sized area. - `--json-output /tmp/review.json` and `--output /tmp/review.txt` for artifacts. - `--mode uncommitted` is an alias for `local`; use branch/commit modes after committing. - `--skip-fetch` avoids fetching before branch diffs. diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml index 7b94c64a..55464109 100644 --- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml +++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "OC Autoreview Adapted" - short_description: "EEGPrep autoreview for bugs, parity, and architecture" - default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real bugs, parity breaks, and structural issues." + short_description: "EEGPrep autoreview loops and PR campaigns" + default_prompt: "Use $oc-autoreview-adapted to run scoped autoreview loops or a whole-codebase PR campaign, then verify and fix real bugs, parity breaks, and structural issues." diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview index e623ea05..2c0c8e9b 100755 --- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview +++ b/.agents/skills/oc-autoreview-adapted/scripts/autoreview @@ -26,6 +26,7 @@ from __future__ import annotations import argparse import concurrent.futures import copy +import fnmatch import json import os import queue @@ -438,13 +439,53 @@ def branch_bundle(repo: Path, base_ref: str, *, skip_fetch: bool) -> str: ) +def normalize_scope_spec(scope: str) -> str: + normalized = scope.strip().replace("\\", "/").removeprefix("./").rstrip("/") + if not normalized: + raise SystemExit("scope path cannot be empty") + path = Path(normalized) + if path.is_absolute() or ".." in path.parts: + raise SystemExit(f"scope path must be repo-relative: {scope}") + return normalized + + +def load_scope_specs(args: argparse.Namespace) -> list[str]: + scopes = [normalize_scope_spec(scope) for scope in args.path or []] + for scope_file in args.scope_file or []: + path = Path(scope_file) + for line in path.read_text().splitlines(): + value = line.split("#", 1)[0].strip() + if value: + scopes.append(normalize_scope_spec(value)) + seen: set[str] = set() + result: list[str] = [] + for scope in scopes: + if scope in seen: + continue + seen.add(scope) + result.append(scope) + return result + + +def path_matches_scope(rel: str, scopes: list[str]) -> bool: + if not scopes: + return True + return any( + rel == scope + or rel.startswith(f"{scope}/") + or fnmatch.fnmatchcase(rel, scope) + or fnmatch.fnmatchcase(rel, f"{scope}/**") + for scope in scopes + ) + + def codebase_paths(repo: Path, scopes: list[str] | None = None) -> set[str]: - prefixes = [scope.strip().rstrip("/") for scope in scopes or [] if scope.strip()] + scope_specs = scopes or [] paths: set[str] = set() for rel in git(repo, "ls-files").splitlines(): if not rel or any(rel.startswith(prefix) for prefix in CODEBASE_EXCLUDED_PREFIXES): continue - if prefixes and not any(rel == prefix or rel.startswith(f"{prefix}/") for prefix in prefixes): + if not path_matches_scope(rel, scope_specs): continue paths.add(rel) return paths @@ -480,10 +521,12 @@ def commit_bundle(repo: Path, commit_ref: str) -> str: ) -def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: str) -> set[str]: +def review_paths( + repo: Path, target: str, target_ref: str | None, commit_ref: str, scopes: list[str] | None = None +) -> set[str]: names: set[str] = set() if target == "codebase": - return codebase_paths(repo) + return codebase_paths(repo, scopes) if target == "local": sources = [ git(repo, "diff", "--name-only", "--cached"), @@ -498,7 +541,7 @@ def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: st for source in sources: for line in source.splitlines(): path = line.strip() - if path: + if path and path_matches_scope(path, scopes or []): names.add(path) return names @@ -1193,6 +1236,16 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--prompt", action="append", help="Additional review instruction text.") parser.add_argument("--prompt-file", action="append", help="Additional review instruction file.") parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.") + parser.add_argument( + "--path", + action="append", + help="Repo-relative file, directory, or glob to include in the review scope. Repeat for multiple scopes.", + ) + parser.add_argument( + "--scope-file", + action="append", + help="File containing repo-relative --path scopes, one per line. Blank lines and # comments are ignored.", + ) parser.add_argument("--output", help="Write human output to a file as well as stdout.") parser.add_argument("--json-output", help="Write validated structured review JSON.") parser.add_argument("--heartbeat-seconds", type=int, default=60) @@ -1389,6 +1442,7 @@ def run_panel( def main() -> int: args = parse_args() + scope_specs = load_scope_specs(args) reviewers = reviewer_args(args) repo = repo_root() target, target_ref = choose_target(repo, args.mode, args.base) @@ -1407,6 +1461,8 @@ def main() -> int: display_ref = args.commit if target == "commit" else target_ref if display_ref: print(f"ref: {display_ref}") + if scope_specs: + print(f"path filters: {', '.join(scope_specs)}") if target == "local": bundle = local_bundle(repo) @@ -1414,14 +1470,16 @@ def main() -> int: assert target_ref bundle = branch_bundle(repo, target_ref, skip_fetch=args.skip_fetch) elif target == "codebase": - changed_paths = codebase_paths(repo) + changed_paths = codebase_paths(repo, scope_specs) bundle = codebase_bundle(repo, changed_paths) else: bundle = commit_bundle(repo, args.commit) target_ref = args.commit - changed_paths = review_paths(repo, target, target_ref, args.commit) + changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs) if target != "codebase": - changed_paths = review_paths(repo, target, target_ref, args.commit) + changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs) + if scope_specs and not changed_paths: + raise SystemExit("no files matched --path/--scope-file for this review target") instructions = instruction_bundle(repo, changed_paths) prompt = build_prompt( repo, From dac62e07295f817395a1cc2eca7dedd4f0f92c50 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Tue, 9 Jun 2026 04:14:10 -0700 Subject: [PATCH 04/16] Add parallel autoreview campaign scaffolding --- .agents/skills/oc-autoreview-adapted/SKILL.md | 22 ++ .../oc-autoreview-adapted/agents/openai.yaml | 4 +- .../scripts/new-review-campaign.py | 279 ++++++++++++++++++ .gitignore | 1 + 4 files changed, 304 insertions(+), 2 deletions(-) create mode 100755 .agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md index ff382e81..37e0ea72 100644 --- a/.agents/skills/oc-autoreview-adapted/SKILL.md +++ b/.agents/skills/oc-autoreview-adapted/SKILL.md @@ -26,6 +26,7 @@ Set paths once: ```bash export AUTOREVIEW=".agents/skills/oc-autoreview-adapted/scripts/autoreview" export AUTOREVIEW_HARNESS=".agents/skills/oc-autoreview-adapted/scripts/test-review-harness" +export AUTOREVIEW_CAMPAIGN=".agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py" ``` Dirty local work: @@ -65,6 +66,7 @@ Scoped codebase audit: ``` Use `--scope-file scopes.txt` when a slice has many paths. +Quote any `--path` value that contains shell globs, e.g. `--path 'tests/test_pop_*.py'`. ## Default Whole-Codebase Campaign @@ -77,6 +79,14 @@ When asked to review the whole codebase hands-off, do not make one giant PR. Spl - `io-bids-study`: file I/O, BIDS, STUDY, dataset/session persistence. - `cli-docs-tools`: CLI, docs, skills, tools, workflows. +Start by scaffolding an orchestration artifact: + +```bash +uv run python "$AUTOREVIEW_CAMPAIGN" "EEGPrep whole-codebase autoreview" +``` + +This creates `.workflow//` with `plan.md`, `state.json`, `orchestration.md`, `packets/`, `results/`, and `final-report.md`. Keep `plan.md` human-readable, update `state.json` as packet status changes, and write integration evidence in `final-report.md`. + For each area: 1. Create a fresh worktree/branch from the requested base, e.g. `autoreview/popfunc`. @@ -93,6 +103,18 @@ PR body must list every finding reviewed: Do not auto-merge. The human reviews each PR normally. +## Parallel Subagents + +Use parallel agents by default for whole-codebase campaigns when the environment exposes subagent/thread/worktree tools and the user has asked for hands-off or parallel work. + +- Launch at most 3 packet agents at once unless the user approves more. +- Give each packet agent its `packets/.md`, base branch, branch name, path scope, test commands, AGENTS.md constraints, and PR-body requirements. +- Packet agents may edit related files outside their path scope only when required by the verified root cause; they must explain that in the PR. +- Do not duplicate work across agents. If a packet blocks on another packet's result, keep it pending. +- Parent agent owns integration: track packet PR URLs, inspect conflicts, synthesize accepted/rejected findings, and run broader checks after packet PRs merge. +- If no subagent runner is available, simulate packets sequentially and write packet notes under `results/`. +- Do not claim that a script launched subagents. The campaign script only scaffolds orchestration; actual subagents require exposed agent/thread tools. + ## Useful Options - `--engine codex|claude|droid|copilot`; default is Codex. diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml index 55464109..4453480c 100644 --- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml +++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "OC Autoreview Adapted" - short_description: "EEGPrep autoreview loops and PR campaigns" - default_prompt: "Use $oc-autoreview-adapted to run scoped autoreview loops or a whole-codebase PR campaign, then verify and fix real bugs, parity breaks, and structural issues." + short_description: "Parallel EEGPrep autoreview PR campaigns" + default_prompt: "Use $oc-autoreview-adapted to run scoped or parallel whole-codebase autoreview loops, then verify and fix real bugs, parity breaks, and structural issues with one PR per codebase area." diff --git a/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py b/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py new file mode 100755 index 00000000..5ea77a07 --- /dev/null +++ b/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +"""Create an EEGPrep autoreview campaign workflow directory.""" + +from __future__ import annotations + +import argparse +import json +import re +import shlex +from datetime import UTC, datetime +from pathlib import Path +from typing import TypedDict + + +class Packet(TypedDict): + id: str + branch: str + paths: list[str] + tests: list[str] + + +DEFAULT_PACKETS: list[Packet] = [ + { + "id": "01-popfunc", + "branch": "autoreview/popfunc", + "paths": [ + "src/eegprep/functions/popfunc", + "src/eegprep/resources/help/pop_*.md", + "tests/test_pop_*.py", + ], + "tests": [ + "uv run --no-sync pytest tests/test_pop_utils.py tests/test_file_menu_pop_functions.py", + ], + }, + { + "id": "02-sigproc", + "branch": "autoreview/sigproc", + "paths": [ + "src/eegprep/functions/sigprocfunc", + "tests/test_*runica*.py", + "tests/test_*resample*.py", + ], + "tests": [ + "uv run --no-sync pytest tests/test_pop_resample_python.py tests/test_eeg_runica.py tests/test_runica.py tests/test_gui_pop_runica.py", + ], + }, + { + "id": "03-gui-session", + "branch": "autoreview/gui-session", + "paths": [ + "src/eegprep/functions/guifunc", + "src/eegprep/functions/adminfunc", + "tests/test_console_workspace.py", + "tests/test_gui_*.py", + ], + "tests": [ + "uv run --no-sync pytest tests/test_console_workspace.py tests/test_gui_main_window.py", + ], + }, + { + "id": "04-plugins", + "branch": "autoreview/plugins", + "paths": [ + "src/eegprep/plugins", + "tests/test_*clean*.py", + "tests/test_*iclabel*.py", + "tests/test_*bids*.py", + ], + "tests": [ + "uv run --no-sync pytest tests/test_gui_pop_clean_rawdata.py tests/test_iclabel.py", + ], + }, + { + "id": "05-io-bids-study", + "branch": "autoreview/io-bids-study", + "paths": [ + "src/eegprep/functions/popfunc/pop_fileio.py", + "src/eegprep/functions/popfunc/pop_loadset.py", + "src/eegprep/functions/popfunc/pop_saveset.py", + "src/eegprep/plugins/EEG_BIDS", + "src/eegprep/functions/studyfunc", + "tests/test_*study*.py", + "tests/test_*bids*.py", + "tests/test_file_menu_pop_functions.py", + ], + "tests": [ + "uv run --no-sync pytest tests/test_file_menu_pop_functions.py tests/test_study_metadata.py tests/test_study_measures.py tests/test_study_clustering.py tests/test_study_end_to_end.py", + ], + }, + { + "id": "06-cli-docs-tools", + "branch": "autoreview/cli-docs-tools", + "paths": [ + "src/eegprep/cli", + "docs/source", + ".agents/skills", + "tools", + "scripts", + "tests/test_cli*.py", + ], + "tests": [ + "uv run --no-sync pytest tests/test_cli_main.py tests/test_cli_transforms.py tests/test_cli_pipeline_qc_report.py tests/test_cli_bids_eeglab_commands.py", + "./pre-commit.py --changed-from origin/develop", + ], + }, +] + + +def slugify(value: str) -> str: + slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") + return slug[:64].strip("-") or "autoreview-campaign" + + +def write_new(path: Path, content: str) -> None: + if not path.exists(): + path.write_text(content, encoding="utf-8") + + +def packet_prompt(packet: Packet, base: str) -> str: + paths = "\n".join(f" --path {shlex.quote(path)} \\" for path in packet["paths"]) + tests = "\n".join(f"- `{test}`" for test in packet["tests"]) + return f"""# Packet {packet["id"]}: {packet["branch"]} + +## Objective +Run a scoped EEGPrep autoreview loop for this codebase area, fix real findings from first principles, and open a PR to `{base}`. + +## Scope +{chr(10).join(f"- `{path}`" for path in packet["paths"])} + +Fixes may touch related helpers outside this scope when required by the root cause, but keep the PR conceptually tied to this packet. + +## Command + +```bash +.agents/skills/oc-autoreview-adapted/scripts/autoreview \\ + --mode codebase \\ +{paths} + --thinking codex=xhigh +``` + +## Verification +Run focused checks first: + +{tests} + +Then run broader checks if the fix affects shared behavior. + +## PR Requirements +- Branch: `{packet["branch"]}` +- Target: `{base}` +- PR body must list every finding reviewed: + - Fixed: finding, root cause, files changed, tests run. + - Rejected: finding and why it is not real or not worth changing. + - Follow-up: only when real but intentionally outside this PR. + +## Do Not +- Do not auto-merge. +- Do not revert unrelated concurrent work. +- Do not report vague architecture preferences without concrete failure modes. +""" + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("title", nargs="?", default="EEGPrep autoreview campaign") + parser.add_argument("--root", default=".workflow") + parser.add_argument("--slug") + parser.add_argument("--base", default="origin/develop") + parser.add_argument("--max-concurrent", type=int, default=3) + args = parser.parse_args() + + slug = slugify(args.slug or args.title) + run_dir = Path(args.root) / slug + packets_dir = run_dir / "packets" + results_dir = run_dir / "results" + packets_dir.mkdir(parents=True, exist_ok=True) + results_dir.mkdir(parents=True, exist_ok=True) + + now = datetime.now(UTC).replace(microsecond=0).isoformat() + packets = [ + { + "id": packet["id"], + "branch": packet["branch"], + "paths": packet["paths"], + "tests": packet["tests"], + "status": "pending", + "pr": None, + } + for packet in DEFAULT_PACKETS + ] + state = { + "title": args.title, + "slug": slug, + "created_at": now, + "status": "planned", + "base": args.base, + "max_concurrent_agents": args.max_concurrent, + "packets": packets, + "integration": {"status": "not_started", "notes": ""}, + } + write_new(run_dir / "state.json", json.dumps(state, indent=2) + "\n") + write_new( + run_dir / "plan.md", + f"""# {args.title} + +## Goal +Run parallel scoped autoreview loops across EEGPrep, fix real bugs/parity/architecture issues, and open PRs for human review. + +## Success Criteria +- Every packet has a PR or a recorded no-change result. +- Each PR body lists fixed, rejected, and follow-up findings. +- Each packet reruns autoreview after fixes. +- Integration checks pass after packet PRs merge. + +## Constraints +- Keep AGENTS.md and EEGPrep's EEGLAB parity goal in force. +- Runtime code must remain standalone and not depend on vendored EEGLAB. +- Do not auto-merge packet PRs. +- Max concurrent agents: {args.max_concurrent}. + +## Risks +- Concurrent work conflicts: keep packet ownership mostly disjoint and resolve against authoritative code. +- Noisy architecture findings: accept only findings with concrete failure modes. + +## Work Packets +{chr(10).join(f"- `{packet['id']}` -> `{packet['branch']}`" for packet in packets)} + +## Integration Policy +Parent agent tracks PRs, resolves conflicts after merges, runs broader checks, and updates final-report.md. +""", + ) + write_new( + run_dir / "orchestration.md", + f"""# Orchestration: {args.title} + +## Execution Rules +- Use available subagent/thread/worktree tools when exposed by the environment. +- Spawn at most {args.max_concurrent} packet agents at once. +- Each packet owns its branch and opens one PR before the parent starts further work in that area. +- If no subagent runner is available, execute packets sequentially and write notes in `results/`. +- Parent integrates packet results; do not paste raw worker dumps as final status. + +## Packet Launch +Give each worker only its packet file plus AGENTS.md context. Workers must not revert unrelated edits and must adapt to concurrent changes. + +## Completion Audit +- All packet PRs created or no-change results recorded. +- PR bodies include every finding reviewed. +- Final integration checks recorded in `final-report.md`. +""", + ) + write_new( + run_dir / "final-report.md", + f"""# Final Report: {args.title} + +## Outcome + +## Packet PRs + +## Findings Fixed + +## Findings Rejected + +## Follow-ups + +## Integration Verification + +## Remaining Risks +""", + ) + for packet in DEFAULT_PACKETS: + write_new(packets_dir / f"{packet['id']}.md", packet_prompt(packet, args.base)) + + print(run_dir) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.gitignore b/.gitignore index dbe68211..a9967c33 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .claude/settings.local.json .context/ +.workflow/ .coveragerc .notes/* !.notes/.gitkeep From 57448a5ead105f33cd58b92125f689713eb2bed7 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Tue, 9 Jun 2026 18:25:50 -0700 Subject: [PATCH 05/16] Remove broken autoreview skill --- .agents/skills/oc-autoreview-adapted/SKILL.md | 182 -- .../oc-autoreview-adapted/agents/openai.yaml | 4 - .../oc-autoreview-adapted/scripts/autoreview | 1546 ----------------- .../scripts/new-review-campaign.py | 279 --- .../scripts/test-review-harness | 16 - .../scripts/test-review-harness.ps1 | 51 - .../scripts/test-review-harness.py | 152 -- .gitignore | 1 - 8 files changed, 2231 deletions(-) delete mode 100644 .agents/skills/oc-autoreview-adapted/SKILL.md delete mode 100644 .agents/skills/oc-autoreview-adapted/agents/openai.yaml delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/autoreview delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness delete mode 100644 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md deleted file mode 100644 index 37e0ea72..00000000 --- a/.agents/skills/oc-autoreview-adapted/SKILL.md +++ /dev/null @@ -1,182 +0,0 @@ ---- -name: oc-autoreview-adapted -description: Run autonomous EEGPrep-focused structured autoreview on dirty changes, branches, commits, PR stacks, or the whole EEGPrep-owned codebase; verify and fix real findings from first principles using AGENTS.md, EEGLAB parity, GUI/console, tests, docs, and security constraints. ---- - -# OC Autoreview Adapted - -Use the bundled helper for high-signal closeout review or whole-codebase bug hunts. It builds one bounded review bundle, runs one or more read-only reviewer engines, validates structured JSON, prints heartbeats for long runs, and exits nonzero when actionable findings remain. - -## Contract - -- Run it for real unless the user asked only for a plan. -- Treat output as advisory. Verify every accepted finding in the real code path before fixing or reporting it. -- Accept concrete bugs, regressions, EEGLAB parity breaks, unsafe I/O/security risks, missing tests tied to behavior, and maintainability issues that cause real future defects. -- Accept structural findings when the code becomes harder to ship: spaghetti branching, wrong ownership layer, duplicate canonical helpers, non-atomic state updates, file sprawl, weak data boundaries, or indirection that hides EEG invariants. -- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, subjective MATLAB/Python style comments, and "cleaner someday" feedback without a concrete failure mode. -- If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason. -- Do not invoke nested review tools from inside review. The helper already runs one structured review path. -- Do not push/stage/commit/open PR unless the user requested that separately. -- If the user asks to review the whole codebase and wants fixes, default to the campaign workflow below: branch/worktree per codebase area, review/fix/test/rerun, then open a PR before moving to the next area. - -## Commands - -Set paths once: - -```bash -export AUTOREVIEW=".agents/skills/oc-autoreview-adapted/scripts/autoreview" -export AUTOREVIEW_HARNESS=".agents/skills/oc-autoreview-adapted/scripts/test-review-harness" -export AUTOREVIEW_CAMPAIGN=".agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py" -``` - -Dirty local work: - -```bash -"$AUTOREVIEW" --mode local -``` - -Branch or stacked PR work: - -```bash -base=$(gh pr view --json baseRefName --jq .baseRefName 2>/dev/null || echo develop) -"$AUTOREVIEW" --mode branch --base "origin/$base" -``` - -Single committed change: - -```bash -"$AUTOREVIEW" --mode commit --commit HEAD -``` - -Whole EEGPrep-owned codebase audit: - -```bash -"$AUTOREVIEW" --mode codebase --thinking codex=xhigh -``` - -The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files and excludes vendored EEGLAB/reference sample data by default; the reviewer may inspect files read-only and report real bugs anywhere in scope. - -Scoped codebase audit: - -```bash -"$AUTOREVIEW" --mode codebase \ - --path src/eegprep/functions/popfunc \ - --path tests/test_pop_utils.py \ - --thinking codex=xhigh -``` - -Use `--scope-file scopes.txt` when a slice has many paths. -Quote any `--path` value that contains shell globs, e.g. `--path 'tests/test_pop_*.py'`. - -## Default Whole-Codebase Campaign - -When asked to review the whole codebase hands-off, do not make one giant PR. Split work into PR-sized areas, usually: - -- `popfunc`: `src/eegprep/functions/popfunc`, matching pop tests/help. -- `sigproc`: `src/eegprep/functions/sigprocfunc`, numerical/parity tests. -- `gui-session`: `src/eegprep/functions/guifunc`, `adminfunc`, console/session tests. -- `plugins`: `src/eegprep/plugins`, bundled plugin tests/resources. -- `io-bids-study`: file I/O, BIDS, STUDY, dataset/session persistence. -- `cli-docs-tools`: CLI, docs, skills, tools, workflows. - -Start by scaffolding an orchestration artifact: - -```bash -uv run python "$AUTOREVIEW_CAMPAIGN" "EEGPrep whole-codebase autoreview" -``` - -This creates `.workflow//` with `plan.md`, `state.json`, `orchestration.md`, `packets/`, `results/`, and `final-report.md`. Keep `plan.md` human-readable, update `state.json` as packet status changes, and write integration evidence in `final-report.md`. - -For each area: - -1. Create a fresh worktree/branch from the requested base, e.g. `autoreview/popfunc`. -2. Run `autoreview --mode codebase --path ... --thinking codex=xhigh`. -3. Verify each finding from first principles. Fix real issues even when the fix touches a related helper outside the initial path scope; keep the PR conceptually tied to that area. -4. Run focused tests, lint/type checks when relevant, then rerun the same scoped autoreview until clean or until remaining findings are rejected with reasons. -5. Commit, push, and open a PR before starting the next area. - -PR body must list every finding reviewed: - -- **Fixed:** finding, root cause, files changed, tests run. -- **Rejected:** finding, why it is not real or not worth changing. -- **Follow-up:** only when real but intentionally outside this PR's area. - -Do not auto-merge. The human reviews each PR normally. - -## Parallel Subagents - -Use parallel agents by default for whole-codebase campaigns when the environment exposes subagent/thread/worktree tools and the user has asked for hands-off or parallel work. - -- Launch at most 3 packet agents at once unless the user approves more. -- Give each packet agent its `packets/.md`, base branch, branch name, path scope, test commands, AGENTS.md constraints, and PR-body requirements. -- Packet agents may edit related files outside their path scope only when required by the verified root cause; they must explain that in the PR. -- Do not duplicate work across agents. If a packet blocks on another packet's result, keep it pending. -- Parent agent owns integration: track packet PR URLs, inspect conflicts, synthesize accepted/rejected findings, and run broader checks after packet PRs merge. -- If no subagent runner is available, simulate packets sequentially and write packet notes under `results/`. -- Do not claim that a script launched subagents. The campaign script only scaffolds orchestration; actual subagents require exposed agent/thread tools. - -## Useful Options - -- `--engine codex|claude|droid|copilot`; default is Codex. -- `--reviewers codex,claude` or `--panel` for a multi-reviewer pass. -- `--model codex=gpt-5.1 --thinking codex=xhigh`; Claude also accepts `max`. -- `--stream-engine-output` to see compact live engine activity. -- `--parallel-tests "uv run pytest tests/test_file.py"` to run tests while review runs. -- `--prompt` / `--prompt-file` / `--dataset` to add evidence. -- `--path` / `--scope-file` to constrain a codebase, branch, local, or commit review to a PR-sized area. -- `--json-output /tmp/review.json` and `--output /tmp/review.txt` for artifacts. -- `--mode uncommitted` is an alias for `local`; use branch/commit modes after committing. -- `--skip-fetch` avoids fetching before branch diffs. -- `--heartbeat-seconds 60` controls long-run heartbeat cadence. - -Smoke check: - -```bash -"$AUTOREVIEW_HARNESS" --dry-run -"$AUTOREVIEW_HARNESS" --fixture buggy --engine codex -``` - -On Windows, use: - -```powershell -python .agents\skills\oc-autoreview-adapted\scripts\autoreview --help -.agents\skills\oc-autoreview-adapted\scripts\test-review-harness.ps1 -Fixture buggy -Engine codex -``` - -## EEGPrep Review Surface - -Prioritize: - -- correctness, runtime/import failures, bad numerical results, broken common workflows; -- EEGLAB parity in APIs, `pop_*` wrappers, history commands, GUI layout/behavior, events, and data structures; -- EEG dict invariants: `data`, `nbchan`, `pnts`, `trials`, `srate`, `xmin`, `xmax`, `times`, `chanlocs`, `event`, `urevent`, `epoch`, `history`, ICA fields; -- 1-based EEGLAB user indices/latencies versus 0-based Python array indices; -- channel-major continuous `(nbchan, pnts)` and epoched `(nbchan, pnts, trials)` data; -- GUI plus `eegprep-console` synchronization through `EEGPrepSession`; -- `return_com=True`, `(EEG, com)` returns, history replay, and session update paths; -- runtime independence from `src/eegprep/eeglab`; -- packaged Markdown help for GUI Help / `pophelp`; -- realistic EEG-size performance and concrete security/path/I/O risks. - -## Architecture Bar - -Use this only for code that will make EEGPrep less reliable or maintainable, not for taste. - -- Look for a simpler "code judo" move that preserves behavior while deleting branches, modes, helper layers, or special cases. -- Flag spaghetti growth: ad-hoc conditionals in busy flows, scattered feature checks, one-off booleans, nullable modes, and partial updates. -- Keep logic in the canonical layer: signal processing in `sigprocfunc`, user wrappers in `popfunc`, GUI/session coordination in `guifunc`/`adminfunc`, plugin code in its plugin package, CLI orchestration outside core math. -- Prefer existing helpers/contracts over near-duplicates; remove thin wrappers or generic magic that hide simple EEG data shapes. -- Treat file-size growth past roughly 1000 lines as a warning in diff review; in whole-codebase audits, flag large modules only with a concrete bug-prone coupling or focused split. -- Prefer fixes that remove concepts, collapse duplicate branches, clarify data boundaries, or make state/session/history updates atomic. - -## Loop - -1. Format first if formatting can change line locations. -2. Run autoreview on the smallest sufficient target. -3. Verify each finding against code and AGENTS.md. -4. Fix accepted findings at the right ownership boundary. -5. Run focused tests, then broader tests if risk warrants. -6. Rerun the same autoreview target. -7. Final response: command used, tests run, findings fixed/rejected, and final clean result or remaining risk. - -If the helper prints `autoreview clean: no accepted/actionable findings reported` and exits 0, report that as clean and stop. diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml deleted file mode 100644 index 4453480c..00000000 --- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml +++ /dev/null @@ -1,4 +0,0 @@ -interface: - display_name: "OC Autoreview Adapted" - short_description: "Parallel EEGPrep autoreview PR campaigns" - default_prompt: "Use $oc-autoreview-adapted to run scoped or parallel whole-codebase autoreview loops, then verify and fix real bugs, parity breaks, and structural issues with one PR per codebase area." diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview deleted file mode 100755 index 2c0c8e9b..00000000 --- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview +++ /dev/null @@ -1,1546 +0,0 @@ -#!/usr/bin/env python3 -# Adapted for EEGPrep from OpenClaw's MIT-licensed autoreview helper: -# https://github.com/openclaw/agent-skills/tree/main/skills/autoreview -# -# Original copyright (c) 2026 openclaw. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -from __future__ import annotations - -import argparse -import concurrent.futures -import copy -import fnmatch -import json -import os -import queue -import subprocess -import sys -import tempfile -import textwrap -import threading -import time -from pathlib import Path -from typing import Any, Callable - - -ENGINES = ("codex", "claude", "droid", "copilot") -DEFAULT_BASE = "origin/develop" -TRUNK_BRANCHES = {"develop", "main", "master"} -CODEBASE_EXCLUDED_PREFIXES = ( - "src/eegprep/eeglab/", - "sample_data/", -) -CATEGORIES = { - "bug", - "security", - "regression", - "test_gap", - "maintainability", - "architecture", - "eeglab_parity", - "data_structure", - "gui_session", - "docs_help", - "performance", -} -THINKING_LEVELS_BY_ENGINE = { - "codex": {"low", "medium", "high", "xhigh"}, - "claude": {"low", "medium", "high", "xhigh", "max"}, - "droid": set(), - "copilot": set(), -} - - -SCHEMA: dict[str, Any] = { - "type": "object", - "additionalProperties": False, - "required": [ - "findings", - "overall_correctness", - "overall_explanation", - "overall_confidence", - ], - "properties": { - "findings": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": False, - "required": [ - "title", - "body", - "priority", - "confidence", - "category", - "code_location", - ], - "properties": { - "title": {"type": "string", "minLength": 1, "maxLength": 140}, - "body": {"type": "string", "minLength": 1, "maxLength": 2400}, - "priority": {"type": "string", "enum": ["P0", "P1", "P2", "P3"]}, - "confidence": {"type": "number", "minimum": 0, "maximum": 1}, - "category": { - "type": "string", - "enum": sorted(CATEGORIES), - }, - "code_location": { - "type": "object", - "additionalProperties": False, - "required": ["file_path", "line"], - "properties": { - "file_path": {"type": "string", "minLength": 1}, - "line": {"type": "integer", "minimum": 1}, - }, - }, - }, - }, - }, - "overall_correctness": { - "type": "string", - "enum": ["patch is correct", "patch is incorrect"], - }, - "overall_explanation": {"type": "string", "minLength": 1, "maxLength": 3000}, - "overall_confidence": {"type": "number", "minimum": 0, "maximum": 1}, - }, -} - - -def run( - args: list[str], cwd: Path, *, input_text: str | None = None, check: bool = True -) -> subprocess.CompletedProcess[str]: - result = subprocess.run( - args, - cwd=cwd, - input=input_text, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if check and result.returncode != 0: - cmd = " ".join(args) - raise SystemExit(f"command failed ({result.returncode}): {cmd}\n{result.stderr or result.stdout}") - return result - - -def run_with_heartbeat( - args: list[str], - cwd: Path, - *, - input_text: str | None = None, - label: str, - heartbeat_seconds: int = 60, - stream_output: bool = False, - stream_display: Callable[[str, str], str | None] | None = None, -) -> subprocess.CompletedProcess[str]: - if stream_output: - return run_with_stream( - args, - cwd, - input_text=input_text, - label=label, - heartbeat_seconds=heartbeat_seconds, - stream_display=stream_display, - ) - started = time.monotonic() - proc = subprocess.Popen( - args, - cwd=cwd, - stdin=subprocess.PIPE if input_text is not None else None, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - first_communicate = True - while True: - try: - stdout, stderr = proc.communicate( - input=input_text if first_communicate else None, - timeout=heartbeat_seconds, - ) - return subprocess.CompletedProcess(args, int(proc.returncode or 0), stdout, stderr) - except subprocess.TimeoutExpired: - first_communicate = False - elapsed = int(time.monotonic() - started) - print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True) - - -def run_with_stream( - args: list[str], - cwd: Path, - *, - input_text: str | None, - label: str, - heartbeat_seconds: int, - stream_display: Callable[[str, str], str | None] | None, -) -> subprocess.CompletedProcess[str]: - started = time.monotonic() - proc = subprocess.Popen( - args, - cwd=cwd, - stdin=subprocess.PIPE if input_text is not None else None, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - bufsize=1, - ) - events: queue.Queue[tuple[str, str | None]] = queue.Queue() - stdout_parts: list[str] = [] - stderr_parts: list[str] = [] - - def read_stream(name: str, stream: Any) -> None: - try: - for line in iter(stream.readline, ""): - events.put((name, line)) - finally: - events.put((name, None)) - - def write_stdin() -> None: - if proc.stdin is None or input_text is None: - return - try: - proc.stdin.write(input_text) - proc.stdin.close() - except BrokenPipeError: - return - - threads = [ - threading.Thread(target=read_stream, args=("stdout", proc.stdout), daemon=True), - threading.Thread(target=read_stream, args=("stderr", proc.stderr), daemon=True), - ] - for thread in threads: - thread.start() - stdin_thread = threading.Thread(target=write_stdin, daemon=True) - stdin_thread.start() - - open_streams = 2 - while open_streams: - try: - name, line = events.get(timeout=heartbeat_seconds) - except queue.Empty: - elapsed = int(time.monotonic() - started) - print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True) - continue - if line is None: - open_streams -= 1 - continue - if name == "stdout": - stdout_parts.append(line) - else: - stderr_parts.append(line) - display = stream_display(name, line) if stream_display else line - if display: - target = sys.stdout if name == "stdout" else sys.stderr - target.write(display) - target.flush() - - for thread in threads: - thread.join() - stdin_thread.join(timeout=1) - returncode = proc.wait() - return subprocess.CompletedProcess(args, returncode, "".join(stdout_parts), "".join(stderr_parts)) - - -def git(repo: Path, *args: str, check: bool = True) -> str: - return run([resolve_command("git", repo), *args], repo, check=check).stdout - - -def repo_root() -> Path: - start = Path.cwd().resolve() - unsafe_root = discover_repo_root(start) or start - git_bin = find_command("git", unsafe_root) - if not git_bin: - raise SystemExit("git executable not found. Install Git or add it to PATH.") - result = subprocess.run( - [git_bin, "rev-parse", "--show-toplevel"], - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if result.returncode != 0: - raise SystemExit("autoreview must run inside a git repository") - return Path(result.stdout.strip()).resolve() - - -def discover_repo_root(start: Path) -> Path | None: - current = start - while True: - if (current / ".git").exists(): - return current - if current.parent == current: - return None - current = current.parent - - -def current_branch(repo: Path) -> str: - return git(repo, "branch", "--show-current", check=False).strip() or "detached" - - -def is_dirty(repo: Path) -> bool: - return bool(git(repo, "status", "--porcelain").strip()) - - -def choose_target(repo: Path, mode: str, base_ref: str | None) -> tuple[str, str | None]: - mode = "local" if mode == "uncommitted" else mode - branch = current_branch(repo) - if mode == "local" or (mode == "auto" and is_dirty(repo)): - return "local", None - if mode == "codebase": - return "codebase", None - if mode == "commit": - return "commit", None - if mode == "branch" or (mode == "auto" and branch not in TRUNK_BRANCHES): - return "branch", base_ref or detect_pr_base(repo) or DEFAULT_BASE - raise SystemExit( - "no review target: clean trunk checkout and no forced mode. " - "Pass --mode codebase, --mode branch --base , or --mode commit --commit ." - ) - - -def detect_pr_base(repo: Path) -> str | None: - gh_bin = find_command("gh", repo) - if not gh_bin: - return None - result = run([gh_bin, "pr", "view", "--json", "baseRefName", "--jq", ".baseRefName"], repo, check=False) - base = result.stdout.strip() - return f"origin/{base}" if result.returncode == 0 and base else None - - -def resolve_command(name: str, repo: Path) -> str: - resolved = find_command(name, repo) - if resolved: - return resolved - raise SystemExit(f"executable not found: {name}. Install it or pass an explicit trusted path when supported.") - - -def find_command(name: str, repo: Path) -> str | None: - command = Path(name) - if has_directory_component(name, command): - base = command if command.is_absolute() else repo / command - return first_executable_candidate(base) - for part in os.environ.get("PATH", "").split(os.pathsep): - if not part or part == ".": - continue - path_part = Path(part) - if not path_part.is_absolute(): - continue - try: - resolved_part = path_part.resolve() - resolved_repo = repo.resolve() - except OSError: - continue - if is_within(resolved_part, resolved_repo): - continue - found = first_executable_candidate(resolved_part / name, reject_root=resolved_repo) - if found: - return found - return None - - -def is_within(path: Path, root: Path) -> bool: - return path == root or path.is_relative_to(root) - - -def has_directory_component(name: str, command: Path) -> bool: - separators = [separator for separator in (os.sep, os.altsep) if separator] - return command.is_absolute() or bool(command.drive) or any(separator in name for separator in separators) - - -def first_executable_candidate(path: Path, *, reject_root: Path | None = None) -> str | None: - if os.name == "nt" and not path.suffix: - extensions = [ext for ext in os.environ.get("PATHEXT", ".COM;.EXE;.BAT;.CMD").split(";") if ext] - candidates = [path.with_suffix(ext.lower()) for ext in extensions] - candidates.extend(path.with_suffix(ext.upper()) for ext in extensions) - candidates.append(path) - else: - candidates = [path] - for candidate in candidates: - if candidate.is_file() and os.access(candidate, os.X_OK): - if reject_root is not None: - try: - if is_within(candidate.resolve(), reject_root): - continue - except OSError: - continue - return str(candidate) - return None - - -def bounded(text: str, limit: int = 180_000) -> str: - if len(text) <= limit: - return text - return text[:limit] + f"\n\n[truncated at {limit} characters]\n" - - -def bounded_field(text: str, limit: int) -> str: - if len(text) <= limit: - return text - suffix = "\n\n[truncated]" - return text[: max(0, limit - len(suffix))] + suffix - - -def read_text(path: Path, limit: int = 40_000) -> str: - try: - data = path.read_bytes() - except OSError as exc: - return f"[unreadable: {exc}]" - if b"\0" in data: - return "[binary file omitted]" - text = data.decode("utf-8", errors="replace") - return bounded(text, limit) - - -def local_bundle(repo: Path) -> str: - parts = [ - "# Git Status", - git(repo, "status", "--short"), - "# Staged Diff", - git(repo, "diff", "--cached", "--stat"), - bounded(git(repo, "diff", "--cached", "--patch", "--find-renames")), - "# Unstaged Diff", - git(repo, "diff", "--stat"), - bounded(git(repo, "diff", "--patch", "--find-renames")), - ] - untracked = [line for line in git(repo, "ls-files", "--others", "--exclude-standard").splitlines() if line] - if untracked: - parts.append("# Untracked Files") - for rel in untracked: - path = repo / rel - parts.append(f"## {rel}\n{read_text(path)}") - return "\n\n".join(parts) - - -def branch_bundle(repo: Path, base_ref: str, *, skip_fetch: bool) -> str: - if not skip_fetch: - git(repo, "fetch", "origin", "--quiet", check=False) - return "\n\n".join( - [ - "# Branch Diff", - f"base: {base_ref}", - git(repo, "diff", "--stat", f"{base_ref}...HEAD"), - bounded(git(repo, "diff", "--patch", "--find-renames", f"{base_ref}...HEAD")), - ] - ) - - -def normalize_scope_spec(scope: str) -> str: - normalized = scope.strip().replace("\\", "/").removeprefix("./").rstrip("/") - if not normalized: - raise SystemExit("scope path cannot be empty") - path = Path(normalized) - if path.is_absolute() or ".." in path.parts: - raise SystemExit(f"scope path must be repo-relative: {scope}") - return normalized - - -def load_scope_specs(args: argparse.Namespace) -> list[str]: - scopes = [normalize_scope_spec(scope) for scope in args.path or []] - for scope_file in args.scope_file or []: - path = Path(scope_file) - for line in path.read_text().splitlines(): - value = line.split("#", 1)[0].strip() - if value: - scopes.append(normalize_scope_spec(value)) - seen: set[str] = set() - result: list[str] = [] - for scope in scopes: - if scope in seen: - continue - seen.add(scope) - result.append(scope) - return result - - -def path_matches_scope(rel: str, scopes: list[str]) -> bool: - if not scopes: - return True - return any( - rel == scope - or rel.startswith(f"{scope}/") - or fnmatch.fnmatchcase(rel, scope) - or fnmatch.fnmatchcase(rel, f"{scope}/**") - for scope in scopes - ) - - -def codebase_paths(repo: Path, scopes: list[str] | None = None) -> set[str]: - scope_specs = scopes or [] - paths: set[str] = set() - for rel in git(repo, "ls-files").splitlines(): - if not rel or any(rel.startswith(prefix) for prefix in CODEBASE_EXCLUDED_PREFIXES): - continue - if not path_matches_scope(rel, scope_specs): - continue - paths.add(rel) - return paths - - -def codebase_bundle(repo: Path, paths: set[str]) -> str: - grouped: dict[str, int] = {} - for rel in paths: - top = rel.split("/", 1)[0] - grouped[top] = grouped.get(top, 0) + 1 - inventory = "\n".join(f"- {path}" for path in sorted(paths)) - summary = "\n".join(f"- {name}: {count} tracked files" for name, count in sorted(grouped.items())) - return "\n\n".join( - [ - "# Codebase Audit", - "This is a repository-wide audit, not a diff review. The reviewer may inspect in-scope files with read-only tools.", - "# Scope Summary", - summary or "[no tracked files]", - "# In-Scope Tracked Files", - bounded(inventory, 120_000), - ] - ) - - -def commit_bundle(repo: Path, commit_ref: str) -> str: - return "\n\n".join( - [ - "# Commit Diff", - f"commit: {commit_ref}", - git(repo, "show", "--stat", "--format=fuller", commit_ref), - bounded(git(repo, "show", "--patch", "--find-renames", "--format=fuller", commit_ref)), - ] - ) - - -def review_paths( - repo: Path, target: str, target_ref: str | None, commit_ref: str, scopes: list[str] | None = None -) -> set[str]: - names: set[str] = set() - if target == "codebase": - return codebase_paths(repo, scopes) - if target == "local": - sources = [ - git(repo, "diff", "--name-only", "--cached"), - git(repo, "diff", "--name-only"), - git(repo, "ls-files", "--others", "--exclude-standard"), - ] - elif target == "branch": - assert target_ref - sources = [git(repo, "diff", "--name-only", f"{target_ref}...HEAD")] - else: - sources = [git(repo, "show", "--name-only", "--format=", commit_ref)] - for source in sources: - for line in source.splitlines(): - path = line.strip() - if path and path_matches_scope(path, scopes or []): - names.add(path) - return names - - -def load_extra_prompt(args: argparse.Namespace) -> str: - chunks: list[str] = [] - for value in args.prompt or []: - chunks.append(value) - for path in args.prompt_file or []: - chunks.append(Path(path).read_text()) - return "\n\n".join(chunks) - - -def load_datasets(args: argparse.Namespace) -> str: - chunks: list[str] = [] - for spec in args.dataset or []: - path = Path(spec) - if path.is_dir(): - raise SystemExit(f"--dataset must be a file, got directory: {path}") - chunks.append(f"# Dataset: {path}\n{read_text(path)}") - return "\n\n".join(chunks) - - -def instruction_paths(repo: Path, scope_paths: set[str]) -> list[Path]: - paths = {repo / "AGENTS.md"} - for rel in scope_paths: - rel_path = Path(rel) - if rel_path.is_absolute() or ".." in rel_path.parts: - continue - current = (repo / rel_path).parent - while True: - candidate = current / "AGENTS.md" - if candidate.exists(): - paths.add(candidate) - if current == repo or current.parent == current: - break - current = current.parent - return sorted(path for path in paths if path.exists()) - - -def instruction_bundle(repo: Path, scope_paths: set[str]) -> str: - paths = instruction_paths(repo, scope_paths) - if not paths: - return "# Repository Instructions\n[no AGENTS.md files found]" - parts = ["# Repository Instructions"] - for path in paths: - rel = path.relative_to(repo) - parts.append(f"## {rel}\n{read_text(path, limit=80_000)}") - return "\n\n".join(parts) - - -def build_prompt( - repo: Path, - target: str, - target_ref: str | None, - scope_paths: set[str], - instructions: str, - bundle: str, - extra_prompt: str, - datasets: str, -) -> str: - target_line = f"{target} {target_ref}" if target_ref else target - scope_label = "Changed Paths" if target != "codebase" else "Review Scope" - scope_list = "\n".join(f"- {path}" for path in sorted(scope_paths)) or "[no paths]" - scope_rule = ( - "Report only actionable defects introduced or exposed by this change." - if target != "codebase" - else ( - "Report concrete actionable defects in the in-scope EEGPrep-owned codebase. " - "Do not require diff provenance, but do avoid stale vendored/reference code and vague wishlist items." - ) - ) - return textwrap.dedent( - f""" - You are a senior EEGPrep code reviewer. Review the provided bundle and inspect files as needed. - - Hard rules: - - Return exactly one JSON object and nothing else. Do not wrap it in Markdown. - - The JSON object must match this schema exactly: - {json.dumps(SCHEMA, indent=2)} - - Do not modify files. - - Do not invoke nested reviewers or review tools. - - Forbidden nested review commands include: codex review, autoreview, claude review, oracle review. - - You may use read-only tools and web search to inspect files, dependency contracts, upstream docs, current behavior, and security implications. - - Shell commands, if available, must be read-only inspection commands. Do not run tests, formatters, package installs, generators, network mutation commands, git mutation commands, or commands that write files. - - {scope_rule} - - Prefer high-signal findings over style feedback. - - Include security findings only for concrete risks: injection, secret leaks, authz/authn bypass, path traversal, unsafe deserialization, unsafe filesystem/shell use, privacy leaks, and credential handling. - - Do not reject legitimate functionality merely because it touches shell, filesystem, network, auth, or sensitive data. Report a security finding only when the patch creates a concrete exploitable risk, removes an important safety check, or lacks validation at a trust boundary. - - For each finding, use the smallest file/line location that demonstrates the issue. - - If there are no actionable findings, return an empty findings array and mark the patch correct. - - Structural quality bar: - - Do not approve code merely because it works. Flag architecture that makes EEGPrep harder to ship reliably. - - Look for a simpler "code judo" move: preserving behavior while deleting branches, modes, helper layers, or special cases. - - Flag spaghetti growth: ad-hoc conditionals inserted into busy flows, one-off booleans, nullable modes, scattered feature checks, and partial state updates. - - Flag logic in the wrong layer: GUI/session code doing signal processing, pop_* wrappers owning low-level math, CLI/console code bypassing session/history contracts, or runtime paths depending on vendored EEGLAB. - - Prefer canonical helpers and existing contracts over bespoke near-duplicates. Report duplicate helpers when a clear canonical home already exists. - - Be skeptical of thin wrappers, identity abstractions, cast-heavy/loosely-shaped boundaries, or generic magic that hides simple EEG data invariants. - - Treat file sprawl as a smell. In diff review, flag changes that push a file past roughly 1000 lines without a strong reason. In codebase audit, flag oversized modules only when there is a concrete bug-prone coupling or a focused decomposition path. - - Prefer remedies that remove concepts, collapse duplicate branches, make state updates atomic, clarify typed/data boundaries, or move logic to the owning module. - - EEGPrep review priorities: - - Correctness bugs, import/runtime failures, wrong numerical results, and broken common workflows. - - EEGLAB parity in APIs, pop_* wrappers, history commands, GUI behavior, event semantics, and expected data structures. - - EEG dict fields: data, nbchan, pnts, trials, srate, xmin, xmax, times, chanlocs, event, urevent, epoch, history, icaact, icawinv, icasphere, icaweights, icachansind. - - MATLAB/Python indexing boundaries: EEGLAB event latencies and user-facing indices are usually 1-based; Python arrays are 0-based. - - Channel-major data shape: continuous (nbchan, pnts), epoched (nbchan, pnts, trials). - - GUI plus eegprep-console session sync through EEGPrepSession. - - User-facing pop_* contracts: return_com=True, (EEG, com) returns, history strings, and GUI/session update paths. - - Runtime code must not depend on src/eegprep/eeglab existing; use it only as a development reference. - - GUI Help/pophelp needs EEGPrep-owned packaged Markdown resources. - - Missing tests only when tied to a concrete changed or audited behavior. - - Realistic EEG-size performance regressions. - - Review target: {target_line} - Repository: {repo} - - # {scope_label} - {scope_list} - - {extra_prompt} - - {datasets} - - {instructions} - - # Change Bundle - {bundle} - """ - ).strip() - - -def write_json_temp(data: dict[str, Any]) -> Path: - handle = tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) - with handle: - json.dump(data, handle) - return Path(handle.name) - - -def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str: - if not args.tools: - raise SystemExit( - "--no-tools is not supported by the Codex engine; use --engine claude --no-tools for a no-tools run" - ) - schema_path = write_json_temp(SCHEMA) - output_path = Path(tempfile.NamedTemporaryFile("w", suffix=".json", delete=False).name) - cmd = [resolve_command(args.codex_bin, repo), "--ask-for-approval", "never"] - if args.web_search: - cmd.append("--search") - if args.model: - cmd.extend(["--model", args.model]) - if args.thinking: - cmd.extend(["-c", f'model_reasoning_effort="{args.thinking}"']) - cmd.append("exec") - if args.stream_engine_output: - cmd.append("--json") - cmd.extend( - [ - "--ephemeral", - "-C", - str(repo), - "-s", - "read-only", - "--output-schema", - str(schema_path), - "--output-last-message", - str(output_path), - "-", - ] - ) - result = run_with_heartbeat( - cmd, - repo, - input_text=prompt, - label="codex", - heartbeat_seconds=args.heartbeat_seconds, - stream_output=args.stream_engine_output, - stream_display=CodexStreamDisplay() if args.stream_engine_output else None, - ) - try: - output = output_path.read_text() - finally: - schema_path.unlink(missing_ok=True) - output_path.unlink(missing_ok=True) - if result.returncode != 0: - raise SystemExit(f"codex engine failed ({result.returncode})\n{result.stderr or result.stdout}") - return output or result.stdout - - -def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str: - cmd = [ - resolve_command(args.claude_bin, repo), - "--print", - "--no-session-persistence", - "--output-format", - "stream-json" if args.stream_engine_output else "json", - "--json-schema", - json.dumps(SCHEMA), - ] - if args.tools: - cmd.extend(["--allowedTools", claude_allowed_tools(args)]) - else: - cmd.extend(["--tools", ""]) - if args.stream_engine_output: - cmd.append("--verbose") - if args.model: - cmd.extend(["--model", args.model]) - if args.thinking: - cmd.extend(["--effort", args.thinking]) - result = run_with_heartbeat( - cmd, - repo, - input_text=prompt, - label="claude", - heartbeat_seconds=args.heartbeat_seconds, - stream_output=args.stream_engine_output, - stream_display=ClaudeStreamDisplay() if args.stream_engine_output else None, - ) - if result.returncode != 0: - raise SystemExit(f"claude engine failed ({result.returncode})\n{result.stderr or result.stdout}") - return result.stdout - - -def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str: - if args.thinking: - raise SystemExit("--thinking is not supported by the droid engine") - prompt_path = Path(tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False).name) - prompt_path.write_text(prompt) - cmd = [ - resolve_command(args.droid_bin, repo), - "exec", - "--cwd", - str(repo), - "--output-format", - "json", - "-f", - str(prompt_path), - ] - if args.model: - cmd.extend(["--model", args.model]) - if not args.tools: - cmd.extend(["--disabled-tools", "*"]) - result = run_with_heartbeat( - cmd, - repo, - label="droid", - heartbeat_seconds=args.heartbeat_seconds, - stream_output=args.stream_engine_output, - ) - prompt_path.unlink(missing_ok=True) - if result.returncode != 0: - raise SystemExit(f"droid engine failed ({result.returncode})\n{result.stderr or result.stdout}") - return result.stdout - - -def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str: - if args.thinking: - raise SystemExit("--thinking is not supported by the copilot engine") - if not args.tools: - raise SystemExit( - "--no-tools is not supported by the copilot engine; copilot requires a read-only file view tool to load the review bundle without exposing it in argv" - ) - with tempfile.TemporaryDirectory(prefix="autoreview-copilot.") as tempdir: - prompt_path = Path(tempdir) / "prompt.txt" - prompt_path.write_text(prompt) - os.chmod(prompt_path, 0o600) - cmd = [ - resolve_command(args.copilot_bin, repo), - "-C", - tempdir, - "-p", - "Read ./prompt.txt and follow it exactly. Return only the requested JSON object.", - "--output-format", - "json", - "--stream", - "on" if args.stream_engine_output else "off", - "--no-ask-user", - "--disable-builtin-mcps", - ] - if args.model: - cmd.extend(["--model", args.model]) - cmd.extend( - [ - "--available-tools=read_agent,rg,view,web_fetch", - "--allow-tool=read_agent", - "--allow-tool=rg", - "--allow-tool=view", - "--allow-tool=web_fetch", - ] - ) - if args.web_search: - cmd.append("--allow-all-urls") - result = run_with_heartbeat( - cmd, - Path(tempdir), - label="copilot", - heartbeat_seconds=args.heartbeat_seconds, - stream_output=args.stream_engine_output, - ) - if result.returncode != 0: - raise SystemExit(f"copilot engine failed ({result.returncode})\n{result.stderr or result.stdout}") - return result.stdout - - -class CodexStreamDisplay: - def __init__(self, *, activity_seconds: int = 20) -> None: - self.activity_seconds = activity_seconds - self.hidden_events = 0 - self.last_visible = time.monotonic() - - def __call__(self, name: str, line: str) -> str | None: - if name != "stdout": - return line - try: - event = json.loads(line) - except json.JSONDecodeError: - return self.visible(line) - event_type = event.get("type") - if event_type == "thread.started": - return self.visible(f"codex thread: {event.get('thread_id', '')}\n") - if event_type == "turn.started": - return self.visible("codex turn started\n") - if event_type == "turn.completed": - usage = event.get("usage") - message = format_codex_usage(usage) + "\n" if isinstance(usage, dict) else "codex turn completed\n" - return self.visible(self.flush_hidden() + message) - item = event.get("item") - if isinstance(item, dict) and item.get("type") == "agent_message" and isinstance(item.get("text"), str): - return self.visible(self.flush_hidden() + item["text"].rstrip() + "\n") - return self.hidden_activity() - - def hidden_activity(self) -> str | None: - self.hidden_events += 1 - if time.monotonic() - self.last_visible < self.activity_seconds: - return None - return self.visible(self.flush_hidden()) - - def flush_hidden(self) -> str: - if not self.hidden_events: - return "" - count = self.hidden_events - self.hidden_events = 0 - return f"codex activity: {count} hidden tool/status events\n" - - def visible(self, text: str) -> str: - self.last_visible = time.monotonic() - return text - - -class ClaudeStreamDisplay: - def __init__(self, *, activity_seconds: int = 20) -> None: - self.activity_seconds = activity_seconds - self.hidden_events = 0 - self.last_visible = time.monotonic() - self.started = False - - def __call__(self, name: str, line: str) -> str | None: - if name != "stdout": - return line - try: - event = json.loads(line) - except json.JSONDecodeError: - return self.visible(line) - event_type = event.get("type") - if event_type == "system" and not self.started: - self.started = True - return self.visible("claude turn started\n") - if event_type == "assistant": - return self.assistant_message(event) - if event_type == "result": - return self.visible(self.flush_hidden() + self.result_summary(event)) - return self.hidden_activity() - - def assistant_message(self, event: dict[str, Any]) -> str | None: - message = event.get("message") - if not isinstance(message, dict): - return self.hidden_activity() - chunks: list[str] = [] - for item in message.get("content", []): - if not isinstance(item, dict): - continue - if item.get("type") == "text" and isinstance(item.get("text"), str): - chunks.append(item["text"].rstrip()) - if chunks: - return self.visible(self.flush_hidden() + "\n".join(chunks) + "\n") - return self.hidden_activity() - - def result_summary(self, event: dict[str, Any]) -> str: - usage = event.get("usage") - fields: list[str] = [] - if isinstance(usage, dict): - for key in ( - "input_tokens", - "cache_read_input_tokens", - "cache_creation_input_tokens", - "output_tokens", - ): - value = usage.get(key) - if isinstance(value, int): - fields.append(f"{key}={value}") - cost = event.get("total_cost_usd") - if isinstance(cost, (int, float)) and not isinstance(cost, bool): - fields.append(f"cost_usd={cost:.6f}") - return "claude usage: " + " ".join(fields) + "\n" if fields else "claude turn completed\n" - - def hidden_activity(self) -> str | None: - self.hidden_events += 1 - if time.monotonic() - self.last_visible < self.activity_seconds: - return None - return self.visible(self.flush_hidden()) - - def flush_hidden(self) -> str: - if not self.hidden_events: - return "" - count = self.hidden_events - self.hidden_events = 0 - return f"claude activity: {count} hidden tool/status events\n" - - def visible(self, text: str) -> str: - self.last_visible = time.monotonic() - return text - - -def format_codex_usage(usage: dict[str, Any]) -> str: - fields = [ - "input_tokens", - "cached_input_tokens", - "output_tokens", - "reasoning_output_tokens", - ] - parts = [f"{field}={usage[field]}" for field in fields if isinstance(usage.get(field), int)] - return "codex usage: " + " ".join(parts) if parts else "codex usage: unavailable" - - -def claude_allowed_tools(args: argparse.Namespace) -> str: - tools = [tool.strip() for tool in args.claude_allowed_tools.split(",") if tool.strip()] - if not args.web_search: - tools = [tool for tool in tools if tool not in {"WebSearch", "WebFetch"}] - return ",".join(tools) - - -def extract_json(text: str) -> dict[str, Any]: - stripped = text.strip() - if not stripped: - raise SystemExit("review engine returned empty output") - try: - parsed = json.loads(stripped) - except json.JSONDecodeError as exc: - fenced_report = parse_json_candidate(stripped) - if isinstance(fenced_report, dict) and "findings" in fenced_report: - return fenced_report - jsonl_report = extract_json_from_jsonl(stripped) - if jsonl_report: - return jsonl_report - raise SystemExit(f"review engine returned non-JSON output: {exc}\n{stripped[:2000]}") - if isinstance(parsed, dict) and "findings" in parsed: - return parsed - if isinstance(parsed, dict) and isinstance(parsed.get("structured_output"), dict): - return parsed["structured_output"] - if isinstance(parsed, dict) and isinstance(parsed.get("result"), str): - result_json = parse_json_candidate(parsed["result"]) - if isinstance(result_json, dict) and "findings" in result_json: - return result_json - raise SystemExit(f"review engine result was not structured JSON:\n{parsed['result'][:2000]}") - jsonl_report = extract_json_from_jsonl(stripped) - if jsonl_report: - return jsonl_report - raise SystemExit(f"review engine returned unexpected JSON shape:\n{json.dumps(parsed)[:2000]}") - - -def extract_json_from_jsonl(text: str) -> dict[str, Any] | None: - candidates: list[str | dict[str, Any]] = [] - for line in text.splitlines(): - line = line.strip() - if not line: - continue - try: - event = json.loads(line) - except json.JSONDecodeError: - continue - if not isinstance(event, dict): - continue - part = event.get("part") - if isinstance(part, dict) and isinstance(part.get("text"), str): - candidates.append(part["text"]) - data = event.get("data") - if isinstance(data, dict) and isinstance(data.get("content"), str): - candidates.append(data["content"]) - if isinstance(event.get("result"), str): - candidates.append(event["result"]) - if isinstance(event.get("structured_output"), dict): - candidates.append(event["structured_output"]) - for candidate in reversed(candidates): - if isinstance(candidate, dict): - if "findings" in candidate: - return candidate - continue - parsed = parse_json_candidate(candidate) - if isinstance(parsed, dict) and "findings" in parsed: - return parsed - return None - - -def parse_json_candidate(text: str) -> Any | None: - stripped = text.strip() - if stripped.startswith("```"): - lines = stripped.splitlines() - if lines and lines[0].startswith("```") and lines[-1].strip() == "```": - stripped = "\n".join(lines[1:-1]).strip() - try: - parsed = json.loads(stripped) - except json.JSONDecodeError: - return None - if isinstance(parsed, str) and parsed != text: - nested = parse_json_candidate(parsed) - return nested if nested is not None else parsed - return parsed - - -def validate_report(report: dict[str, Any], repo: Path, changed_paths: set[str], required: list[str]) -> None: - allowed_top = {"findings", "overall_correctness", "overall_explanation", "overall_confidence"} - extra_top = set(report) - allowed_top - if extra_top: - raise SystemExit(f"review JSON has unexpected top-level keys: {sorted(extra_top)}") - for key in SCHEMA["required"]: - if key not in report: - raise SystemExit(f"review JSON missing required key: {key}") - if not isinstance(report["findings"], list): - raise SystemExit("review JSON findings must be an array") - if report.get("overall_correctness") not in {"patch is correct", "patch is incorrect"}: - raise SystemExit(f"review JSON has invalid overall_correctness: {report.get('overall_correctness')}") - if not isinstance(report.get("overall_explanation"), str) or not report["overall_explanation"]: - raise SystemExit("review JSON overall_explanation must be a non-empty string") - if len(report["overall_explanation"]) > 3000: - raise SystemExit("review JSON overall_explanation is too long") - if not number_in_range(report.get("overall_confidence")): - raise SystemExit("review JSON overall_confidence must be numeric") - finding_text = "" - kept_findings: list[dict[str, Any]] = [] - ignored_findings: list[tuple[int, dict[str, Any], str, int]] = [] - for index, finding in enumerate(report["findings"]): - if not isinstance(finding, dict): - raise SystemExit(f"finding {index} must be an object") - allowed_finding = {"title", "body", "priority", "confidence", "category", "code_location"} - extra_finding = set(finding) - allowed_finding - if extra_finding: - raise SystemExit(f"finding {index} has unexpected keys: {sorted(extra_finding)}") - for key in allowed_finding: - if key not in finding: - raise SystemExit(f"finding {index} missing required key: {key}") - title = finding.get("title") - if not isinstance(title, str) or not title or len(title) > 140: - raise SystemExit(f"finding {index} has invalid title") - body = finding.get("body") - if not isinstance(body, str) or not body or len(body) > 2400: - raise SystemExit(f"finding {index} has invalid body") - priority = finding.get("priority") - if priority not in {"P0", "P1", "P2", "P3"}: - raise SystemExit(f"finding {index} has invalid priority: {priority}") - if not number_in_range(finding.get("confidence")): - raise SystemExit(f"finding {index} has invalid confidence") - category = finding.get("category") - if category not in CATEGORIES: - raise SystemExit(f"finding {index} has invalid category: {category}") - location = finding.get("code_location") - if not isinstance(location, dict): - raise SystemExit(f"finding {index} missing code_location") - rel = str(location.get("file_path", "")).strip() - line = location.get("line") - if not rel or not isinstance(line, int) or line < 1: - raise SystemExit(f"finding {index} has invalid location: {location}") - if Path(rel).is_absolute() or ".." in Path(rel).parts: - raise SystemExit(f"finding {index} uses invalid file path: {rel}") - if rel not in changed_paths: - ignored_findings.append((index, finding, rel, line)) - continue - kept_findings.append(finding) - finding_text += "\n" + json.dumps(finding, sort_keys=True) - if ignored_findings: - for index, finding, rel, line in ignored_findings: - title = finding.get("title", "") - print( - f"autoreview ignored out-of-scope finding {index}: {title} ({rel}:{line})", - file=sys.stderr, - ) - print(bounded_field(str(finding.get("body", "")), 500), file=sys.stderr) - report["findings"] = kept_findings - if not kept_findings and report["overall_correctness"] == "patch is incorrect": - note = f"Ignored {len(ignored_findings)} out-of-scope finding(s) outside the reviewed change." - explanation = report["overall_explanation"].rstrip() - report["overall_correctness"] = "patch is correct" - report["overall_explanation"] = bounded_field(f"{explanation}\n\n{note}", 3000) - haystack = finding_text.lower() - for needle in required: - if needle.lower() not in haystack: - raise SystemExit(f"required finding text not found: {needle}") - - -def number_in_range(value: Any) -> bool: - return isinstance(value, (int, float)) and not isinstance(value, bool) and 0 <= value <= 1 - - -def print_report(report: dict[str, Any], *, label: str = "autoreview") -> None: - findings = report["findings"] - if findings: - print(f"{label} findings: {len(findings)}") - elif report["overall_correctness"] == "patch is incorrect": - print(f"{label} verdict: patch is incorrect without discrete findings") - else: - print(f"{label} clean: no accepted/actionable findings reported") - for finding in findings: - loc = finding["code_location"] - print(f"[{finding['priority']}] {finding['title']}") - print(f"{loc['file_path']}:{loc['line']}") - print(f"{finding['body']}") - print() - print(f"overall: {report['overall_correctness']} ({report['overall_confidence']})") - print(report["overall_explanation"]) - - -def start_parallel_tests(command: str, repo: Path, shell_kind: str) -> tuple[subprocess.Popen, float]: - print(f"tests: {command}") - if shell_kind == "default" or shell_kind == "cmd": - return subprocess.Popen(command, cwd=repo, shell=True), time.time() - if shell_kind == "powershell": - powershell = resolve_command("powershell", repo) - return subprocess.Popen( - [powershell, "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command], - cwd=repo, - ), time.time() - if shell_kind == "pwsh": - pwsh = resolve_command("pwsh", repo) - return subprocess.Popen( - [pwsh, "-NoProfile", "-Command", command], - cwd=repo, - ), time.time() - raise SystemExit(f"invalid --parallel-tests-shell/AUTOREVIEW_PARALLEL_TESTS_SHELL: {shell_kind}") - - -def finish_parallel_tests(proc: subprocess.Popen, started: float) -> int: - proc.wait() - print(f"tests exit: {proc.returncode} after {int(time.time() - started)}s") - return int(proc.returncode or 0) - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Bundle-driven AI code review.") - parser.add_argument( - "--mode", - choices=["auto", "local", "uncommitted", "branch", "commit", "codebase"], - default="auto", - ) - parser.add_argument("--base") - parser.add_argument("--commit", default="HEAD") - parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch origin before branch diffs.") - parser.add_argument("--engine", choices=ENGINES, default=os.environ.get("AUTOREVIEW_ENGINE", "codex")) - parser.add_argument("--reviewers", help="Comma-separated review panel, e.g. codex,claude or codex:gpt-5:high.") - parser.add_argument( - "--panel", - action="store_true", - help="Run a Codex/Claude review panel unless --engine changes the first reviewer.", - ) - parser.add_argument("--model", action="append", help="Model for all reviewers or engine=model. Repeatable.") - parser.add_argument( - "--thinking", - action="append", - help="Thinking/effort for all reviewers or engine=level. Repeatable. Codex: low, medium, high, xhigh. Claude: low, medium, high, xhigh, max.", - ) - parser.add_argument( - "--allow-partial-panel", action="store_true", help="Continue panel output when one reviewer fails." - ) - parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex")) - parser.add_argument("--claude-bin", default=os.environ.get("CLAUDE_BIN", "claude")) - parser.add_argument("--droid-bin", default=os.environ.get("DROID_BIN", "droid")) - parser.add_argument("--copilot-bin", default=os.environ.get("COPILOT_BIN", "copilot")) - parser.add_argument( - "--no-tools", - dest="tools", - action="store_false", - default=True, - help="Disable tools for engines that support it. Codex and copilot reject no-tools review.", - ) - parser.add_argument("--no-web-search", dest="web_search", action="store_false", default=True) - parser.add_argument( - "--claude-allowed-tools", - default=os.environ.get( - "AUTOREVIEW_CLAUDE_TOOLS", - "Read,Grep,Glob,WebSearch,WebFetch", - ), - ) - parser.add_argument("--prompt", action="append", help="Additional review instruction text.") - parser.add_argument("--prompt-file", action="append", help="Additional review instruction file.") - parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.") - parser.add_argument( - "--path", - action="append", - help="Repo-relative file, directory, or glob to include in the review scope. Repeat for multiple scopes.", - ) - parser.add_argument( - "--scope-file", - action="append", - help="File containing repo-relative --path scopes, one per line. Blank lines and # comments are ignored.", - ) - parser.add_argument("--output", help="Write human output to a file as well as stdout.") - parser.add_argument("--json-output", help="Write validated structured review JSON.") - parser.add_argument("--heartbeat-seconds", type=int, default=60) - parser.add_argument( - "--stream-engine-output", - action="store_true", - default=os.environ.get("AUTOREVIEW_STREAM_ENGINE_OUTPUT") == "1", - help="Stream review engine output while preserving buffered output for validation. Codex output is filtered to hide tool/file chatter.", - ) - parser.add_argument( - "--parallel-tests", help="Run a test command concurrently with review; failure fails the helper." - ) - parser.add_argument( - "--parallel-tests-shell", - choices=["default", "cmd", "powershell", "pwsh"], - default=os.environ.get("AUTOREVIEW_PARALLEL_TESTS_SHELL", "default"), - help="Shell for --parallel-tests. Default preserves Python shell=True platform behavior; use powershell or pwsh for PowerShell-specific commands.", - ) - parser.add_argument( - "--require-finding", action="append", default=[], help="Require finding text to contain this substring." - ) - parser.add_argument( - "--expect-findings", action="store_true", help="Treat findings as success; for harness acceptance tests." - ) - parser.add_argument("--dry-run", action="store_true") - args = parser.parse_args() - if args.engine not in ENGINES: - raise SystemExit(f"invalid --engine/AUTOREVIEW_ENGINE: {args.engine}") - return args - - -def run_engine(args: argparse.Namespace, repo: Path, prompt: str) -> str: - if args.engine == "codex": - return run_codex(args, repo, prompt) - if args.engine == "claude": - return run_claude(args, repo, prompt) - if args.engine == "droid": - return run_droid(args, repo, prompt) - if args.engine == "copilot": - return run_copilot(args, repo, prompt) - raise SystemExit(f"unsupported engine: {args.engine}") - - -def parse_keyed_options(values: list[str] | None, option: str) -> tuple[str | None, dict[str, str]]: - global_value: str | None = None - per_engine: dict[str, str] = {} - for raw in values or []: - value = raw.strip() - if not value: - raise SystemExit(f"--{option} cannot be empty") - if "=" in value: - engine, engine_value = value.split("=", 1) - engine = engine.strip() - engine_value = engine_value.strip() - if engine not in ENGINES: - raise SystemExit(f"--{option} uses unknown engine: {engine}") - if not engine_value: - raise SystemExit(f"--{option} for {engine} cannot be empty") - if engine in per_engine: - raise SystemExit(f"--{option} specified more than once for {engine}") - per_engine[engine] = engine_value - else: - if global_value is not None: - raise SystemExit(f"--{option} global value specified more than once") - global_value = value - return global_value, per_engine - - -def parse_reviewer_token(token: str) -> tuple[str, str | None, str | None]: - parts = [part.strip() for part in token.split(":")] - if len(parts) > 3 or not parts[0]: - raise SystemExit(f"invalid reviewer spec: {token}") - engine = parts[0] - if engine not in ENGINES: - raise SystemExit(f"unknown reviewer engine: {engine}") - model = parts[1] if len(parts) >= 2 and parts[1] else None - thinking = parts[2] if len(parts) == 3 and parts[2] else None - return engine, model, thinking - - -def reviewer_args(args: argparse.Namespace) -> list[argparse.Namespace]: - global_model, model_by_engine = parse_keyed_options(args.model, "model") - global_thinking, thinking_by_engine = parse_keyed_options(args.thinking, "thinking") - reviewers: list[tuple[str, str | None, str | None]] = [] - if args.reviewers: - tokens = [token.strip() for token in args.reviewers.split(",") if token.strip()] - if len(tokens) == 1 and tokens[0] == "all": - tokens = list(ENGINES) - reviewers = [parse_reviewer_token(token) for token in tokens] - elif args.panel: - engines = [args.engine] - for engine in ("codex", "claude"): - if engine not in engines: - engines.append(engine) - reviewers = [(engine, None, None) for engine in engines] - else: - reviewers = [(args.engine, None, None)] - - seen: set[str] = set() - result: list[argparse.Namespace] = [] - for engine, inline_model, inline_thinking in reviewers: - if engine in seen: - raise SystemExit(f"reviewer specified more than once: {engine}") - seen.add(engine) - model = inline_model or model_by_engine.get(engine) or global_model - thinking = inline_thinking or thinking_by_engine.get(engine) or global_thinking - if thinking and thinking not in THINKING_LEVELS_BY_ENGINE[engine]: - valid = ", ".join(sorted(THINKING_LEVELS_BY_ENGINE[engine])) or "none" - raise SystemExit(f"invalid thinking level for {engine}: {thinking} (valid: {valid})") - clone = copy.copy(args) - clone.engine = engine - clone.model = model - clone.thinking = thinking - result.append(clone) - return result - - -def reviewer_label(args: argparse.Namespace) -> str: - parts = [args.engine] - if args.model: - parts.append(f"model={args.model}") - if args.thinking: - parts.append(f"thinking={args.thinking}") - return " ".join(parts) - - -def run_reviewer( - args: argparse.Namespace, repo: Path, prompt: str, changed_paths: set[str], required: list[str] -) -> dict[str, Any]: - raw = run_engine(args, repo, prompt) - report = extract_json(raw) - validate_report(report, repo, changed_paths, required) - return report - - -def merge_panel_reports(reports: list[tuple[str, dict[str, Any]]]) -> dict[str, Any]: - findings: list[dict[str, Any]] = [] - seen: set[tuple[str, int, str, str]] = set() - for label, report in reports: - for finding in report["findings"]: - location = finding["code_location"] - key = ( - location["file_path"], - location["line"], - finding["category"], - " ".join(finding["title"].lower().split()), - ) - if key in seen: - continue - seen.add(key) - merged = copy.deepcopy(finding) - merged["body"] = bounded_field(f"Reviewer: {label}\n\n{merged['body']}", 2400) - findings.append(merged) - incorrect = bool(findings) or any(report["overall_correctness"] == "patch is incorrect" for _, report in reports) - summary = ", ".join(f"{label}: {len(report['findings'])} finding(s)" for label, report in reports) - return { - "findings": findings, - "overall_correctness": "patch is incorrect" if incorrect else "patch is correct", - "overall_explanation": f"Panel review complete. {summary}.", - "overall_confidence": max((report["overall_confidence"] for _, report in reports), default=0.5), - } - - -def run_panel( - args: argparse.Namespace, reviewers: list[argparse.Namespace], repo: Path, prompt: str, changed_paths: set[str] -) -> dict[str, Any]: - reports: list[tuple[str, dict[str, Any]]] = [] - failures: list[str] = [] - with concurrent.futures.ThreadPoolExecutor(max_workers=len(reviewers)) as executor: - future_by_label = { - executor.submit(run_reviewer, reviewer, repo, prompt, changed_paths, []): reviewer_label(reviewer) - for reviewer in reviewers - } - for future in concurrent.futures.as_completed(future_by_label): - label = future_by_label[future] - try: - reports.append((label, future.result())) - except SystemExit as exc: - failures.append(f"{label}: {exc}") - except Exception as exc: - failures.append(f"{label}: {exc}") - if failures and not args.allow_partial_panel: - raise SystemExit("autoreview panel failed\n" + "\n".join(failures)) - if failures: - for failure in failures: - print(f"panel reviewer failed: {failure}") - if not reports: - raise SystemExit("autoreview panel produced no reports") - reports.sort(key=lambda item: item[0]) - report = merge_panel_reports(reports) - validate_report(report, repo, changed_paths, args.require_finding) - return report - - -def main() -> int: - args = parse_args() - scope_specs = load_scope_specs(args) - reviewers = reviewer_args(args) - repo = repo_root() - target, target_ref = choose_target(repo, args.mode, args.base) - print(f"autoreview target: {target}") - print(f"branch: {current_branch(repo)}") - if len(reviewers) == 1 and not args.reviewers and not args.panel: - print(f"engine: {reviewers[0].engine}") - if reviewers[0].model: - print(f"model: {reviewers[0].model}") - if reviewers[0].thinking: - print(f"thinking: {reviewers[0].thinking}") - else: - print(f"reviewers: {', '.join(reviewer_label(reviewer) for reviewer in reviewers)}") - print(f"tools: {'on' if args.tools else 'off'}") - print(f"web_search: {'on' if args.web_search else 'off'}") - display_ref = args.commit if target == "commit" else target_ref - if display_ref: - print(f"ref: {display_ref}") - if scope_specs: - print(f"path filters: {', '.join(scope_specs)}") - - if target == "local": - bundle = local_bundle(repo) - elif target == "branch": - assert target_ref - bundle = branch_bundle(repo, target_ref, skip_fetch=args.skip_fetch) - elif target == "codebase": - changed_paths = codebase_paths(repo, scope_specs) - bundle = codebase_bundle(repo, changed_paths) - else: - bundle = commit_bundle(repo, args.commit) - target_ref = args.commit - changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs) - if target != "codebase": - changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs) - if scope_specs and not changed_paths: - raise SystemExit("no files matched --path/--scope-file for this review target") - instructions = instruction_bundle(repo, changed_paths) - prompt = build_prompt( - repo, - target, - target_ref, - changed_paths, - instructions, - bundle, - load_extra_prompt(args), - load_datasets(args), - ) - print(f"scope paths: {len(changed_paths)}") - print(f"bundle: {len(prompt)} chars") - if args.dry_run: - return 0 - - tests_proc: tuple[subprocess.Popen, float] | None = None - if args.parallel_tests: - tests_proc = start_parallel_tests(args.parallel_tests, repo, args.parallel_tests_shell) - try: - if len(reviewers) == 1: - report = run_reviewer(reviewers[0], repo, prompt, changed_paths, args.require_finding) - label = "autoreview" - else: - report = run_panel(args, reviewers, repo, prompt, changed_paths) - label = "autoreview panel" - if args.json_output: - Path(args.json_output).write_text(json.dumps(report, indent=2) + "\n") - - if args.output: - original_stdout = sys.stdout - with Path(args.output).open("w") as handle: - sys.stdout = Tee(original_stdout, handle) - print_report(report, label=label) - sys.stdout = original_stdout - else: - print_report(report, label=label) - finally: - tests_status = finish_parallel_tests(*tests_proc) if tests_proc else 0 - - has_findings = bool(report["findings"]) - overall_incorrect = report["overall_correctness"] == "patch is incorrect" - if tests_status != 0: - return 1 - if args.expect_findings: - return 0 if has_findings else 1 - return 1 if has_findings or overall_incorrect else 0 - - -class Tee: - def __init__(self, *streams: Any) -> None: - self.streams = streams - - def write(self, data: str) -> None: - for stream in self.streams: - stream.write(data) - - def flush(self) -> None: - for stream in self.streams: - stream.flush() - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py b/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py deleted file mode 100755 index 5ea77a07..00000000 --- a/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env python3 -"""Create an EEGPrep autoreview campaign workflow directory.""" - -from __future__ import annotations - -import argparse -import json -import re -import shlex -from datetime import UTC, datetime -from pathlib import Path -from typing import TypedDict - - -class Packet(TypedDict): - id: str - branch: str - paths: list[str] - tests: list[str] - - -DEFAULT_PACKETS: list[Packet] = [ - { - "id": "01-popfunc", - "branch": "autoreview/popfunc", - "paths": [ - "src/eegprep/functions/popfunc", - "src/eegprep/resources/help/pop_*.md", - "tests/test_pop_*.py", - ], - "tests": [ - "uv run --no-sync pytest tests/test_pop_utils.py tests/test_file_menu_pop_functions.py", - ], - }, - { - "id": "02-sigproc", - "branch": "autoreview/sigproc", - "paths": [ - "src/eegprep/functions/sigprocfunc", - "tests/test_*runica*.py", - "tests/test_*resample*.py", - ], - "tests": [ - "uv run --no-sync pytest tests/test_pop_resample_python.py tests/test_eeg_runica.py tests/test_runica.py tests/test_gui_pop_runica.py", - ], - }, - { - "id": "03-gui-session", - "branch": "autoreview/gui-session", - "paths": [ - "src/eegprep/functions/guifunc", - "src/eegprep/functions/adminfunc", - "tests/test_console_workspace.py", - "tests/test_gui_*.py", - ], - "tests": [ - "uv run --no-sync pytest tests/test_console_workspace.py tests/test_gui_main_window.py", - ], - }, - { - "id": "04-plugins", - "branch": "autoreview/plugins", - "paths": [ - "src/eegprep/plugins", - "tests/test_*clean*.py", - "tests/test_*iclabel*.py", - "tests/test_*bids*.py", - ], - "tests": [ - "uv run --no-sync pytest tests/test_gui_pop_clean_rawdata.py tests/test_iclabel.py", - ], - }, - { - "id": "05-io-bids-study", - "branch": "autoreview/io-bids-study", - "paths": [ - "src/eegprep/functions/popfunc/pop_fileio.py", - "src/eegprep/functions/popfunc/pop_loadset.py", - "src/eegprep/functions/popfunc/pop_saveset.py", - "src/eegprep/plugins/EEG_BIDS", - "src/eegprep/functions/studyfunc", - "tests/test_*study*.py", - "tests/test_*bids*.py", - "tests/test_file_menu_pop_functions.py", - ], - "tests": [ - "uv run --no-sync pytest tests/test_file_menu_pop_functions.py tests/test_study_metadata.py tests/test_study_measures.py tests/test_study_clustering.py tests/test_study_end_to_end.py", - ], - }, - { - "id": "06-cli-docs-tools", - "branch": "autoreview/cli-docs-tools", - "paths": [ - "src/eegprep/cli", - "docs/source", - ".agents/skills", - "tools", - "scripts", - "tests/test_cli*.py", - ], - "tests": [ - "uv run --no-sync pytest tests/test_cli_main.py tests/test_cli_transforms.py tests/test_cli_pipeline_qc_report.py tests/test_cli_bids_eeglab_commands.py", - "./pre-commit.py --changed-from origin/develop", - ], - }, -] - - -def slugify(value: str) -> str: - slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") - return slug[:64].strip("-") or "autoreview-campaign" - - -def write_new(path: Path, content: str) -> None: - if not path.exists(): - path.write_text(content, encoding="utf-8") - - -def packet_prompt(packet: Packet, base: str) -> str: - paths = "\n".join(f" --path {shlex.quote(path)} \\" for path in packet["paths"]) - tests = "\n".join(f"- `{test}`" for test in packet["tests"]) - return f"""# Packet {packet["id"]}: {packet["branch"]} - -## Objective -Run a scoped EEGPrep autoreview loop for this codebase area, fix real findings from first principles, and open a PR to `{base}`. - -## Scope -{chr(10).join(f"- `{path}`" for path in packet["paths"])} - -Fixes may touch related helpers outside this scope when required by the root cause, but keep the PR conceptually tied to this packet. - -## Command - -```bash -.agents/skills/oc-autoreview-adapted/scripts/autoreview \\ - --mode codebase \\ -{paths} - --thinking codex=xhigh -``` - -## Verification -Run focused checks first: - -{tests} - -Then run broader checks if the fix affects shared behavior. - -## PR Requirements -- Branch: `{packet["branch"]}` -- Target: `{base}` -- PR body must list every finding reviewed: - - Fixed: finding, root cause, files changed, tests run. - - Rejected: finding and why it is not real or not worth changing. - - Follow-up: only when real but intentionally outside this PR. - -## Do Not -- Do not auto-merge. -- Do not revert unrelated concurrent work. -- Do not report vague architecture preferences without concrete failure modes. -""" - - -def main() -> int: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("title", nargs="?", default="EEGPrep autoreview campaign") - parser.add_argument("--root", default=".workflow") - parser.add_argument("--slug") - parser.add_argument("--base", default="origin/develop") - parser.add_argument("--max-concurrent", type=int, default=3) - args = parser.parse_args() - - slug = slugify(args.slug or args.title) - run_dir = Path(args.root) / slug - packets_dir = run_dir / "packets" - results_dir = run_dir / "results" - packets_dir.mkdir(parents=True, exist_ok=True) - results_dir.mkdir(parents=True, exist_ok=True) - - now = datetime.now(UTC).replace(microsecond=0).isoformat() - packets = [ - { - "id": packet["id"], - "branch": packet["branch"], - "paths": packet["paths"], - "tests": packet["tests"], - "status": "pending", - "pr": None, - } - for packet in DEFAULT_PACKETS - ] - state = { - "title": args.title, - "slug": slug, - "created_at": now, - "status": "planned", - "base": args.base, - "max_concurrent_agents": args.max_concurrent, - "packets": packets, - "integration": {"status": "not_started", "notes": ""}, - } - write_new(run_dir / "state.json", json.dumps(state, indent=2) + "\n") - write_new( - run_dir / "plan.md", - f"""# {args.title} - -## Goal -Run parallel scoped autoreview loops across EEGPrep, fix real bugs/parity/architecture issues, and open PRs for human review. - -## Success Criteria -- Every packet has a PR or a recorded no-change result. -- Each PR body lists fixed, rejected, and follow-up findings. -- Each packet reruns autoreview after fixes. -- Integration checks pass after packet PRs merge. - -## Constraints -- Keep AGENTS.md and EEGPrep's EEGLAB parity goal in force. -- Runtime code must remain standalone and not depend on vendored EEGLAB. -- Do not auto-merge packet PRs. -- Max concurrent agents: {args.max_concurrent}. - -## Risks -- Concurrent work conflicts: keep packet ownership mostly disjoint and resolve against authoritative code. -- Noisy architecture findings: accept only findings with concrete failure modes. - -## Work Packets -{chr(10).join(f"- `{packet['id']}` -> `{packet['branch']}`" for packet in packets)} - -## Integration Policy -Parent agent tracks PRs, resolves conflicts after merges, runs broader checks, and updates final-report.md. -""", - ) - write_new( - run_dir / "orchestration.md", - f"""# Orchestration: {args.title} - -## Execution Rules -- Use available subagent/thread/worktree tools when exposed by the environment. -- Spawn at most {args.max_concurrent} packet agents at once. -- Each packet owns its branch and opens one PR before the parent starts further work in that area. -- If no subagent runner is available, execute packets sequentially and write notes in `results/`. -- Parent integrates packet results; do not paste raw worker dumps as final status. - -## Packet Launch -Give each worker only its packet file plus AGENTS.md context. Workers must not revert unrelated edits and must adapt to concurrent changes. - -## Completion Audit -- All packet PRs created or no-change results recorded. -- PR bodies include every finding reviewed. -- Final integration checks recorded in `final-report.md`. -""", - ) - write_new( - run_dir / "final-report.md", - f"""# Final Report: {args.title} - -## Outcome - -## Packet PRs - -## Findings Fixed - -## Findings Rejected - -## Follow-ups - -## Integration Verification - -## Remaining Risks -""", - ) - for packet in DEFAULT_PACKETS: - write_new(packets_dir / f"{packet['id']}.md", packet_prompt(packet, args.base)) - - print(run_dir) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness deleted file mode 100755 index ab98338a..00000000 --- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -harness="$script_dir/test-review-harness.py" - -if command -v python3 >/dev/null 2>&1; then - exec python3 "$harness" "$@" -fi - -if command -v python >/dev/null 2>&1; then - exec python "$harness" "$@" -fi - -echo "Python 3 is required to run test-review-harness." >&2 -exit 127 diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 deleted file mode 100644 index 51007a64..00000000 --- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 +++ /dev/null @@ -1,51 +0,0 @@ -[CmdletBinding()] -param( - [ValidateSet('buggy', 'benign')] - [string] $Fixture, - - [ValidateSet('codex', 'claude', 'droid', 'copilot')] - [string[]] $Engine, - - [switch] $DryRun, - - [Alias('h')] - [switch] $Help -) - -$ErrorActionPreference = 'Stop' - -$Harness = Join-Path $PSScriptRoot 'test-review-harness.py' -$ForwardedArgs = @() - -if ($Help) { - $ForwardedArgs += '--help' -} - -if ($PSBoundParameters.ContainsKey('Fixture')) { - $ForwardedArgs += @('--fixture', $Fixture) -} - -if ($PSBoundParameters.ContainsKey('Engine')) { - foreach ($SelectedEngine in $Engine) { - $ForwardedArgs += @('--engine', $SelectedEngine) - } -} - -if ($DryRun) { - $ForwardedArgs += '--dry-run' -} - -$PyLauncher = Get-Command py -ErrorAction SilentlyContinue -if ($null -ne $PyLauncher) { - & $PyLauncher.Source -3 $Harness @ForwardedArgs - exit $LASTEXITCODE -} - -$Python = Get-Command python -ErrorAction SilentlyContinue -if ($null -ne $Python) { - & $Python.Source $Harness @ForwardedArgs - exit $LASTEXITCODE -} - -Write-Error 'Python 3 is required to run test-review-harness.' -exit 127 diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py deleted file mode 100755 index 8035c42f..00000000 --- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import annotations - -import argparse -import os -import shutil -import stat -import subprocess -import sys -import tempfile -from collections.abc import Callable -from pathlib import Path - - -ENGINES = ("codex", "claude", "droid", "copilot") - -SAFE_INITIAL = """import numpy as np - - -def trim_eeg(eeg, start_sample, stop_sample): - data = np.asarray(eeg["data"]) - start = int(start_sample) - 1 - stop = int(stop_sample) - trimmed = data[:, start:stop] - out = dict(eeg) - out["data"] = trimmed - out["pnts"] = trimmed.shape[1] - out["xmin"] = start / float(eeg["srate"]) - out["xmax"] = (stop - 1) / float(eeg["srate"]) - return out -""" - -BUGGY_CHANGED = """import numpy as np - - -def trim_eeg(eeg, start_sample, stop_sample): - data = np.asarray(eeg["data"]) - trimmed = data[start_sample:stop_sample, :] - out = dict(eeg) - out["data"] = trimmed - out["pnts"] = stop_sample - start_sample - return out -""" - -BENIGN_CHANGED = """import numpy as np - - -def trim_eeg(eeg, start_sample, stop_sample): - data = np.asarray(eeg["data"]) - start = int(start_sample) - 1 - stop = int(stop_sample) - if start < 0 or stop <= start or stop > data.shape[1]: - raise ValueError("sample range is outside EEG data") - trimmed = data[:, start:stop] - out = dict(eeg) - out["data"] = trimmed - out["pnts"] = trimmed.shape[1] - out["xmin"] = start / float(eeg["srate"]) - out["xmax"] = (stop - 1) / float(eeg["srate"]) - return out -""" - -BUGGY_PROMPT = ( - "Acceptance fixture: this EEG change contains a real EEGPrep-style bug. " - "Review normally and report only concrete defects introduced by the patch." -) -BENIGN_PROMPT = ( - "Calibration fixture: this EEG change validates 1-based sample bounds and " - "preserves channel-major data. Do not flag it unless there is a concrete bug." -) - - -def parse_args(argv: list[str]) -> argparse.Namespace: - parser = argparse.ArgumentParser( - prog="test-review-harness", - description="Create a temporary EEG-style repo and run the adapted autoreview helper.", - ) - parser.add_argument("--fixture", choices=("buggy", "benign"), default="buggy") - parser.add_argument("--engine", action="append", choices=ENGINES, dest="engines") - parser.add_argument( - "--dry-run", action="store_true", help="Verify target/bundle setup without spending a model call." - ) - return parser.parse_args(argv) - - -def run(command: list[str], cwd: Path) -> None: - subprocess.run(command, cwd=cwd, check=True) - - -def write_fixture_file(repo: Path, content: str) -> None: - (repo / "eeg_ops.py").write_text(content, encoding="utf-8", newline="\n") - - -def create_fixture_repo(repo: Path, fixture: str) -> None: - run(["git", "init", "--quiet"], repo) - run(["git", "config", "user.name", "Review Fixture"], repo) - run(["git", "config", "user.email", "review-fixture@example.com"], repo) - write_fixture_file(repo, SAFE_INITIAL) - run(["git", "add", "eeg_ops.py"], repo) - run(["git", "commit", "--quiet", "-m", "initial safe EEG trim"], repo) - write_fixture_file(repo, BUGGY_CHANGED if fixture == "buggy" else BENIGN_CHANGED) - - -def run_reviews(repo: Path, script_dir: Path, fixture: str, engines: list[str], *, dry_run: bool) -> None: - autoreview = script_dir / "autoreview" - for engine in engines: - print(f"== {engine} ==", flush=True) - command = [ - sys.executable, - str(autoreview), - "--mode", - "local", - "--engine", - engine, - "--prompt", - BUGGY_PROMPT if fixture == "buggy" else BENIGN_PROMPT, - ] - if fixture == "buggy": - command.extend(["--require-finding", "channel", "--expect-findings"]) - if dry_run: - command.append("--dry-run") - run(command, repo) - - -def cleanup_repo(repo: Path) -> None: - def make_writable_and_retry(function: Callable[[str], object], path: str, _exc_info: object) -> None: - try: - os.chmod(path, stat.S_IREAD | stat.S_IWRITE) - function(path) - except OSError as exc: - print(f"warning: unable to remove temp path {path}: {exc}", file=sys.stderr) - - if repo.exists(): - shutil.rmtree(repo, onerror=make_writable_and_retry) - - -def main(argv: list[str]) -> int: - args = parse_args(argv) - script_dir = Path(__file__).resolve().parent - repo = Path(tempfile.mkdtemp(prefix="eegprep-autoreview-fixture.")) - try: - create_fixture_repo(repo, args.fixture) - run_reviews(repo, script_dir, args.fixture, args.engines or ["codex"], dry_run=args.dry_run) - except subprocess.CalledProcessError as exc: - return int(exc.returncode or 1) - finally: - cleanup_repo(repo) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) diff --git a/.gitignore b/.gitignore index a9967c33..dbe68211 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ .claude/settings.local.json .context/ -.workflow/ .coveragerc .notes/* !.notes/.gitkeep From 3ab3f8a31b61bc3324ca249dc2a610606fbcee61 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Tue, 9 Jun 2026 18:31:22 -0700 Subject: [PATCH 06/16] Add strict code quality review skill --- .../SKILL.md | 100 ++++++++++++++++++ .../agents/openai.yaml | 4 + 2 files changed, 104 insertions(+) create mode 100644 .agents/skills/thermo-nuclear-code-quality-review/SKILL.md create mode 100644 .agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml diff --git a/.agents/skills/thermo-nuclear-code-quality-review/SKILL.md b/.agents/skills/thermo-nuclear-code-quality-review/SKILL.md new file mode 100644 index 00000000..a5d463fe --- /dev/null +++ b/.agents/skills/thermo-nuclear-code-quality-review/SKILL.md @@ -0,0 +1,100 @@ +--- +name: thermo-nuclear-code-quality-review +description: Run an unusually strict EEGPrep code-quality review for architecture, maintainability, abstraction quality, file sprawl, spaghetti branching, and missed simplification. Use for thermo-nuclear review, thermonuclear review, deep maintainability audit, strict architecture review, or when code technically works but may make EEGPrep harder to ship. +--- + +# Thermo-Nuclear Code Quality Review + +Use this for a demanding maintainability review, not a routine bug pass. The goal is to make EEGPrep shippable: standalone, EEGLAB-familiar, easy for EEG researchers, and structurally simple enough for future agents and humans to extend safely. + +Inspired by Cursor's MIT-licensed `thermo-nuclear-code-quality-review` skill: https://github.com/cursor/plugins/blob/main/cursor-team-kit/skills/thermo-nuclear-code-quality-review/SKILL.md + +## Contract + +- Read `AGENTS.md` first and keep its EEGLAB parity, GUI/console, testing, docs, and style rules in force. +- Review the current diff, PR, branch, or named scope. Do not turn this into a whole-codebase rewrite unless asked. +- Be ambitious about simplification, but only flag structural issues with a concrete failure mode or clear maintainability cost. +- Prefer fewer high-conviction findings over a long list of taste comments. +- If asked to fix findings, verify each one from first principles, make the smallest durable change, run focused tests, and commit only when requested. +- Do not rubber-stamp code because tests pass. Passing behavior can still be architecturally wrong. + +## Review Bar + +Ask these questions for every meaningful change: + +- Is there a simpler framing that deletes branches, modes, wrappers, flags, or helper layers? +- Did this add special cases to an already busy flow instead of moving logic to the owning module? +- Is the logic in the canonical EEGPrep layer? +- Does the code preserve EEG dict invariants and EEGLAB-facing semantics without hidden global state? +- Does GUI/menu/console code update `EEGPrepSession`, history, and visible state atomically? +- Is this abstraction earning its keep, or is it a pass-through wrapper? +- Did the change create duplicate helpers instead of reusing an existing contract? +- Did it make data boundaries weaker through unnecessary optionality, casts, duck typing, or silent fallbacks? +- Did a file cross or approach roughly 1000 lines because new concepts were not decomposed? +- Does the code remain understandable to an EEG researcher migrating from EEGLAB? + +## EEGPrep Architecture Boundaries + +Keep ownership clear: + +- `popfunc`: user-facing `pop_*` wrappers, history strings, dialogs, `return_com=True`, and EEGLAB-compatible command surfaces. +- `sigprocfunc`: low-level signal processing and numerical transforms. No GUI/session orchestration here. +- `guifunc`: Qt/inputgui/menu rendering and GUI coordination. No low-level numerical algorithm ownership here. +- `adminfunc`: session, options, console, history, storage, and administrative runtime behavior. +- `plugins/*`: bundled plugin ports and plugin-owned helpers. +- `resources/help`: EEGPrep-owned Help Markdown for user-facing dialogs/functions. +- `eeglab/`: development reference only. Runtime code must not depend on it. + +Flag layer leaks aggressively when they make future behavior harder to reason about. + +## What To Flag + +Flag issues such as: + +- A "works but messy" implementation where a clear code-judo move would delete complexity. +- One-off booleans, nullable modes, or scattered feature checks. +- Repeated conditionals that indicate a missing helper, model, or dispatcher. +- Partial session/history/dataset updates that can leave GUI and console out of sync. +- EEGLAB user-facing indices mixed with Python 0-based indices without an explicit boundary. +- Channel-major EEG data assumptions hidden behind generic array handling. +- New runtime dependency on vendored EEGLAB, local paths, optional files, or unstated environment state. +- Thin wrappers, identity helpers, or generic magic that obscure simple EEG invariants. +- Copy-pasted parsing/history/dialog helpers when a canonical helper exists. +- Large-file growth that should be split into focused modules. +- Tests that only assert implementation details while missing user-observable behavior. + +## Preferred Remedies + +Prefer remedies that: + +- Delete concepts rather than rename them. +- Collapse duplicate branches into one explicit flow. +- Move logic to the module that owns the concept. +- Extract small pure helpers for repeated parsing, shape, or indexing contracts. +- Make state transitions atomic through `EEGPrepSession` helpers. +- Make data boundaries explicit before converting between EEGLAB-facing and Python-facing indices. +- Split large modules by stable ownership, not by arbitrary line count. +- Replace loose optional/cast-heavy code with a concrete contract. +- Add focused tests for externally observable EEG dict, GUI/session, history, or file behavior. + +## Tone And Output + +Lead with findings. For each finding include: + +- file and line; +- why it matters for correctness, parity, or maintainability; +- the concrete scenario or future failure mode; +- a preferred fix direction. + +Order findings by severity: + +1. Structural regressions that can create bugs or block maintainability. +2. Missed simplification that would remove significant complexity. +3. Wrong ownership/layering or canonical-helper duplication. +4. State/session/history atomicity risks. +5. File sprawl and decomposition concerns. +6. Lower-level legibility issues with real cost. + +If there are no actionable issues, say so directly and mention any residual test or review limits. + +Do not soften major structural problems into vague nits. Also do not invent architecture work without a real failure mode. diff --git a/.agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml b/.agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml new file mode 100644 index 00000000..9de350b0 --- /dev/null +++ b/.agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Thermo-Nuclear Code Quality Review" + short_description: "Strict EEGPrep architecture and maintainability review" + default_prompt: "Use $thermo-nuclear-code-quality-review to review the current EEGPrep branch for structural regressions, spaghetti branching, wrong ownership layers, and missed simplifications." From 4e462152b9c1708221dfa936dd42a954e6de5d4e Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Wed, 10 Jun 2026 17:02:35 -0700 Subject: [PATCH 07/16] Fix session sync and MNE conversion invariants --- src/eegprep/functions/adminfunc/console.py | 44 +++--- src/eegprep/functions/guifunc/menu_actions.py | 49 ++++--- src/eegprep/functions/guifunc/session.py | 82 +++++++++++ src/eegprep/functions/miscfunc/eeg_eeg2mne.py | 44 ++---- src/eegprep/functions/miscfunc/eeg_mne2eeg.py | 57 ++------ .../functions/miscfunc/eeg_mne2eeg_epochs.py | 136 ++++++++---------- src/eegprep/functions/sigprocfunc/epoch.py | 11 +- tests/test_console_workspace.py | 20 +++ tests/test_eeg_eeg2mne.py | 20 ++- tests/test_eeg_mne2eeg.py | 16 +++ tests/test_eeg_mne2eeg_epochs.py | 80 +++++++---- tests/test_gui_main_window.py | 15 +- tests/test_pop_epoch.py | 23 +++ 13 files changed, 360 insertions(+), 237 deletions(-) diff --git a/src/eegprep/functions/adminfunc/console.py b/src/eegprep/functions/adminfunc/console.py index 9f8ecfb4..1793f743 100644 --- a/src/eegprep/functions/adminfunc/console.py +++ b/src/eegprep/functions/adminfunc/console.py @@ -352,19 +352,18 @@ def after_execute(self, source: str, *, success: bool = True) -> None: history_command = self._history_command_for_source(source, targets) changed = False - if "ALLEEG" in targets: + if "ALLEEG" in targets or "CURRENTSET" in targets: alleeg = self.namespace.get("ALLEEG", []) if not isinstance(alleeg, list): raise ValueError("ALLEEG must be a list of EEG datasets") - self.session.ALLEEG = alleeg - changed = True - - if "CURRENTSET" in targets: - current = _normalize_currentset(self.namespace.get("CURRENTSET")) - if current: - self.session.retrieve(current if len(current) > 1 else current[0]) - else: - self.session.CURRENTSET = [] + current = ( + _normalize_currentset(self.namespace.get("CURRENTSET")) + if "CURRENTSET" in targets + else self.session.CURRENTSET + ) + self.session.apply_workspace_state( + alleeg=alleeg, currentset=current, command="", append_dataset_history=False + ) changed = True if self._namespace_eeg_changed(targets): @@ -380,16 +379,16 @@ def after_execute(self, source: str, *, success: bool = True) -> None: changed = True if "STUDY" in targets: - self.session.STUDY = self.namespace.get("STUDY") - if "CURRENTSTUDY" not in targets: - self.session.CURRENTSTUDY = 1 if self.session.STUDY else 0 + study_kwargs: dict[str, Any] = {"study": self.namespace.get("STUDY"), "command": ""} + if "CURRENTSTUDY" in targets: + study_kwargs["currentstudy"] = self.namespace.get("CURRENTSTUDY") + self.session.apply_workspace_state(**study_kwargs) changed = True - if "CURRENTSTUDY" in targets: - self.session.CURRENTSTUDY = int(self.namespace.get("CURRENTSTUDY") or 0) + elif "CURRENTSTUDY" in targets: + self.session.apply_workspace_state(currentstudy=self.namespace.get("CURRENTSTUDY"), command="") changed = True if changed: - self.session.notify_changed() if history_command and history_command != self.session.LASTCOM: self.session.add_history(history_command) self.pull_from_session() @@ -401,12 +400,13 @@ def accept_pop_result(self, result: Any, args: tuple[Any, ...], kwargs: Mapping[ dataset_state = _extract_pop_dataset_state(result) if dataset_state is not None: alleeg, eeg, currentset, command = dataset_state - self.session.ALLEEG = alleeg - self.session.EEG = eeg - self.session.CURRENTSET = _normalize_currentset(currentset) - if command: - self.session.add_history(command, notify=False) - self.session.notify_changed() + self.session.apply_workspace_state( + alleeg=alleeg, + eeg=eeg, + currentset=currentset, + command=command, + append_dataset_history=False, + ) self._pop_updated_session = True self.pull_from_session() self._refresh() diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py index a5254755..b0c80e7a 100644 --- a/src/eegprep/functions/guifunc/menu_actions.py +++ b/src/eegprep/functions/guifunc/menu_actions.py @@ -903,14 +903,23 @@ def _run_script(self, parent: Any | None) -> None: "EEG": self.session.EEG, "ALLEEG": self.session.ALLEEG, "CURRENTSET": self.session.current_set_value(), + "ALLCOM": list(self.session.ALLCOM), + "LASTCOM": self.session.LASTCOM, "STUDY": self.session.STUDY, + "CURRENTSTUDY": self.session.CURRENTSTUDY, } command = pop_runscript(filename, namespace) - self.session.EEG = namespace.get("EEG", self.session.EEG) - self.session.ALLEEG = namespace.get("ALLEEG", self.session.ALLEEG) - self.session.CURRENTSET = _currentset_list(namespace.get("CURRENTSET", self.session.current_set_value())) - self.session.STUDY = namespace.get("STUDY", self.session.STUDY) - self._add_history_from_gui(command) + self.session.echo_command(command) + self.session.apply_workspace_state( + eeg=namespace.get("EEG", self.session.EEG), + alleeg=namespace.get("ALLEEG", self.session.ALLEEG), + currentset=namespace.get("CURRENTSET", self.session.current_set_value()), + allcom=namespace.get("ALLCOM", self.session.ALLCOM), + lastcom=namespace.get("LASTCOM", self.session.LASTCOM), + study=namespace.get("STUDY", self.session.STUDY), + currentstudy=namespace.get("CURRENTSTUDY", self.session.CURRENTSTUDY), + command=command, + ) self._refresh() def _bids_tool_action(self, action: str, parent: Any | None) -> None: @@ -942,8 +951,8 @@ def _bids_tool_action(self, action: str, parent: Any | None) -> None: updated, command = getattr(bids_tools, action)(target, **metadata) if self.session.CURRENTSTUDY == 1 and self.session.STUDY: - self.session.STUDY = updated - self._add_history_from_gui(command) + self.session.echo_command(command) + self.session.set_study(updated, command=command) else: self._store_current_from_gui(updated, command=command) self._refresh() @@ -1045,11 +1054,8 @@ def _apply_extension_result(self, result: Any) -> None: dataset_state = _extension_dataset_state(result) if dataset_state is not None: alleeg, eeg_out, currentset, command = dataset_state - self.session.ALLEEG = alleeg - self.session.EEG = eeg_out - self.session.CURRENTSET = _currentset_list(currentset) - self._add_history_from_gui(command) - self.session.notify_changed() + self.session.echo_command(command) + self.session.apply_workspace_state(alleeg=alleeg, eeg=eeg_out, currentset=currentset, command=command) self._refresh() return eeg_out, command = _extension_eeg_and_command(result) @@ -1360,11 +1366,8 @@ def _copy_current_dataset(self, parent: Any | None) -> None: alleeg, eeg_out, current_set, command = pop_copyset(self.session.ALLEEG, set_in, gui=True, return_com=True) if not command: return - self.session.ALLEEG = alleeg - self.session.EEG = eeg_out - self.session.CURRENTSET = _currentset_list(current_set) - self._add_history_from_gui(command) - self.session.notify_changed() + self.session.echo_command(command) + self.session.apply_workspace_state(alleeg=alleeg, eeg=eeg_out, currentset=current_set, command=command) self._refresh() def _merge_datasets(self, parent: Any | None) -> None: @@ -1540,8 +1543,8 @@ def _run_chanplot(self, parent: Any | None) -> None: study, command, _figure = pop_chanplot(self.session.STUDY, self.session.ALLEEG, gui=True, return_com=True) if not command: return - self.session.STUDY = study - self._add_history_from_gui(command) + self.session.echo_command(command) + self.session.set_study(study, command=command) self._refresh() def _store_current_from_gui(self, eeg: Any, **kwargs: Any) -> Any: @@ -1577,11 +1580,7 @@ def _commit_processed_dataset_from_gui(self, eeg: Any, *, command: str, parent: self.session.echo_command(command) self.session.add_history(command, notify=False) self.session.echo_command(newset_command) - self.session.ALLEEG = alleeg - self.session.EEG = current - self.session.CURRENTSET = _currentset_list(current_set) - self.session.add_history(newset_command, notify=False) - self.session.notify_changed() + self.session.apply_workspace_state(alleeg=alleeg, eeg=current, currentset=current_set, command=newset_command) def _add_history_from_gui(self, command: str | None) -> None: self.session.echo_command(command) @@ -1592,7 +1591,7 @@ def _retrieve_dataset(self, index: int) -> None: self.session.retrieve(index) command = f"[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, 'retrieve', {index});" if was_study: - self.session.CURRENTSTUDY = 0 + self.session.apply_workspace_state(currentstudy=0) command = f"CURRENTSTUDY = 0;{command}" self._add_history_from_gui(command) self._refresh() diff --git a/src/eegprep/functions/guifunc/session.py b/src/eegprep/functions/guifunc/session.py index f2028ceb..6c8df35f 100644 --- a/src/eegprep/functions/guifunc/session.py +++ b/src/eegprep/functions/guifunc/session.py @@ -18,6 +18,9 @@ from eegprep.functions.popfunc.eeg_emptyset import eeg_emptyset +_UNSET = object() + + def has_eeg_data(eeg: Any) -> bool: """Return whether an EEG-like object contains non-empty data.""" if not isinstance(eeg, dict): @@ -223,6 +226,62 @@ def retrieve(self, indices: int | list[int]) -> dict[str, Any] | list[dict[str, self.notify_changed() return eeg + def apply_workspace_state( + self, + *, + eeg: Any = _UNSET, + alleeg: Any = _UNSET, + currentset: Any = _UNSET, + allcom: Any = _UNSET, + lastcom: Any = _UNSET, + study: Any = _UNSET, + currentstudy: Any = _UNSET, + command: str = "", + append_dataset_history: bool = False, + ) -> None: + """Apply a GUI/console workspace update as one session transaction.""" + dataset_changed = eeg is not _UNSET or alleeg is not _UNSET or currentset is not _UNSET + if dataset_changed: + resolved_alleeg = self.ALLEEG if alleeg is _UNSET else alleeg + if not isinstance(resolved_alleeg, list): + raise ValueError("ALLEEG must be a list of EEG datasets") + resolved_currentset = ( + list(self.CURRENTSET) + if currentset is _UNSET + else normalize_dataset_indices(currentset, allow_empty=True) + ) + if resolved_currentset and max(resolved_currentset) > len(resolved_alleeg): + raise ValueError("CURRENTSET contains indices outside ALLEEG") + resolved_eeg = self._resolve_workspace_eeg(eeg, resolved_alleeg, resolved_currentset) + self.ALLEEG = resolved_alleeg + self.EEG = resolved_eeg + self.CURRENTSET = resolved_currentset + self._mirror_current_eeg_into_alleeg() + if append_dataset_history: + self._append_current_dataset_history(command) + offload_storedisk_datasets(self.ALLEEG, set(self.CURRENTSET)) + + if allcom is not _UNSET: + if not isinstance(allcom, list): + raise ValueError("ALLCOM must be a list of command strings") + self.ALLCOM = [str(item) for item in allcom if str(item).strip()] + self.LASTCOM = self.ALLCOM[-1] if self.ALLCOM else "" + if lastcom is not _UNSET: + last_command = str(lastcom or "").strip() + if last_command and (not self.ALLCOM or self.ALLCOM[-1] != last_command): + self.ALLCOM.append(last_command) + self.LASTCOM = last_command + + if study is not _UNSET: + self.STUDY = study + if currentstudy is _UNSET: + self.CURRENTSTUDY = 1 if study else 0 + if currentstudy is not _UNSET: + self.CURRENTSTUDY = int(currentstudy or 0) + + self.add_history(command, notify=False) + self.notify_changed() + def delete_current(self) -> None: """Delete the current dataset selection from memory.""" if not self.CURRENTSET: @@ -271,6 +330,29 @@ def set_study( self.add_history(command, notify=False) self.notify_changed() + def _resolve_workspace_eeg( + self, + eeg: Any, + alleeg: list[dict[str, Any]], + currentset: list[int], + ) -> dict[str, Any] | list[dict[str, Any]]: + if eeg is not _UNSET: + return eeg + if not currentset: + return eeg_emptyset() + selected = [alleeg[index - 1] for index in currentset] + return selected if len(selected) > 1 else selected[0] + + def _mirror_current_eeg_into_alleeg(self) -> None: + if not self.CURRENTSET: + return + current = self.EEG if isinstance(self.EEG, list) else [self.EEG] + if len(current) != len(self.CURRENTSET): + raise ValueError("EEG selection length must match CURRENTSET") + for index, eeg in zip(self.CURRENTSET, current): + if 1 <= index <= len(self.ALLEEG): + self.ALLEEG[index - 1] = eeg + def select_study(self, *, command: str = "CURRENTSTUDY = 1") -> None: """Select the current STUDY set in the shared workspace.""" if not self.STUDY: diff --git a/src/eegprep/functions/miscfunc/eeg_eeg2mne.py b/src/eegprep/functions/miscfunc/eeg_eeg2mne.py index 574353c2..0d026766 100644 --- a/src/eegprep/functions/miscfunc/eeg_eeg2mne.py +++ b/src/eegprep/functions/miscfunc/eeg_eeg2mne.py @@ -1,13 +1,13 @@ """EEG to MNE conversion functions.""" -from ..popfunc.pop_loadset import pop_loadset -import mne +from pathlib import Path import tempfile -import os + +import mne + from ..popfunc.pop_saveset import pop_saveset # in development -# write a funtion that converts a MNE raw object to an EEGLAB set file def eeg_eeg2mne(EEG): """Convert EEG data structure to MNE Raw object. @@ -21,34 +21,10 @@ def eeg_eeg2mne(EEG): raw : mne.io.Raw MNE Raw object """ - # Generate a temporary file name - with tempfile.NamedTemporaryFile(delete=False) as temp_file: - temp_file_path = temp_file.name - - base, _ = os.path.splitext(temp_file_path) - new_temp_file_path = base + ".set" - - # save the raw file as a new EEGLAB .set file using MNE EEGLAB writer - pop_saveset(EEG, new_temp_file_path) - - # load the EEGLAB set file - if EEG['trials'] > 1: - raw = mne.io.read_epochs_eeglab(new_temp_file_path) - else: - raw = mne.io.read_raw_eeglab(new_temp_file_path, preload=True) - - return raw - - -def test_eeg_eeg2mne(): - """Test the eeg_eeg2mne function.""" - eeglab_file_path = './eeglab_data_with_ica_tmp.set' - eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set' - EEG = pop_loadset(eeglab_file_path) - raw = eeg_eeg2mne(EEG) - - # print the keys of the EEG dictionary - print(raw.info) - + with tempfile.TemporaryDirectory(prefix="eegprep-eeg2mne-") as temp_dir: + set_path = Path(temp_dir) / "bridge.set" + pop_saveset(EEG, str(set_path)) -# test_eeg_eeg2mne() + if EEG['trials'] > 1: + return mne.io.read_epochs_eeglab(str(set_path)) + return mne.io.read_raw_eeglab(str(set_path), preload=True) diff --git a/src/eegprep/functions/miscfunc/eeg_mne2eeg.py b/src/eegprep/functions/miscfunc/eeg_mne2eeg.py index 0e14c253..6f0eb1a2 100644 --- a/src/eegprep/functions/miscfunc/eeg_mne2eeg.py +++ b/src/eegprep/functions/miscfunc/eeg_mne2eeg.py @@ -1,11 +1,12 @@ """MNE to EEG conversion functions.""" -from ..popfunc.pop_loadset import pop_loadset -import mne +from pathlib import Path import tempfile -import os + +import mne from mne.export import export_raw, export_epochs -import numpy as np + +from ..popfunc.pop_loadset import pop_loadset def _mne_events_to_eeglab_events(raw_or_epochs): @@ -38,7 +39,6 @@ def _mne_events_to_eeglab_events(raw_or_epochs): return events -# write a funtion that converts a MNE raw object to an EEGLAB set file def eeg_mne2eeg(raw): """Convert MNE Raw object to EEG data structure. @@ -54,49 +54,16 @@ def eeg_mne2eeg(raw): """ raw_or_epochs = raw - # Generate a temporary file name - with tempfile.NamedTemporaryFile(delete=False) as temp_file: - temp_file_path = temp_file.name - - base, _ = os.path.splitext(temp_file_path) - new_temp_file_path = base + ".set" - - # save the raw/epochs file as a new EEGLAB .set file using MNE EEGLAB writer - if isinstance(raw_or_epochs, mne.BaseEpochs): - export_epochs(new_temp_file_path, raw_or_epochs, fmt='eeglab') - else: - export_raw(new_temp_file_path, raw_or_epochs, fmt='eeglab') - - # load the EEGLAB set file - EEG = pop_loadset(new_temp_file_path) + with tempfile.TemporaryDirectory(prefix="eegprep-mne2eeg-") as temp_dir: + set_path = Path(temp_dir) / "bridge.set" + if isinstance(raw_or_epochs, mne.BaseEpochs): + export_epochs(str(set_path), raw_or_epochs, fmt='eeglab') + else: + export_raw(str(set_path), raw_or_epochs, fmt='eeglab') + EEG = pop_loadset(str(set_path)) # Inject events/annotations from MNE object into EEGLAB structure eeglab_events = _mne_events_to_eeglab_events(raw_or_epochs) if eeglab_events: EEG['event'] = eeglab_events - return EEG - - -def test_eeg_mne2eeg(): - """Test the eeg_mne2eeg function.""" - eeglab_file_path = './eeglab_data_with_ica_tmp.set' - eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set' - EEG = pop_loadset(eeglab_file_path) - - # create MNE info structure - info = mne.create_info(ch_names=[x['labels'] for x in EEG['chanlocs']], sfreq=EEG['srate'], ch_types='eeg') - if EEG['trials'] > 1: - events = np.array([[i, 0, 1] for i in range(EEG['trials'])]) # NOT CORRECT CONVERTION JUST FOR TESTING - event_id = dict(dummy=1) - raw = mne.EpochsArray(EEG['data'].transpose(2, 0, 1), info, events, tmin=0, event_id=event_id) - else: - raw = mne.io.RawArray(EEG['data'], info) - - EEG2 = eeg_mne2eeg(raw) - - # print the keys of the EEG dictionary - print(EEG2.keys()) - - -# test_eeg_mne2eeg() diff --git a/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py b/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py index 876dda17..808f8481 100644 --- a/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py +++ b/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py @@ -1,17 +1,15 @@ """MNE epochs to EEGLAB dataset conversion utilities.""" -# Example to export MNE epochs to EEGLAB dataset -# Events are not handled correctly in this example but it works - -import mne -from mne.preprocessing import ICA +import logging import math import numpy as np -from scipy.io import savemat + +from eegprep.functions.miscfunc.misc import finite_matmul, finite_pinv + +logger = logging.getLogger(__name__) -# Load example data def eeg_mne2eeg_epochs(epochs, ica): """Convert MNE epochs with ICA to EEGLAB dataset format. @@ -27,29 +25,27 @@ def eeg_mne2eeg_epochs(epochs, ica): dict EEGLAB-compatible dataset dictionary. """ - # export to EEGLAB dataset - data = epochs.get_data() # Get the data from the epochs - n_epochs, n_channels, n_times = data.shape - ica_weights = ica.get_components() # ICA weights (n_components x n_channels) - - # create identity matrix of size n_channels x n_channels - ica_sphere = np.eye(n_channels) # ICA sphere (n_channels x n_channels) - - # Compute the mixing matrix (inverse weights) - ica_inverse_weights = np.linalg.pinv(ica_weights) # Shape: (n_channels, n_components) + mne_data = epochs.get_data(copy=True) + n_epochs, n_channels, n_times = mne_data.shape + data = np.transpose(mne_data, (1, 2, 0)) ica_channels = ica.info['ch_names'] raw_channels = epochs.info['ch_names'] # Assuming you have the raw object ica_channel_indices = [raw_channels.index(ch) for ch in ica_channels] ica_channel_indices = np.array(ica_channel_indices) - ica_act = ica.get_sources(epochs).get_data(copy=True).transpose(1, 2, 0) # Get the ICA activations + ica_weights, ica_sphere, ica_inverse_weights, ica_act = _mne_ica_to_eeglab_fields( + ica, + data[ica_channel_indices], + n_times, + n_epochs, + ) - print('Reference conversion may not be accurate...') if 'custom_ref_applied' in epochs.info and epochs.info['custom_ref_applied']: ref = 'common' # Custom reference was applied else: ref = 'average' # Default to average reference + logger.info("MNE reference metadata converted to EEGPrep ref=%s.", ref) eeglab_dict = { 'setname': '', @@ -70,8 +66,8 @@ def eeg_mne2eeg_epochs(epochs, ica): 'data': data, 'icaact': ica_act, 'icawinv': ica_inverse_weights, - 'icasphere': ica_weights, - 'icaweights': ica_sphere, + 'icasphere': ica_sphere, + 'icaweights': ica_weights, 'icachansind': ica_channel_indices, 'chanlocs': np.array([]), 'urchanlocs': np.array([]), @@ -111,21 +107,27 @@ def eeg_mne2eeg_epochs(epochs, ica): Y_all = [] Z_all = [] for ch in ch_locs: - if 'loc' in ch and ch['loc'] is not None: - X_all.append(ch['loc'][1] * 1000) - Y_all.append(-ch['loc'][0] * 1000) - Z_all.append(ch['loc'][2] * 1000) - hypotxy = math.hypot(X_all[-1], Y_all[-1]) - sph_radius_all.append(math.hypot(hypotxy, Z_all[-1])) - - az = math.atan2(Y_all[-1], X_all[-1]) / math.pi * 180 - horiz = math.atan2(Z_all[-1], hypotxy) / math.pi * 180 - - sph_theta_all.append(az) - sph_phi_all.append(horiz) - - theta_all.append(-az) # warning inverse notation compared to MATLAB to match - radius_all.append(0.5 - horiz / 180) # warning inverse notation compared to MATLAB to match + loc = ch.get('loc') if isinstance(ch, dict) else None + if loc is None or len(loc) < 3: + x = y = z = 0.0 + else: + x = float(loc[1]) * 1000 + y = -float(loc[0]) * 1000 + z = float(loc[2]) * 1000 + X_all.append(x) + Y_all.append(y) + Z_all.append(z) + hypotxy = math.hypot(x, y) + sph_radius_all.append(math.hypot(hypotxy, z)) + + az = math.atan2(y, x) / math.pi * 180 + horiz = math.atan2(z, hypotxy) / math.pi * 180 + + sph_theta_all.append(az) + sph_phi_all.append(horiz) + + theta_all.append(-az) # warning inverse notation compared to MATLAB to match + radius_all.append(0.5 - horiz / 180) # warning inverse notation compared to MATLAB to match d_list = [ { @@ -166,43 +168,31 @@ def eeg_mne2eeg_epochs(epochs, ica): d_list = np.array(d_list) eeglab_dict['chanlocs'] = d_list - # # Step 4: Save the EEGLAB dataset as a .mat file return eeglab_dict - # print("EEGLAB dataset saved successfully!") - - -def test_eeg_mne2eeg_epochs(): - """Test the eeg_mne2eeg_epochs function with sample MNE data.""" - sample_data_folder = mne.datasets.sample.data_path() - sample_data_raw_file = sample_data_folder / "MEG" / "sample" / "sample_audvis_filt-0-40_raw.fif" - - raw = mne.io.read_raw_fif(sample_data_raw_file) - - # extract data epochs - events = mne.find_events(raw, stim_channel="STI 014") - event_dict = { - "auditory/left": 1, - "auditory/right": 2, - "visual/left": 3, - "visual/right": 4, - "smiley": 5, - "buttonpress": 32, - } - epochs = mne.Epochs( - raw, - events, - event_id=event_dict, - tmin=-0.2, - tmax=0.5, - preload=True, - ) - - ica = ICA(n_components=15, random_state=97, max_iter=800) - ica.fit(raw) - - EEG = eeg_mne2eeg_epochs(epochs, ica) - savemat('output_file.mat', EEG) # use pop_saveset - -# test_eeg_mne2eeg_epochs() +def _mne_ica_to_eeglab_fields(ica, data, n_times, n_epochs): + n_components = int(ica.n_components_) + n_ica_channels = data.shape[0] + prewhitener = _prewhitener_matrix(ica, n_ica_channels) + pca_unmixing = finite_matmul(np.asarray(ica.unmixing_matrix_), np.asarray(ica.pca_components_)[:n_components]) + unmixing = finite_matmul(pca_unmixing, prewhitener) + sphere = np.eye(n_ica_channels) + inverse_weights = finite_pinv(unmixing) + activations_2d = finite_matmul(unmixing, data.reshape(n_ica_channels, -1, order="F")) + activations = activations_2d.reshape(n_components, n_times, n_epochs, order="F") + return unmixing, sphere, inverse_weights, activations + + +def _prewhitener_matrix(ica, n_channels): + prewhitener = np.asarray(ica.pre_whitener_) + if ica.noise_cov is not None: + if prewhitener.shape != (n_channels, n_channels): + raise ValueError("MNE ICA pre-whitener has incompatible shape") + return prewhitener + values = prewhitener.reshape(-1) + if values.size == 1: + return np.eye(n_channels) / float(values[0]) + if values.size != n_channels: + raise ValueError("MNE ICA pre-whitener has incompatible shape") + return np.diag(1.0 / values) diff --git a/src/eegprep/functions/sigprocfunc/epoch.py b/src/eegprep/functions/sigprocfunc/epoch.py index 3afb2eff..c3ac596f 100644 --- a/src/eegprep/functions/sigprocfunc/epoch.py +++ b/src/eegprep/functions/sigprocfunc/epoch.py @@ -4,10 +4,14 @@ locked to specified events. """ +import logging + import numpy as np from ..miscfunc.misc import round_mat +logger = logging.getLogger(__name__) + def epoch(data, events, lim, **kwargs): """ @@ -51,7 +55,8 @@ def _as_1d(a): reallim[1] = int(round_mat(lim[1] * g['srate'] - 1)) # minus 1 sample # --- epoching --- - print('Epoching...') + if g['verbose'] == 'on': + logger.info('Epoching...') newdatalength = int(reallim[1] - reallim[0] + 1) @@ -116,12 +121,12 @@ def _as_1d(a): indexes[index] = 1 else: if g['verbose'] == 'on': - print(f'Warning: event {index + 1} out of value limits') + logger.warning('event %s out of value limits', index + 1) else: indexes[index] = 1 else: if g['verbose'] == 'on': - print(f'Warning: event {index + 1} out of data boundary') + logger.warning('event %s out of data boundary', index + 1) # Re-reference events if g['allevents'] is not None and g['allevents'].size > 0: diff --git a/tests/test_console_workspace.py b/tests/test_console_workspace.py index d996217a..53780922 100644 --- a/tests/test_console_workspace.py +++ b/tests/test_console_workspace.py @@ -809,6 +809,26 @@ def test_bare_legacy_pop_averef_alias_updates_session_history(): workspace.close() +def test_console_dataset_state_result_updates_session_once_without_duplicate_history(): + session = EEGPrepSession() + session.store_current(_demo_eeg("one"), new=True) + session.store_current(_demo_eeg("two"), new=True) + refresh = mock.Mock() + workspace = EEGPrepConsoleWorkspace(session, refresh=refresh, exports={}) + first = dict(session.ALLEEG[0], setname="one edited") + second = dict(session.ALLEEG[1], setname="two edited") + command = "[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, retrieve=[2, 1]);" + + result = workspace.accept_pop_result(([first, second], [second, first], [2, 1], command), (), {}) + + assert list(result)[3] == command + assert session.CURRENTSET == [2, 1] + assert [item["setname"] for item in session.EEG] == ["two edited", "one edited"] + assert [item["setname"] for item in session.ALLEEG] == ["one edited", "two edited"] + assert session.ALLCOM == [command] + refresh.assert_called_once() + + def test_pop_call_without_history_command_records_raw_console_source(): session = EEGPrepSession() session.store_current(_demo_eeg(), new=True) diff --git a/tests/test_eeg_eeg2mne.py b/tests/test_eeg_eeg2mne.py index c0a69a68..a182e270 100644 --- a/tests/test_eeg_eeg2mne.py +++ b/tests/test_eeg_eeg2mne.py @@ -9,6 +9,7 @@ import numpy as np import tempfile import shutil +from unittest import mock from eegprep.functions.miscfunc.eeg_eeg2mne import eeg_eeg2mne @@ -26,9 +27,6 @@ except (ImportError, ValueError): from fixtures import create_test_eeg -if os.getenv('EEGPREP_SKIP_MATLAB') == '1': - raise unittest.SkipTest("MATLAB not available") - class TestEEGEEG2MNE(unittest.TestCase): """Test cases for eeg_eeg2mne function.""" @@ -60,6 +58,22 @@ def test_eeg_eeg2mne_continuous_data(self): self.assertEqual(result.info['nchan'], continuous_eeg['nbchan']) self.assertEqual(result.n_times, continuous_eeg['pnts']) + @unittest.skipUnless(MNE_AVAILABLE, "MNE not available") + def test_eeg_eeg2mne_cleans_temporary_bridge_files(self): + continuous_eeg = self.test_eeg.copy() + continuous_eeg['data'] = np.random.randn(32, 1000) + continuous_eeg['trials'] = 1 + real_tempdir = tempfile.TemporaryDirectory + + def tempdir_factory(*args, **kwargs): + kwargs["dir"] = self.temp_dir + return real_tempdir(*args, **kwargs) + + with mock.patch("eegprep.functions.miscfunc.eeg_eeg2mne.tempfile.TemporaryDirectory", tempdir_factory): + eeg_eeg2mne(continuous_eeg) + + self.assertEqual(os.listdir(self.temp_dir), []) + @unittest.skipUnless(MNE_AVAILABLE, "MNE not available") def test_eeg_eeg2mne_epoched_data(self): """Test conversion of epoched EEG data.""" diff --git a/tests/test_eeg_mne2eeg.py b/tests/test_eeg_mne2eeg.py index 8b9000a7..16f0bbfb 100644 --- a/tests/test_eeg_mne2eeg.py +++ b/tests/test_eeg_mne2eeg.py @@ -10,6 +10,7 @@ import tempfile import os import shutil +from unittest import mock # Add src to path for imports sys.path.insert(0, 'src') @@ -92,6 +93,21 @@ def test_eeg_mne2eeg_raw_object(self): except Exception as e: self.skipTest(f"eeg_mne2eeg raw conversion not available: {e}") + @unittest.skipUnless(MNE_AVAILABLE, "MNE not available") + def test_eeg_mne2eeg_cleans_temporary_bridge_files(self): + info = mne.create_info(["EEG001", "EEG002"], 100.0, ch_types='eeg') + raw = mne.io.RawArray(np.random.randn(2, 100), info) + real_tempdir = tempfile.TemporaryDirectory + + def tempdir_factory(*args, **kwargs): + kwargs["dir"] = self.temp_dir + return real_tempdir(*args, **kwargs) + + with mock.patch("eegprep.functions.miscfunc.eeg_mne2eeg.tempfile.TemporaryDirectory", tempdir_factory): + eeg_mne2eeg(raw) + + self.assertEqual(os.listdir(self.temp_dir), []) + @unittest.skipUnless(MNE_AVAILABLE, "MNE not available") def test_eeg_mne2eeg_epochs_object(self): """Test conversion of MNE Epochs object.""" diff --git a/tests/test_eeg_mne2eeg_epochs.py b/tests/test_eeg_mne2eeg_epochs.py index 1f120272..3964e3cf 100644 --- a/tests/test_eeg_mne2eeg_epochs.py +++ b/tests/test_eeg_mne2eeg_epochs.py @@ -4,6 +4,8 @@ This module tests the eeg_mne2eeg_epochs function that converts MNE Epochs with ICA to EEGLAB datasets. """ +import contextlib +import io import unittest import os import numpy as np @@ -11,6 +13,7 @@ import shutil from eegprep.functions.miscfunc.eeg_mne2eeg_epochs import eeg_mne2eeg_epochs +from eegprep.functions.miscfunc.misc import finite_matmul, finite_pinv try: import mne @@ -25,9 +28,6 @@ except (ImportError, ValueError): from fixtures import create_test_eeg -if os.getenv('EEGPREP_SKIP_MATLAB') == '1': - raise unittest.SkipTest("MATLAB not available") - class TestEEGMNE2EEGEpochs(unittest.TestCase): """Test cases for eeg_mne2eeg_epochs function.""" @@ -85,6 +85,28 @@ def test_eeg_mne2eeg_epochs_basic_functionality(self): except Exception as e: self.skipTest(f"eeg_mne2eeg_epochs basic functionality not available: {e}") + @unittest.skipUnless(MNE_AVAILABLE, "MNE not available") + def test_eeg_mne2eeg_epochs_uses_channel_major_data_without_stdout(self): + n_channels = 4 + n_times = 20 + n_epochs = 3 + sfreq = 100.0 + ch_names = [f'EEG{i:03d}' for i in range(n_channels)] + info = mne.create_info(ch_names, sfreq, ch_types='eeg') + data = np.arange(n_epochs * n_channels * n_times, dtype=float).reshape(n_epochs, n_channels, n_times) + events = np.array([[i, 0, 1] for i in range(n_epochs)]) + epochs = mne.EpochsArray(data, info, events, tmin=0, event_id={'event': 1}, verbose=False) + ica = ICA(n_components=2, random_state=42, max_iter=20) + ica.fit(epochs, verbose=False) + + stream = io.StringIO() + with contextlib.redirect_stdout(stream): + result = eeg_mne2eeg_epochs(epochs, ica) + + self.assertEqual(stream.getvalue(), "") + self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs)) + np.testing.assert_allclose(result['data'], np.transpose(data, (1, 2, 0))) + @unittest.skipUnless(MNE_AVAILABLE, "MNE not available") def test_eeg_mne2eeg_epochs_ica_fields(self): """Test ICA fields in the converted EEGLAB dataset.""" @@ -106,25 +128,23 @@ def test_eeg_mne2eeg_epochs_ica_fields(self): ica = ICA(n_components=8, random_state=42) ica.fit(epochs) - try: - result = eeg_mne2eeg_epochs(epochs, ica) - - # Check ICA fields - self.assertIn('icaact', result) - self.assertIn('icawinv', result) - self.assertIn('icasphere', result) - self.assertIn('icaweights', result) - self.assertIn('icachansind', result) - - # Check ICA field shapes - self.assertEqual(result['icaact'].shape, (8, n_times, n_epochs)) # n_components x n_times x n_epochs - self.assertEqual(result['icawinv'].shape, (8, n_channels)) # n_components x n_channels - self.assertEqual(result['icasphere'].shape, (n_channels, 8)) # n_channels x n_components - self.assertEqual(result['icaweights'].shape, (n_channels, n_channels)) # identity matrix - self.assertEqual(len(result['icachansind']), n_channels) # channel indices - - except Exception as e: - self.skipTest(f"eeg_mne2eeg_epochs ICA fields not available: {e}") + result = eeg_mne2eeg_epochs(epochs, ica) + + self.assertIn('icaact', result) + self.assertIn('icawinv', result) + self.assertIn('icasphere', result) + self.assertIn('icaweights', result) + self.assertIn('icachansind', result) + self.assertEqual(result['icaact'].shape, (8, n_times, n_epochs)) + self.assertEqual(result['icawinv'].shape, (n_channels, 8)) + self.assertEqual(result['icasphere'].shape, (n_channels, n_channels)) + self.assertEqual(result['icaweights'].shape, (8, n_channels)) + self.assertEqual(len(result['icachansind']), n_channels) + unmixing = finite_matmul(result['icaweights'], result['icasphere']) + data_2d = result['data'][result['icachansind']].reshape(n_channels, -1, order="F") + icaact_2d = result['icaact'].reshape(8, -1, order="F") + np.testing.assert_allclose(finite_matmul(unmixing, data_2d), icaact_2d, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(finite_pinv(unmixing), result['icawinv'], rtol=1e-10, atol=1e-10) @unittest.skipUnless(MNE_AVAILABLE, "MNE not available") def test_eeg_mne2eeg_epochs_channel_locations(self): @@ -252,9 +272,8 @@ def test_eeg_mne2eeg_epochs_single_epoch(self): try: result = eeg_mne2eeg_epochs(epochs, ica) - # Check data dimensions (data is in MNE format: n_epochs x n_channels x n_times) self.assertEqual(result['trials'], 1) - self.assertEqual(result['data'].shape, (n_epochs, n_channels, n_times)) + self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs)) self.assertEqual(result['icaact'].shape, (8, n_times, n_epochs)) except Exception as e: @@ -286,8 +305,8 @@ def test_eeg_mne2eeg_epochs_minimal_channels(self): result = eeg_mne2eeg_epochs(epochs, ica) # Check data dimensions - self.assertEqual(result['nbchan'], 1) - self.assertEqual(result['data'].shape, (1, n_times, n_epochs)) + self.assertEqual(result['nbchan'], n_channels) + self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs)) self.assertEqual(result['icaact'].shape, (2, n_times, n_epochs)) except Exception as e: @@ -317,10 +336,9 @@ def test_eeg_mne2eeg_epochs_short_data(self): try: result = eeg_mne2eeg_epochs(epochs, ica) - # Check data dimensions (data is in MNE format: n_epochs x n_channels x n_times) self.assertEqual(result['pnts'], 10) self.assertEqual(result['trials'], 3) - self.assertEqual(result['data'].shape, (n_epochs, n_channels, n_times)) + self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs)) except Exception as e: self.skipTest(f"eeg_mne2eeg_epochs short data not available: {e}") @@ -484,9 +502,9 @@ def test_eeg_mne2eeg_epochs_integration_workflow(self): # Check ICA properties self.assertEqual(result['icaact'].shape, (15, 200, 20)) - self.assertEqual(result['icawinv'].shape, (15, 32)) - self.assertEqual(result['icasphere'].shape, (32, 15)) - self.assertEqual(result['icaweights'].shape, (32, 32)) + self.assertEqual(result['icawinv'].shape, (32, 15)) + self.assertEqual(result['icasphere'].shape, (32, 32)) + self.assertEqual(result['icaweights'].shape, (15, 32)) self.assertEqual(len(result['icachansind']), 32) # Check channel locations diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py index a9d58b37..26b43888 100644 --- a/tests/test_gui_main_window.py +++ b/tests/test_gui_main_window.py @@ -1329,7 +1329,14 @@ def test_file_menu_runscript_updates_currentset_from_namespace(self): qt_widgets = _fake_qt_widgets(open_file="/tmp/script.py") def fake_runscript(_filename, namespace): + self.assertIn("ALLCOM", namespace) + self.assertIn("LASTCOM", namespace) + self.assertIn("CURRENTSTUDY", namespace) namespace["CURRENTSET"] = 2 + namespace["ALLCOM"].append("EEG = script_command(EEG);") + namespace["LASTCOM"] = "EEG = script_command(EEG);" + namespace["STUDY"] = {"name": "script study"} + namespace["CURRENTSTUDY"] = 1 return "LASTCOM = pop_runscript('/tmp/script.py');" with ( @@ -1339,7 +1346,13 @@ def fake_runscript(_filename, namespace): dispatcher.dispatch("pop_runscript") self.assertEqual(session.CURRENTSET, [2]) - self.assertEqual(session.ALLCOM[-1], "LASTCOM = pop_runscript('/tmp/script.py');") + self.assertEqual(session.STUDY["name"], "script study") + self.assertEqual(session.CURRENTSTUDY, 1) + self.assertEqual( + session.ALLCOM, + ["EEG = script_command(EEG);", "LASTCOM = pop_runscript('/tmp/script.py');"], + ) + self.assertEqual(session.LASTCOM, "LASTCOM = pop_runscript('/tmp/script.py');") class QtMainWindowTests(unittest.TestCase): diff --git a/tests/test_pop_epoch.py b/tests/test_pop_epoch.py index 886bedfe..7e241076 100644 --- a/tests/test_pop_epoch.py +++ b/tests/test_pop_epoch.py @@ -6,6 +6,8 @@ MATLAB EEGLAB's pop_epoch function across all tested scenarios. """ +import contextlib +import io import os import numpy as np import unittest @@ -390,6 +392,27 @@ class TestPopEpochEdgeCases(unittest.TestCase): def setUp(self): np.random.seed(42) + def test_pop_epoch_does_not_print_to_stdout(self): + EEG = { + 'data': np.random.randn(2, 500).astype(np.float32), + 'srate': 100.0, + 'nbchan': 2, + 'pnts': 500, + 'trials': 1, + 'xmin': 0.0, + 'xmax': 4.99, + 'setname': 'stdout_test', + 'event': [{'type': 'stim', 'latency': 250}], + 'epoch': [], + 'saved': 'no', + } + stream = io.StringIO() + + with contextlib.redirect_stdout(stream): + pop_epoch(EEG, 'stim', [-0.1, 0.1]) + + self.assertEqual(stream.getvalue(), "") + def test_boundary_events_near_edges(self): """Test epoching when events are near data boundaries""" # Create EEG with events near boundaries From 2be2a7a88a5684bff05e1dfd92e53ff9e1dd1014 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Wed, 10 Jun 2026 17:24:27 -0700 Subject: [PATCH 08/16] Fold in useful Claude review fixes --- src/eegprep/functions/guifunc/menu_actions.py | 23 +++++++++++-------- tests/conftest.py | 2 -- tests/test_gui_main_window.py | 21 +++++++++++++++++ 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py index b0c80e7a..dfcf9098 100644 --- a/src/eegprep/functions/guifunc/menu_actions.py +++ b/src/eegprep/functions/guifunc/menu_actions.py @@ -910,16 +910,19 @@ def _run_script(self, parent: Any | None) -> None: } command = pop_runscript(filename, namespace) self.session.echo_command(command) - self.session.apply_workspace_state( - eeg=namespace.get("EEG", self.session.EEG), - alleeg=namespace.get("ALLEEG", self.session.ALLEEG), - currentset=namespace.get("CURRENTSET", self.session.current_set_value()), - allcom=namespace.get("ALLCOM", self.session.ALLCOM), - lastcom=namespace.get("LASTCOM", self.session.LASTCOM), - study=namespace.get("STUDY", self.session.STUDY), - currentstudy=namespace.get("CURRENTSTUDY", self.session.CURRENTSTUDY), - command=command, - ) + state = { + "alleeg": namespace.get("ALLEEG", self.session.ALLEEG), + "currentset": namespace.get("CURRENTSET", self.session.current_set_value()), + "allcom": namespace.get("ALLCOM", self.session.ALLCOM), + "lastcom": namespace.get("LASTCOM", self.session.LASTCOM), + "study": namespace.get("STUDY", self.session.STUDY), + "currentstudy": namespace.get("CURRENTSTUDY", self.session.CURRENTSTUDY), + "command": command, + } + script_eeg = namespace.get("EEG", self.session.EEG) + if script_eeg is not self.session.EEG: + state["eeg"] = script_eeg + self.session.apply_workspace_state(**state) self._refresh() def _bids_tool_action(self, action: str, parent: Any | None) -> None: diff --git a/tests/conftest.py b/tests/conftest.py index 1735e1a8..b132c768 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -60,10 +60,8 @@ def _preload_matlab_libstdcxx() -> None: "tests/test_bids_preproc.py", "tests/test_clean_rawdata.py", "tests/test_eeg_compare.py", - "tests/test_eeg_eeg2mne.py", "tests/test_eeg_eegrej.py", "tests/test_eeg_lat2point.py", - "tests/test_eeg_mne2eeg_epochs.py", "tests/test_eeg_point2lat.py", "tests/test_eeg_rpsd_parity.py", "tests/test_eegfindboundaries.py", diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py index 26b43888..957d2cbb 100644 --- a/tests/test_gui_main_window.py +++ b/tests/test_gui_main_window.py @@ -1354,6 +1354,27 @@ def fake_runscript(_filename, namespace): ) self.assertEqual(session.LASTCOM, "LASTCOM = pop_runscript('/tmp/script.py');") + def test_file_menu_runscript_clear_currentset_resets_eeg(self): + session = EEGPrepSession() + session.store_current(_demo_eeg(), new=True) + dispatcher = MenuActionDispatcher(session) + qt_widgets = _fake_qt_widgets(open_file="/tmp/script.py") + + def fake_runscript(_filename, namespace): + namespace["CURRENTSET"] = 0 + return "LASTCOM = pop_runscript('/tmp/script.py');" + + with ( + mock.patch("eegprep.functions.guifunc.menu_actions._require_qt_widgets", return_value=qt_widgets), + mock.patch("eegprep.functions.popfunc.pop_runscript.pop_runscript", side_effect=fake_runscript), + ): + dispatcher.dispatch("pop_runscript") + + self.assertEqual(session.CURRENTSET, []) + self.assertIsInstance(session.EEG, dict) + self.assertEqual(session.EEG.get("setname"), "") + self.assertEqual(session.EEG["data"].size, 0) + class QtMainWindowTests(unittest.TestCase): def test_gui_main_window_startup_branding_size_and_menu_states(self): From 601b5adc3be2b5351348d9900c949dbe624c5d16 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Wed, 10 Jun 2026 18:22:03 -0700 Subject: [PATCH 09/16] Address code quality review findings --- .../functions/adminfunc/eeg_checkset.py | 13 - .../functions/adminfunc/eeglabcompat.py | 103 ++--- src/eegprep/functions/adminfunc/pymat.py | 130 ------ src/eegprep/functions/eegobj/eegobj.py | 5 - src/eegprep/functions/guifunc/menu_actions.py | 36 +- .../functions/guifunc/visual_capture.py | 384 ++++++++---------- .../functions/miscfunc/save_struct_as_hdf5.py | 18 - src/eegprep/functions/popfunc/eeg_compare.py | 13 - src/eegprep/functions/popfunc/eeg_eegrej.py | 6 +- src/eegprep/functions/popfunc/eeg_interp.py | 159 -------- .../functions/popfunc/eeg_lat2point.py | 7 +- .../functions/popfunc/pop_load_frombids.py | 2 +- src/eegprep/functions/popfunc/pop_loadset.py | 13 - .../functions/popfunc/pop_loadset_h5.py | 13 - src/eegprep/functions/popfunc/pop_saveset.py | 17 - src/eegprep/functions/popfunc/pop_select.py | 28 +- src/eegprep/plugins/ICLabel/eeg_autocorr.py | 24 -- .../plugins/ICLabel/eeg_autocorr_fftw.py | 27 -- .../plugins/ICLabel/eeg_autocorr_welch.py | 16 - src/eegprep/plugins/ICLabel/eeg_rpsd.py | 18 - .../ICLabel/iclabel_net_load_py_measures.py | 23 +- tests/test_eeg_eegrej.py | 9 +- tests/test_eeglabcompat.py | 41 ++ tests/test_gui_main_window.py | 20 + tests/test_processing_logging_contract.py | 70 ++++ tests/test_visual_parity.py | 17 +- 26 files changed, 400 insertions(+), 812 deletions(-) create mode 100644 tests/test_processing_logging_contract.py diff --git a/src/eegprep/functions/adminfunc/eeg_checkset.py b/src/eegprep/functions/adminfunc/eeg_checkset.py index b94b5c58..3e04c78f 100644 --- a/src/eegprep/functions/adminfunc/eeg_checkset.py +++ b/src/eegprep/functions/adminfunc/eeg_checkset.py @@ -545,16 +545,3 @@ def eeg_checkset(EEG, *checks, load_data=True): ) return EEG - - -def test_eeg_checkset(): - from eegprep.functions.popfunc.pop_loadset import pop_loadset - - eeglab_file_path = './sample_data/eeglab_data_with_ica_tmp_out2.set' - EEG = pop_loadset(eeglab_file_path) - EEG = eeg_checkset(EEG) - logger.info('Checkset done') - - -if __name__ == '__main__': - test_eeg_checkset() diff --git a/src/eegprep/functions/adminfunc/eeglabcompat.py b/src/eegprep/functions/adminfunc/eeglabcompat.py index f7b9c532..063ae948 100644 --- a/src/eegprep/functions/adminfunc/eeglabcompat.py +++ b/src/eegprep/functions/adminfunc/eeglabcompat.py @@ -169,9 +169,9 @@ def wrapper(*args, **kwargs): temp_filename2 = temp_file2.name result_filename = temp_filename1 + '.result.set' result_extra_filename = temp_filename1 + '.result.mat' - print(f"temp_filename1: {temp_filename1}") - print(f"temp_filename2: {temp_filename2}") - print(f"result_filename: {result_filename}") + logger.debug("MATLAB roundtrip input set path: %s", temp_filename1) + logger.debug("MATLAB roundtrip args path: %s", temp_filename2) + logger.debug("MATLAB roundtrip result set path: %s", result_filename) # save all parameters in the temp_filename which is a .mat file if len(new_args) > 0: @@ -192,7 +192,7 @@ def wrapper(*args, **kwargs): pop_saveset(args[0], temp_filename1) self.engine.eval(f"EEG = pop_loadset('{temp_filename1}');", nargout=0) - print(f"Running in MATLAB/Octave: {eval_str}") + logger.debug("Running in MATLAB/Octave: %s", eval_str) self.engine.eval(eval_str, nargout=0) # output @@ -280,12 +280,12 @@ def get_eeglab(runtime: str = default_runtime, *, auto_file_roundtrip: bool = Tr try: engine = _cache[rt] except KeyError: - print(f"Loading {runtime} runtime...", end='', flush=True) + logger.info("Loading %s runtime...", runtime) # On the command line, type "octave-8.4.0" OCTAVE_EXECUTABLE or OCTAVE var path2eeglab = str(_resolve_eeglab_root()) matlab_test_dir = REPO_ROOT / 'tests' / 'matlab' scripts_dir = str(REPO_ROOT / 'scripts') - print("This is the path2eeglab: ", path2eeglab) + logger.debug("EEGLAB reference path: %s", path2eeglab) # not yet loaded, do so now if rt == 'oct': @@ -344,7 +344,7 @@ def get_eeglab(runtime: str = default_runtime, *, auto_file_roundtrip: bool = Tr engine.logger.setLevel(logging.INFO) _cache[rt] = engine - print('done.') + logger.info("Loaded %s runtime.", runtime) # optionally wrap the engine in a file-roundtripping wrapper if auto_file_roundtrip: @@ -496,66 +496,29 @@ def clean_artifacts( else: BurstRejection = 'on' - pop_saveset(EEG, './tmp.set') # 0.8 seconds - EEG2 = eeglab.pop_loadset('./tmp.set') # 2 seconds - EEG3 = eeglab.clean_artifacts( - EEG2, - 'ChannelCriterion', - ChannelCriterion, - 'LineNoiseCriterion', - LineNoiseCriterion, - 'FlatlineCriterion', - FlatlineCriterion, - 'BurstCriterion', - BurstCriterion, - 'BurstRejection', - BurstRejection, - 'WindowCriterion', - WindowCriterion, - 'Highpass', - Highpass, - 'WindowCriterionTolerances', - WindowCriterionTolerances, - ) - eeglab.pop_saveset(EEG3, './tmp2.set') # 2.4 seconds - EEG4 = pop_loadset('./tmp2.set') # 0.2 seconds - - # delete temporary files - os.remove('./tmp.set') - os.remove('./tmp2.set') - return EEG4 - - -# sys.exit() -def test_eeglab_compat(): - """Test EEGLAB compatibility.""" - eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set' - - EEG = pop_loadset(eeglab_file_path) - EEG = pop_eegfiltnew(EEG, locutoff=5, hicutoff=25, revfilt=True, plotfreqz=False) - EEG = clean_artifacts( - EEG, - FlatlineCriterion=5, - ChannelCriterion=0.87, - LineNoiseCriterion=4, - Highpass=False, - BurstCriterion=20, - WindowCriterion=0.25, - BurstRejection=False, - WindowCriterionTolerances=[float('-inf'), 7], - ) - - # EEG = eeglab.pop_loadset(eeglab_file_path) - # TMPEEG = eeglab.pop_eegfiltnew(EEG, 'locutoff',5,'hicutoff',25,'revfilt',1,'plotfreqz',0) - # CLEANEDEEG = eeglab.clean_artifacts(TMPEEG, 'ChannelCriterion', 'off', - # 'LineNoiseCriterion', 'off', - # 'FlatlineCriterion', 'off', - # 'BurstCriterion', 'off', - # 'WindowCriterion', 0, - # 'Highpass',[0.25, 0.75], - # 'WindowCriterionTolerances', [-10000000, 8]) - - # clean_artifacts( EEG, ChannelCriterion='on' ) - - -# test_eeglab_compat() + with tempfile.TemporaryDirectory(prefix="eegprep_clean_artifacts_") as workdir: + input_path = Path(workdir) / "input.set" + output_path = Path(workdir) / "output.set" + pop_saveset(EEG, input_path) + EEG2 = eeglab.pop_loadset(str(input_path)) + EEG3 = eeglab.clean_artifacts( + EEG2, + 'ChannelCriterion', + ChannelCriterion, + 'LineNoiseCriterion', + LineNoiseCriterion, + 'FlatlineCriterion', + FlatlineCriterion, + 'BurstCriterion', + BurstCriterion, + 'BurstRejection', + BurstRejection, + 'WindowCriterion', + WindowCriterion, + 'Highpass', + Highpass, + 'WindowCriterionTolerances', + WindowCriterionTolerances, + ) + eeglab.pop_saveset(EEG3, str(output_path)) + return pop_loadset(output_path) diff --git a/src/eegprep/functions/adminfunc/pymat.py b/src/eegprep/functions/adminfunc/pymat.py index d1bf6152..48f334ae 100644 --- a/src/eegprep/functions/adminfunc/pymat.py +++ b/src/eegprep/functions/adminfunc/pymat.py @@ -266,133 +266,3 @@ def mat2py(obj): else: # Fallback: return the object as-is if no conversion rule applies return obj - - -def test_py2mat(): - """Test the py2mat and mat2py conversion functions with various data structures.""" - import scipy.io - - # Test basic functionality - print("=== Basic Test ===") - dicts = [{'a': 'adsaf1', 'b': 2.0}, {'a': 'adsaf', 'b': 4.0}, {'a': 'adsaf33', 'b': 7.0}] - struct_array = py2mat(dicts) - print("Original: ", dicts) - - mat2py(struct_array) - scipy.io.savemat('test1.mat', {'struct_array': struct_array}) - struct_array2 = scipy.io.loadmat('test1.mat') - struct_array2 = struct_array2['struct_array'][0] - dicts3 = mat2py(struct_array2) - print("Converted: ", dicts3) - - # Test nested dictionaries - print("\n=== Nested Dictionary Test ===") - nested_dicts = [ - {'name': 'item1', 'value': 10.5, 'config': {'enabled': True, 'threshold': 0.8}, 'tags': ['tag1', 'tag2']}, - {'name': 'item2', 'value': 20.3, 'config': {'enabled': False, 'threshold': 0.9}, 'tags': ['tag3']}, - ] - nested_struct = py2mat(nested_dicts) - print("Original: ", nested_dicts) - - nested_dict2 = mat2py(nested_struct) - print("Converted back (not fully compatible): ", nested_dict2) - - scipy.io.savemat('test2.mat', {'nested_struct': nested_struct}) - nested_struct2 = scipy.io.loadmat('test2.mat') - nested_struct2 = nested_struct2['nested_struct'][0] - nested_dict3 = mat2py(nested_struct2) - print("Converted: ", nested_dict3) - - # Test list of dictionaries as values - print("\n=== List of Dictionaries Test ===") - list_dict_data = [ - {'id': 1, 'measurements': [{'sensor': 'A', 'reading': 1.2}, {'sensor': 'B', 'reading': 2.3}]}, - { - 'id': 2, - 'measurements': [ - {'sensor': 'A', 'reading': 3.4}, - {'sensor': 'B', 'reading': 4.5}, - {'sensor': 'C', 'reading': 5.6}, - ], - }, - ] - list_dict_struct = py2mat(list_dict_data) - scipy.io.savemat('test3.mat', {'list_dict_struct': list_dict_struct}) - list_dict_struct2 = scipy.io.loadmat('test3.mat') - list_dict_struct2 = list_dict_struct2['list_dict_struct'][0] - list_dict_data3 = mat2py(list_dict_struct2) - print("Original: ", list_dict_data) - print("Converted: ", list_dict_data3) - - # Test single dictionary input - print("\n=== Single Dictionary Test ===") - single_dict = {'x': 1, 'y': 2, 'nested': {'a': 'hello', 'b': 'world'}} - single_struct = py2mat(single_dict) - scipy.io.savemat('test4.mat', {'single_struct': single_struct}) - single_struct2 = scipy.io.loadmat('test4.mat') - single_struct2 = single_struct2['single_struct'][0] - single_dict2 = mat2py(single_struct2) - print("Original: ", single_dict) - print("Converted: ", single_dict2) - - # Test numpy array of dictionaries - print("\n=== NumPy Array of Dictionaries Test ===") - dict_array = np.array( - [{'name': 'sensor1', 'value': 1.1}, {'name': 'sensor2', 'value': 2.2}, {'name': 'sensor3', 'value': 3.3}], - dtype=object, - ) - - array_dict_data = [ - {'id': 'device1', 'sensors': dict_array}, - { - 'id': 'device2', - 'sensors': np.array([{'name': 'sensorA', 'value': 4.4}, {'name': 'sensorB', 'value': 5.5}], dtype=object), - }, - ] - - array_dict_struct = py2mat(array_dict_data) - scipy.io.savemat('test5.mat', {'array_dict_struct': array_dict_struct}) - array_dict_struct2 = scipy.io.loadmat('test5.mat') - array_dict_struct2 = array_dict_struct2['array_dict_struct'][0] - array_dict_data2 = mat2py(array_dict_struct2) - print("Original: ", array_dict_data) - print("Converted: ", array_dict_data2) # Numpy array gets converted to a list of dicts - - params = [np.vstack([np.arange(1, 21), np.arange(101, 121)]), [[5, 8]], 10.0, [{'latency': 5.0}, {'latency': 10.0}]] - params_struct = py2mat(params) - scipy.io.savemat('test6.mat', {'params_struct': params_struct}) - params_struct2 = scipy.io.loadmat('test6.mat') - params_struct2 = params_struct2['params_struct'][0] - params_data2 = mat2py(params_struct2) - print("Original: ", params) - print("Converted: ", params_data2) - - # EEGLAB dataset - eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set' - from eegprep.functions.popfunc.pop_loadset import pop_loadset - - pop_loadset(eeglab_file_path) - - # pop_loadset wihtout index adjustment - EEG_LOADMAT = scipy.io.loadmat(eeglab_file_path) - EEG_LOADMAT = mat2py(EEG_LOADMAT['EEG'][0]) - - # pop_saveset without index adjustment - EEG_TMP = EEG_LOADMAT.copy() - EEG_TMP = py2mat(EEG_TMP) - scipy.io.savemat('test7.set', {'EEG': EEG_TMP}) - - # load again - EEG_LOADMAT2 = scipy.io.loadmat('test7.set') - EEG_LOADMAT2 = mat2py(EEG_LOADMAT2['EEG'][0]) - - # Limitations - print("\n=== Limitations ===") - print( - "- Conversion back: py2mat then mat2py does not always work for nested structures (works when the file is saved as a .mat file)" - ) - print("- Numpy arrays of dicts are converted to lists of dicts (an intented feature)") - - -if __name__ == "__main__": - test_py2mat() diff --git a/src/eegprep/functions/eegobj/eegobj.py b/src/eegprep/functions/eegobj/eegobj.py index ad060af8..ca13515f 100644 --- a/src/eegprep/functions/eegobj/eegobj.py +++ b/src/eegprep/functions/eegobj/eegobj.py @@ -255,8 +255,3 @@ def _safe(val, default=''): return '\n'.join(lines) __str__ = __repr__ - - -if __name__ == '__main__': - obj = EEGobj('sample_data/eeglab_data.set') - print(obj) diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py index dfcf9098..4e74e90c 100644 --- a/src/eegprep/functions/guifunc/menu_actions.py +++ b/src/eegprep/functions/guifunc/menu_actions.py @@ -1423,34 +1423,36 @@ def _run_dipfit_function(self, name: str, parent: Any | None) -> None: if name == "pop_dipfit_headmodel": from eegprep.plugins.dipfit.pop_dipfit_headmodel import pop_dipfit_headmodel - pop_dipfit_headmodel(selection, return_com=True) - return - if name == "pop_dipfit_gridsearch": + out = pop_dipfit_headmodel(selection, return_com=True) + elif name == "pop_dipfit_gridsearch": from eegprep.plugins.dipfit.pop_dipfit_gridsearch import pop_dipfit_gridsearch - pop_dipfit_gridsearch(selection, return_com=True) - return - if name == "pop_dipfit_nonlinear": + out = pop_dipfit_gridsearch(selection, return_com=True) + elif name == "pop_dipfit_nonlinear": from eegprep.plugins.dipfit.pop_dipfit_nonlinear import pop_dipfit_nonlinear - pop_dipfit_nonlinear(selection, return_com=True) - return - if name == "pop_multifit": + out = pop_dipfit_nonlinear(selection, return_com=True) + elif name == "pop_multifit": from eegprep.plugins.dipfit.pop_multifit import pop_multifit - pop_multifit(selection, return_com=True) - return - if name == "pop_leadfield": + out = pop_multifit(selection, return_com=True) + elif name == "pop_leadfield": from eegprep.plugins.dipfit.pop_leadfield import pop_leadfield - pop_leadfield(selection, return_com=True) - return - if name == "pop_dipfit_loreta": + out = pop_leadfield(selection, return_com=True) + elif name == "pop_dipfit_loreta": from eegprep.plugins.dipfit.pop_dipfit_loreta import pop_dipfit_loreta - pop_dipfit_loreta(selection, return_com=True) + out = pop_dipfit_loreta(selection, return_com=True) + else: + self.show_coming_soon(name, parent) + return + if not isinstance(out, tuple): return - self.show_coming_soon(name, parent) + eeg_out, command = out[0], out[1] if len(out) > 1 else "" + if command: + self._store_current_from_gui(eeg_out, command=command) + self._refresh() def _plot_channel_locations(self, variant: str, parent: Any | None) -> None: selection = self._current_selection_or_warn(parent) diff --git a/src/eegprep/functions/guifunc/visual_capture.py b/src/eegprep/functions/guifunc/visual_capture.py index fd51ef21..bb6f89c1 100644 --- a/src/eegprep/functions/guifunc/visual_capture.py +++ b/src/eegprep/functions/guifunc/visual_capture.py @@ -7,7 +7,7 @@ import os import pathlib import sys -from typing import Any +from typing import Any, Callable import matplotlib.pyplot as plt import numpy as np @@ -1339,237 +1339,171 @@ def capture_pophelp_dialog(output: pathlib.Path, function_name: str) -> None: _grab_dialog(dialog, output, app) +CaptureHandler = Callable[[pathlib.Path], None] + + +def _capture_with(function: Callable[..., None], **kwargs: Any) -> CaptureHandler: + def handler(output: pathlib.Path) -> None: + function(output, **kwargs) + + return handler + + +def _capture_case_handlers() -> dict[str, CaptureHandler]: + handlers: dict[str, CaptureHandler] = { + "adjust_events_dialog": capture_adjust_events_dialog, + "main_window": capture_main_window, + "main_window_continuous": _capture_with(capture_main_window, state="continuous"), + "main_window_epoched": _capture_with(capture_main_window, state="epoched"), + "main_window_multiple": _capture_with(capture_main_window, state="multiple"), + "main_window_study": _capture_with(capture_main_window, state="study"), + "file_menu": _capture_with(capture_main_window, menu_label="File"), + "edit_menu": _capture_with(capture_main_window, menu_label="Edit"), + "tools_menu": _capture_with(capture_main_window, menu_label="Tools"), + "plot_menu": _capture_with(capture_main_window, menu_label="Plot"), + "study_menu": _capture_with(capture_main_window, menu_label="Study"), + "datasets_menu": _capture_with(capture_main_window, menu_label="Datasets"), + "help_menu": _capture_with(capture_main_window, menu_label="Help"), + "pop_comments_dialog": capture_pop_comments_dialog, + "pop_editset_dialog": capture_pop_editset_dialog, + "pop_editeventfield_dialog": capture_pop_editeventfield_dialog, + "pop_editeventvals_dialog": capture_pop_editeventvals_dialog, + "pop_selectevent_dialog": capture_pop_selectevent_dialog, + "pop_rmdat_dialog": capture_pop_rmdat_dialog, + "pop_chanedit_dialog": capture_pop_chanedit_dialog, + "pop_copyset_dialog": capture_pop_copyset_dialog, + "pop_mergeset_dialog": capture_pop_mergeset_dialog, + "pop_study_dialog": capture_pop_study_dialog, + "pop_studydesign_dialog": capture_pop_studydesign_dialog, + "pop_precomp_dialog": capture_pop_precomp_dialog, + "pop_preclust_dialog": capture_pop_preclust_dialog, + "pop_clust_dialog": capture_pop_clust_dialog, + "pop_chanplot_dialog": capture_pop_chanplot_dialog, + "pop_clustedit_dialog": capture_pop_clustedit_dialog, + "reref_dialog": capture_reref_dialog, + "reref_dialog_channel_ref": _capture_with(capture_reref_dialog, variant="channels"), + "reref_dialog_huber_ref": _capture_with(capture_reref_dialog, variant="huber"), + "reref_dialog_interp_removed": _capture_with(capture_reref_dialog, variant="interp_removed"), + "pop_interp_dialog": capture_pop_interp_dialog, + "pop_interp_removed_dialog": _capture_with(capture_pop_interp_dialog, variant="removed"), + "pop_interp_epoched_dialog": _capture_with(capture_pop_interp_dialog, variant="epoched"), + "pop_select_dialog": capture_pop_select_dialog, + "pop_resample_dialog": capture_pop_resample_dialog, + "pop_newset_dialog": capture_pop_newset_dialog, + "pop_rmbase_dialog": capture_pop_rmbase_dialog, + "pop_eegfilt_dialog": capture_pop_eegfilt_dialog, + "pop_eegfiltnew_dialog": capture_pop_eegfiltnew_dialog, + "pop_firws_dialog": capture_pop_firws_dialog, + "pop_firpm_dialog": capture_pop_firpm_dialog, + "pop_firma_dialog": capture_pop_firma_dialog, + "pop_kaiserbeta_dialog": capture_pop_kaiserbeta_dialog, + "pop_firwsord_dialog": capture_pop_firwsord_dialog, + "pop_firpmord_dialog": capture_pop_firpmord_dialog, + "pop_xfirws_dialog": capture_pop_xfirws_dialog, + "pop_epoch_dialog": capture_pop_epoch_dialog, + "pop_topoplot_erp_dialog": _capture_with(capture_pop_topoplot_dialog, variant="erp"), + "pop_topoplot_components_dialog": _capture_with(capture_pop_topoplot_dialog, variant="components"), + "pop_spectopo_channels_dialog": _capture_with(capture_pop_spectopo_dialog, variant="channels"), + "pop_spectopo_components_dialog": _capture_with(capture_pop_spectopo_dialog, variant="components"), + "pop_prop_channels_dialog": _capture_with(capture_pop_prop_dialog, variant="channels"), + "pop_prop_components_dialog": _capture_with(capture_pop_prop_dialog, variant="components"), + "pop_timtopo_dialog": capture_pop_timtopo_dialog, + "pop_plottopo_dialog": capture_pop_plottopo_dialog, + "pop_headplot_erp_dialog": _capture_with(capture_pop_headplot_dialog, variant="erp"), + "pop_headplot_components_dialog": _capture_with(capture_pop_headplot_dialog, variant="components"), + "coregister_dialog": capture_coregister_dialog, + "pop_plotdata_dialog": capture_pop_plotdata_dialog, + "pop_erpimage_channels_dialog": _capture_with(capture_pop_erpimage_dialog, variant="channels"), + "pop_erpimage_components_dialog": _capture_with(capture_pop_erpimage_dialog, variant="components"), + "pop_envtopo_dialog": capture_pop_envtopo_dialog, + "pop_comperp_channels_dialog": _capture_with(capture_pop_comperp_dialog, variant="channels"), + "pop_comperp_components_dialog": _capture_with(capture_pop_comperp_dialog, variant="components"), + "pop_newtimef_channels_dialog": _capture_with(capture_pop_newtimef_dialog, variant="channels"), + "pop_newtimef_components_dialog": _capture_with(capture_pop_newtimef_dialog, variant="components"), + "pop_newcrossf_channels_dialog": _capture_with(capture_pop_newcrossf_dialog, variant="channels"), + "pop_newcrossf_components_dialog": _capture_with(capture_pop_newcrossf_dialog, variant="components"), + "pop_signalstat_channels_dialog": _capture_with(capture_pop_signalstat_dialog, variant="channels"), + "pop_signalstat_components_dialog": _capture_with(capture_pop_signalstat_dialog, variant="components"), + "pop_eventstat_dialog": capture_pop_eventstat_dialog, + "pop_runica_dialog": capture_pop_runica_dialog, + "pop_runica_multiple_dialog": capture_pop_runica_multiple_dialog, + "pop_iclabel_dialog": capture_pop_iclabel_dialog, + "pop_icflag_dialog": capture_pop_icflag_dialog, + "iclabel_pop_prop_extended_dashboard": capture_pop_prop_extended_dashboard, + "pop_subcomp_dialog": capture_pop_subcomp_dialog, + "pop_clean_rawdata_dialog": capture_pop_clean_rawdata_dialog, + "pop_chansel_dialog": capture_pop_chansel_dialog, + "select_multiple_datasets_dialog": capture_select_multiple_datasets_dialog, + "pop_interp_dataset_index_dialog": capture_dataset_index_dialog, + "pop_reref_help_dialog": _capture_with(capture_pophelp_dialog, function_name="pop_reref"), + "pop_interp_help_dialog": _capture_with(capture_pophelp_dialog, function_name="pop_interp"), + } + handlers.update( + { + f"eegbrowser_{variant}": _capture_with(capture_eegbrowser, variant=variant) + for variant in ( + "continuous", + "continuous_marked", + "epoched", + "epoched_marked", + "events", + "grid_off", + "labels", + "component_activity", + "data2_overlay", + "spectral_overlay", + "pop_eegplot_reject_data", + "rejcont_continuous", + "rejection_epochs", + ) + } + ) + handlers.update( + { + case_id: _capture_with(capture_rejection_dialog, case_id=case_id) + for case_id in ( + "pop_autorej_dialog", + "pop_eegthresh_dialog", + "pop_jointprob_dialog", + "pop_rejchan_dialog", + "pop_rejcont_dialog", + "pop_rejkurt_dialog", + "pop_rejmenu_dialog", + "pop_rejspec_dialog", + "pop_rejtrend_dialog", + "pop_selectcomps_dialog", + "pop_viewprops_dialog", + ) + } + ) + handlers.update( + { + case_id: _capture_with(capture_dipfit_dialog, case_id=case_id) + for case_id in ( + "pop_dipfit_settings_dialog", + "pop_dipfit_headmodel_dialog", + "pop_dipfit_gridsearch_dialog", + "pop_dipfit_nonlinear_dialog", + "pop_dipplot_dialog", + "pop_multifit_dialog", + "pop_leadfield_dialog", + "pop_dipfit_loreta_dialog", + ) + } + ) + return handlers + + def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--case", required=True) parser.add_argument("--output", required=True, type=pathlib.Path) args = parser.parse_args(argv) - if args.case == "adjust_events_dialog": - capture_adjust_events_dialog(args.output) - elif args.case == "main_window": - capture_main_window(args.output) - elif args.case == "main_window_continuous": - capture_main_window(args.output, state="continuous") - elif args.case == "main_window_epoched": - capture_main_window(args.output, state="epoched") - elif args.case == "main_window_multiple": - capture_main_window(args.output, state="multiple") - elif args.case == "main_window_study": - capture_main_window(args.output, state="study") - elif args.case == "eegbrowser_continuous": - capture_eegbrowser(args.output, variant="continuous") - elif args.case == "eegbrowser_continuous_marked": - capture_eegbrowser(args.output, variant="continuous_marked") - elif args.case == "eegbrowser_epoched": - capture_eegbrowser(args.output, variant="epoched") - elif args.case == "eegbrowser_epoched_marked": - capture_eegbrowser(args.output, variant="epoched_marked") - elif args.case == "eegbrowser_events": - capture_eegbrowser(args.output, variant="events") - elif args.case == "eegbrowser_grid_off": - capture_eegbrowser(args.output, variant="grid_off") - elif args.case == "eegbrowser_labels": - capture_eegbrowser(args.output, variant="labels") - elif args.case == "eegbrowser_component_activity": - capture_eegbrowser(args.output, variant="component_activity") - elif args.case == "eegbrowser_data2_overlay": - capture_eegbrowser(args.output, variant="data2_overlay") - elif args.case == "eegbrowser_spectral_overlay": - capture_eegbrowser(args.output, variant="spectral_overlay") - elif args.case == "eegbrowser_pop_eegplot_reject_data": - capture_eegbrowser(args.output, variant="pop_eegplot_reject_data") - elif args.case == "eegbrowser_rejcont_continuous": - capture_eegbrowser(args.output, variant="rejcont_continuous") - elif args.case == "eegbrowser_rejection_epochs": - capture_eegbrowser(args.output, variant="rejection_epochs") - elif args.case == "file_menu": - capture_main_window(args.output, menu_label="File") - elif args.case == "edit_menu": - capture_main_window(args.output, menu_label="Edit") - elif args.case == "tools_menu": - capture_main_window(args.output, menu_label="Tools") - elif args.case == "plot_menu": - capture_main_window(args.output, menu_label="Plot") - elif args.case == "study_menu": - capture_main_window(args.output, menu_label="Study") - elif args.case == "datasets_menu": - capture_main_window(args.output, menu_label="Datasets") - elif args.case == "help_menu": - capture_main_window(args.output, menu_label="Help") - elif args.case == "pop_comments_dialog": - capture_pop_comments_dialog(args.output) - elif args.case == "pop_editset_dialog": - capture_pop_editset_dialog(args.output) - elif args.case == "pop_editeventfield_dialog": - capture_pop_editeventfield_dialog(args.output) - elif args.case == "pop_editeventvals_dialog": - capture_pop_editeventvals_dialog(args.output) - elif args.case == "pop_selectevent_dialog": - capture_pop_selectevent_dialog(args.output) - elif args.case == "pop_rmdat_dialog": - capture_pop_rmdat_dialog(args.output) - elif args.case == "pop_chanedit_dialog": - capture_pop_chanedit_dialog(args.output) - elif args.case == "pop_copyset_dialog": - capture_pop_copyset_dialog(args.output) - elif args.case == "pop_mergeset_dialog": - capture_pop_mergeset_dialog(args.output) - elif args.case == "pop_study_dialog": - capture_pop_study_dialog(args.output) - elif args.case == "pop_studydesign_dialog": - capture_pop_studydesign_dialog(args.output) - elif args.case == "pop_precomp_dialog": - capture_pop_precomp_dialog(args.output) - elif args.case == "pop_preclust_dialog": - capture_pop_preclust_dialog(args.output) - elif args.case == "pop_clust_dialog": - capture_pop_clust_dialog(args.output) - elif args.case == "pop_chanplot_dialog": - capture_pop_chanplot_dialog(args.output) - elif args.case == "pop_clustedit_dialog": - capture_pop_clustedit_dialog(args.output) - elif args.case == "reref_dialog": - capture_reref_dialog(args.output) - elif args.case == "reref_dialog_channel_ref": - capture_reref_dialog(args.output, variant="channels") - elif args.case == "reref_dialog_huber_ref": - capture_reref_dialog(args.output, variant="huber") - elif args.case == "reref_dialog_interp_removed": - capture_reref_dialog(args.output, variant="interp_removed") - elif args.case == "pop_interp_dialog": - capture_pop_interp_dialog(args.output) - elif args.case == "pop_interp_removed_dialog": - capture_pop_interp_dialog(args.output, variant="removed") - elif args.case == "pop_interp_epoched_dialog": - capture_pop_interp_dialog(args.output, variant="epoched") - elif args.case == "pop_select_dialog": - capture_pop_select_dialog(args.output) - elif args.case == "pop_resample_dialog": - capture_pop_resample_dialog(args.output) - elif args.case == "pop_newset_dialog": - capture_pop_newset_dialog(args.output) - elif args.case == "pop_rmbase_dialog": - capture_pop_rmbase_dialog(args.output) - elif args.case == "pop_eegfilt_dialog": - capture_pop_eegfilt_dialog(args.output) - elif args.case == "pop_eegfiltnew_dialog": - capture_pop_eegfiltnew_dialog(args.output) - elif args.case == "pop_firws_dialog": - capture_pop_firws_dialog(args.output) - elif args.case == "pop_firpm_dialog": - capture_pop_firpm_dialog(args.output) - elif args.case == "pop_firma_dialog": - capture_pop_firma_dialog(args.output) - elif args.case == "pop_kaiserbeta_dialog": - capture_pop_kaiserbeta_dialog(args.output) - elif args.case == "pop_firwsord_dialog": - capture_pop_firwsord_dialog(args.output) - elif args.case == "pop_firpmord_dialog": - capture_pop_firpmord_dialog(args.output) - elif args.case == "pop_xfirws_dialog": - capture_pop_xfirws_dialog(args.output) - elif args.case == "pop_epoch_dialog": - capture_pop_epoch_dialog(args.output) - elif args.case == "pop_topoplot_erp_dialog": - capture_pop_topoplot_dialog(args.output, variant="erp") - elif args.case == "pop_topoplot_components_dialog": - capture_pop_topoplot_dialog(args.output, variant="components") - elif args.case == "pop_spectopo_channels_dialog": - capture_pop_spectopo_dialog(args.output, variant="channels") - elif args.case == "pop_spectopo_components_dialog": - capture_pop_spectopo_dialog(args.output, variant="components") - elif args.case == "pop_prop_channels_dialog": - capture_pop_prop_dialog(args.output, variant="channels") - elif args.case == "pop_prop_components_dialog": - capture_pop_prop_dialog(args.output, variant="components") - elif args.case == "pop_timtopo_dialog": - capture_pop_timtopo_dialog(args.output) - elif args.case == "pop_plottopo_dialog": - capture_pop_plottopo_dialog(args.output) - elif args.case == "pop_headplot_erp_dialog": - capture_pop_headplot_dialog(args.output, variant="erp") - elif args.case == "pop_headplot_components_dialog": - capture_pop_headplot_dialog(args.output, variant="components") - elif args.case == "coregister_dialog": - capture_coregister_dialog(args.output) - elif args.case == "pop_plotdata_dialog": - capture_pop_plotdata_dialog(args.output) - elif args.case == "pop_erpimage_channels_dialog": - capture_pop_erpimage_dialog(args.output, variant="channels") - elif args.case == "pop_erpimage_components_dialog": - capture_pop_erpimage_dialog(args.output, variant="components") - elif args.case == "pop_envtopo_dialog": - capture_pop_envtopo_dialog(args.output) - elif args.case == "pop_comperp_channels_dialog": - capture_pop_comperp_dialog(args.output, variant="channels") - elif args.case == "pop_comperp_components_dialog": - capture_pop_comperp_dialog(args.output, variant="components") - elif args.case == "pop_newtimef_channels_dialog": - capture_pop_newtimef_dialog(args.output, variant="channels") - elif args.case == "pop_newtimef_components_dialog": - capture_pop_newtimef_dialog(args.output, variant="components") - elif args.case == "pop_newcrossf_channels_dialog": - capture_pop_newcrossf_dialog(args.output, variant="channels") - elif args.case == "pop_newcrossf_components_dialog": - capture_pop_newcrossf_dialog(args.output, variant="components") - elif args.case == "pop_signalstat_channels_dialog": - capture_pop_signalstat_dialog(args.output, variant="channels") - elif args.case == "pop_signalstat_components_dialog": - capture_pop_signalstat_dialog(args.output, variant="components") - elif args.case == "pop_eventstat_dialog": - capture_pop_eventstat_dialog(args.output) - elif args.case == "pop_runica_dialog": - capture_pop_runica_dialog(args.output) - elif args.case == "pop_runica_multiple_dialog": - capture_pop_runica_multiple_dialog(args.output) - elif args.case == "pop_iclabel_dialog": - capture_pop_iclabel_dialog(args.output) - elif args.case == "pop_icflag_dialog": - capture_pop_icflag_dialog(args.output) - elif args.case == "iclabel_pop_prop_extended_dashboard": - capture_pop_prop_extended_dashboard(args.output) - elif args.case == "pop_subcomp_dialog": - capture_pop_subcomp_dialog(args.output) - elif args.case in { - "pop_autorej_dialog", - "pop_eegthresh_dialog", - "pop_jointprob_dialog", - "pop_rejchan_dialog", - "pop_rejcont_dialog", - "pop_rejkurt_dialog", - "pop_rejmenu_dialog", - "pop_rejspec_dialog", - "pop_rejtrend_dialog", - "pop_selectcomps_dialog", - "pop_viewprops_dialog", - }: - capture_rejection_dialog(args.output, case_id=args.case) - elif args.case in { - "pop_dipfit_settings_dialog", - "pop_dipfit_headmodel_dialog", - "pop_dipfit_gridsearch_dialog", - "pop_dipfit_nonlinear_dialog", - "pop_dipplot_dialog", - "pop_multifit_dialog", - "pop_leadfield_dialog", - "pop_dipfit_loreta_dialog", - }: - capture_dipfit_dialog(args.output, case_id=args.case) - elif args.case == "pop_clean_rawdata_dialog": - capture_pop_clean_rawdata_dialog(args.output) - elif args.case == "pop_chansel_dialog": - capture_pop_chansel_dialog(args.output) - elif args.case == "select_multiple_datasets_dialog": - capture_select_multiple_datasets_dialog(args.output) - elif args.case == "pop_interp_dataset_index_dialog": - capture_dataset_index_dialog(args.output) - elif args.case == "pop_reref_help_dialog": - capture_pophelp_dialog(args.output, "pop_reref") - elif args.case == "pop_interp_help_dialog": - capture_pophelp_dialog(args.output, "pop_interp") - else: + handler = _capture_case_handlers().get(args.case) + if handler is None: parser.error(f"unsupported EEGPrep visual capture case: {args.case}") + handler(args.output) return 0 diff --git a/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py b/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py index 7567697c..749b703d 100644 --- a/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py +++ b/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py @@ -52,21 +52,3 @@ def save_dict_to_hdf5(data, filename, dataset_name): # Save to HDF5 with h5py.File(filename, 'w') as hdf: hdf.create_dataset(dataset_name, data=structured_data) - - -if __name__ == '__main__': - data = { - 'labels': 'FPz', - 'theta': np.array([0, 1, 2, 3]), - 'radius': 0.5066888888888889, - 'X': 84.98123361344625, - 'Y': 0, - 'Z': -1.7860385037488253, - 'sph_theta': 0, - 'sph_phi': -1.203999999999994, - 'sph_radius': 85, - 'type': 'EEG', - 'urchan': 1, - 'ref': None, - } - save_dict_to_hdf5(data, 'data.h5', 'dataset_name') diff --git a/src/eegprep/functions/popfunc/eeg_compare.py b/src/eegprep/functions/popfunc/eeg_compare.py index 8cd52c48..468e3a6a 100644 --- a/src/eegprep/functions/popfunc/eeg_compare.py +++ b/src/eegprep/functions/popfunc/eeg_compare.py @@ -388,16 +388,3 @@ def get_val2(f): raise ValueError(error_message) return summary - - -# add test data and compare with it - -# load test data -if __name__ == '__main__': - from eegprep import pop_loadset - - eeg1 = pop_loadset('../../sample_data/eeglab_data_tmp.set') - eeg2 = pop_loadset('../../sample_data/eeglab_data_tmp.set') - - # compare - eeg_compare(eeg1, eeg2) diff --git a/src/eegprep/functions/popfunc/eeg_eegrej.py b/src/eegprep/functions/popfunc/eeg_eegrej.py index a3e01616..40958f20 100644 --- a/src/eegprep/functions/popfunc/eeg_eegrej.py +++ b/src/eegprep/functions/popfunc/eeg_eegrej.py @@ -1,11 +1,15 @@ """EEG data rejection functions.""" +import logging from typing import List, Dict, Optional, Tuple import numpy as np from copy import deepcopy from ..miscfunc.misc import round_mat +logger = logging.getLogger(__name__) + + def _is_boundary_event(event: Dict) -> bool: t = event.get("type") if isinstance(t, str): @@ -359,7 +363,7 @@ def _combine_regions(regs): merged.append([beg, end]) newregs = np.asarray(merged, dtype=np.int64) if newregs.shape[0] != regs.shape[0]: - print("Warning: overlapping regions detected and fixed in eeg_eegrej") + logger.warning("Overlapping regions detected and fixed in eeg_eegrej") return newregs diff --git a/src/eegprep/functions/popfunc/eeg_interp.py b/src/eegprep/functions/popfunc/eeg_interp.py index 57f62b2a..8ddf8eb8 100644 --- a/src/eegprep/functions/popfunc/eeg_interp.py +++ b/src/eegprep/functions/popfunc/eeg_interp.py @@ -4,21 +4,12 @@ methods including spherical spline interpolation. """ -# to do, look at line 83 and 84 and try to see if the MATLAB array output match. Run code side by side. - -# EEG = pop_loadset('sample_data/eeglab_data_tmp.set'); -# EEG = eeg_interp(EEG, [1, 2, 3], 'spherical'); % or EEG = eeg_interp(EEG, {'Fp1' 'Fp2' 'F7'}, 'spherical'); -# pop_save(EEG, 'sample_data/eeglab_data_tmp_out_matlab.set'); - import numpy as np from scipy.linalg import pinv from scipy.interpolate import RBFInterpolator, griddata from scipy.special import lpmv from copy import deepcopy -# absolute path for all files in data folder -data_path = '/Users/arno/Python/eegprep/sample_data/' # os.path.abspath('sample_data/') - def eeg_interp(EEG, bad_chans, method='spherical', t_range=None, params=None, dtype='float32'): """Interpolate missing or bad EEG channels using spherical spline. @@ -525,153 +516,3 @@ def computeg(x, y, z, xelec, yelec, zelec, params): g += ((2 * n + 1) / (n**m * (n + 1) ** m)) * Pn return g / (4 * np.pi) - - -# Test functions moved to tests/test_eeg_interp.py - - -def test_chanloc_interpolation(): - """Example usage of the new chanloc interpolation functionality. - - This demonstrates the three different cases. - """ - # Create a sample EEG structure - EEG = { - 'data': np.random.randn(4, 100, 1), # 4 channels, 100 time points, 1 trial - 'nbchan': 4, - 'pnts': 100, - 'trials': 1, - 'srate': 500, - 'xmin': 0, - 'xmax': 0.2, - 'chanlocs': [ - {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6}, - {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6}, - {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7}, - {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7}, - ], - } - - print("Original EEG structure:") - print(f"Data shape: {EEG['data'].shape}") - print(f"Number of channels: {EEG['nbchan']}") - print(f"Channel labels: {[ch['labels'] for ch in EEG['chanlocs']]}") - - # Case 1: Identical chanlocs (should return unchanged) - identical_chanlocs = EEG['chanlocs'].copy() - result1 = eeg_interp(EEG.copy(), identical_chanlocs) - print("\nCase 1 - Identical chanlocs:") - print(f"Data shape unchanged: {result1['data'].shape == EEG['data'].shape}") - print(f"Data is identical: {np.array_equal(result1['data'], EEG['data'])}") - - # Case 2: No overlap (should append new channels) - new_chanlocs = [ - {'labels': 'T7', 'X': 0.8, 'Y': 0.0, 'Z': 0.6}, - {'labels': 'T8', 'X': -0.8, 'Y': 0.0, 'Z': 0.6}, - ] - result2 = eeg_interp(EEG.copy(), new_chanlocs) - print("\nCase 2 - No overlap (append new channels):") - print(f"Original channels: {EEG['nbchan']}, After: {result2['nbchan']}") - print(f"Data shape: {EEG['data'].shape} -> {result2['data'].shape}") - print(f"New channel labels: {[ch['labels'] for ch in result2['chanlocs']]}") - - # Case 3: Existing channels are proper subset (should remap to new structure) - superset_chanlocs = [ - {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6}, - {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6}, - {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7}, - {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7}, - {'labels': 'C3', 'X': 0.6, 'Y': 0.0, 'Z': 0.8}, - {'labels': 'C4', 'X': -0.6, 'Y': 0.0, 'Z': 0.8}, - ] - result3 = eeg_interp(EEG.copy(), superset_chanlocs) - print("\nCase 3 - Existing subset of new structure:") - print(f"Original channels: {EEG['nbchan']}, After: {result3['nbchan']}") - print(f"Data shape: {EEG['data'].shape} -> {result3['data'].shape}") - print(f"Final channel labels: {[ch['labels'] for ch in result3['chanlocs']]}") - - return result1, result2, result3 - - -def test_ica_indices_update(): - """Test that ICA channel indices are properly updated when channels are. - - reordered. - - Test that ICA channel indices are properly updated when channels are - - reordered during interpolation with chanloc structures. - """ - # Create a sample EEG structure with ICA data - EEG = { - 'data': np.random.randn(4, 100, 1), # 4 channels, 100 time points, 1 trial - 'nbchan': 4, - 'pnts': 100, - 'trials': 1, - 'srate': 500, - 'xmin': 0, - 'xmax': 0.2, - 'chanlocs': [ - {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6}, - {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6}, - {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7}, - {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7}, - ], - # Add ICA fields - 'icasphere': np.eye(4), # 4x4 identity matrix (not empty) - 'icaweights': np.random.randn(4, 4), - 'icawinv': np.random.randn(4, 4), - 'icachansind': [0, 1, 2, 3], # All channels used for ICA (0-based) - 'chaninfo': { - 'icachansind': [0, 1, 2, 3], - }, - } - - print("Original EEG structure with ICA:") - print(f"Data shape: {EEG['data'].shape}") - print(f"Number of channels: {EEG['nbchan']}") - print(f"Channel labels: {[ch['labels'] for ch in EEG['chanlocs']]}") - print(f"ICA channel indices: {EEG['icachansind']}") - print(f"Chaninfo ICA indices: {EEG['chaninfo']['icachansind']}") - - # Test Case: Subset interpolation that causes channel reordering - # Create a superset where the existing channels appear in different order - superset_chanlocs = [ - {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7}, # was index 2, now 0 - {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6}, # was index 0, now 1 - {'labels': 'C3', 'X': 0.6, 'Y': 0.0, 'Z': 0.8}, # new channel, index 2 - {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6}, # was index 1, now 3 - {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7}, # was index 3, now 4 - {'labels': 'C4', 'X': -0.6, 'Y': 0.0, 'Z': 0.8}, # new channel, index 5 - ] - - result = eeg_interp(EEG.copy(), superset_chanlocs) - - print("\nAfter interpolation with reordering:") - print(f"Data shape: {EEG['data'].shape} -> {result['data'].shape}") - print(f"Number of channels: {EEG['nbchan']} -> {result['nbchan']}") - print(f"Channel labels: {[ch['labels'] for ch in result['chanlocs']]}") - print(f"ICA channel indices: {EEG['icachansind']} -> {result['icachansind']}") - - # Verify the mapping is correct: - # Original: Fp1=0, Fp2=1, F3=2, F4=3 - # New: F3=0, Fp1=1, C3=2, Fp2=3, F4=4, C4=5 - # So ICA indices should be updated: [0,1,2,3] -> [1,3,0,4] - expected_indices = [1, 3, 0, 4] # New positions of Fp1, Fp2, F3, F4 - - print(f"Expected ICA indices: {expected_indices}") - print(f"Actual ICA indices: {result['icachansind']}") - print(f"Mapping correct: {result['icachansind'] == expected_indices}") - - # Also verify chaninfo is updated - if 'chaninfo' in result and 'icachansind' in result['chaninfo']: - print(f"Chaninfo ICA indices: {result['chaninfo']['icachansind']}") - print(f"Chaninfo mapping correct: {result['chaninfo']['icachansind'] == expected_indices}") - - return result - - -# Uncomment to run the tests -# if __name__ == '__main__': -# test_chanloc_interpolation() -# test_ica_indices_update() diff --git a/src/eegprep/functions/popfunc/eeg_lat2point.py b/src/eegprep/functions/popfunc/eeg_lat2point.py index fd98161c..49c813d1 100644 --- a/src/eegprep/functions/popfunc/eeg_lat2point.py +++ b/src/eegprep/functions/popfunc/eeg_lat2point.py @@ -1,8 +1,13 @@ """EEG latency to point conversion utilities.""" +import logging + import numpy as np +logger = logging.getLogger(__name__) + + def eeg_lat2point(lat_array, epoch_array, srate, timewin, timeunit=1.0, **kwargs): """Convert latencies in time units (relative to per-epoch time 0) to latencies in data points assuming concatenated epochs (EEGLAB style). @@ -70,7 +75,7 @@ def eeg_lat2point(lat_array, epoch_array, srate, timewin, timeunit=1.0, **kwargs newlat[idx] = max_valid flag = 1 # mirror MATLAB's informational message - print('eeg_lat2point(): Points out of range detected. Points replaced with maximum value') + logger.warning("Points out of range detected. Points replaced with maximum value") else: raise ValueError('Error in eeg_lat2point(): Points out of range detected') diff --git a/src/eegprep/functions/popfunc/pop_load_frombids.py b/src/eegprep/functions/popfunc/pop_load_frombids.py index 4e21a456..57b6fa18 100644 --- a/src/eegprep/functions/popfunc/pop_load_frombids.py +++ b/src/eegprep/functions/popfunc/pop_load_frombids.py @@ -1083,7 +1083,7 @@ def error(msg: str): EEG = eeg_checkchanlocs(EEG) except ImportError: - print("eeg_checkchanlocs not available, skipping channel location check.") + logger.info("eeg_checkchanlocs not available, skipping channel location check.") # Assign channel types based on channel labels (matching MATLAB's eeg_getchantype behavior) # Standard 10-20 channel names that should be classified as EEG diff --git a/src/eegprep/functions/popfunc/pop_loadset.py b/src/eegprep/functions/popfunc/pop_loadset.py index 5251e46a..f3bd30a4 100644 --- a/src/eegprep/functions/popfunc/pop_loadset.py +++ b/src/eegprep/functions/popfunc/pop_loadset.py @@ -186,19 +186,6 @@ def _is_on(value): return bool(value) -def test_pop_loadset(): - """Test the pop_loadset function with a sample file.""" - file_path = './tmp2.set' - file_path = '/System/Volumes/Data/data/data/STUDIES/STERN/S04/Memorize.set' #'./eeglab_data_with_ica_tmp.set' - EEG = pop_loadset(file_path) - - # print the keys of the EEG dictionary - print(EEG.keys()) - - -if __name__ == "__main__": - test_pop_loadset() - # STILL OPEN QUESTION: Better to have empty MATLAB arrays as None for empty numpy arrays (current default). # The current default is to make it more MALTAB compatible. A lot of MATLAB function start indexing MATLAB # empty arrays to add values to them. This is not possible with None and would create more conversion and diff --git a/src/eegprep/functions/popfunc/pop_loadset_h5.py b/src/eegprep/functions/popfunc/pop_loadset_h5.py index 0c491e24..3447882e 100644 --- a/src/eegprep/functions/popfunc/pop_loadset_h5.py +++ b/src/eegprep/functions/popfunc/pop_loadset_h5.py @@ -301,16 +301,3 @@ def handle_generic_group(EEGTMP, key): EEG = eeg_checkset(EEG) return EEG - - -if __name__ == '__main__': - file_name = 'sample_data/eeglab_data_epochs_ica_hdf5.set' - EEG = pop_loadset_h5(file_name) - print(EEG['data'].shape) - print(EEG['icaweights'].shape) - print(EEG['icasphere'].shape) - print(EEG['icawinv'].shape) - print(EEG['icaact'].shape) -# file_name = 'eeglab_cont73.set' -# EEG = pop_loadset_h5(file_name) -# EEG['data'].shape diff --git a/src/eegprep/functions/popfunc/pop_saveset.py b/src/eegprep/functions/popfunc/pop_saveset.py index e09df3a0..f11925dd 100644 --- a/src/eegprep/functions/popfunc/pop_saveset.py +++ b/src/eegprep/functions/popfunc/pop_saveset.py @@ -551,23 +551,6 @@ def _save_two_files(EEG, savemode): return bool(datfile and savemode == 'resave') or savetwofiles_enabled() -def test_pop_saveset(): - """Test pop_saveset function.""" - from eegprep.functions.popfunc.pop_loadset import pop_loadset - - file_path = './sample_data/eeglab_data_with_ica_tmp.set' - EEG = pop_loadset(file_path) - pop_saveset(EEG, '/Users/arno/Python/eegprep/sample_data/tmp.set') - pop_saveset_old( - EEG, '/Users/arno/Python/eegprep/sample_data/tmp2.set' - ) # does not do events and function above is better - # print the keys of the EEG dictionary - print(EEG.keys()) - - -if __name__ == '__main__': - test_pop_saveset() - # STILL OPEN QUESTION: Better to have empty MATLAB arrays as None for empty numpy arrays (current default). # The current default is to make it more MALTAB compatible. A lot of MATLAB function start indexing MATLAB # empty arrays to add values to them. This is not possible with None and would create more conversion and diff --git a/src/eegprep/functions/popfunc/pop_select.py b/src/eegprep/functions/popfunc/pop_select.py index 38a8926c..bba0a0aa 100644 --- a/src/eegprep/functions/popfunc/pop_select.py +++ b/src/eegprep/functions/popfunc/pop_select.py @@ -1,4 +1,5 @@ import copy +import logging import re from typing import Any @@ -18,6 +19,9 @@ from eegprep.functions.popfunc.eeg_eegrej import eeg_eegrej +logger = logging.getLogger(__name__) + + def pop_select(EEG, *args, gui=None, renderer=None, return_com=False, **kwargs): """Select EEG data using EEGLAB ``pop_select`` semantics.""" options = parse_key_value_args(args, kwargs) @@ -184,7 +188,7 @@ def _numeric_channel_indices(values): inds, _ = eeg_decodechan(EEG, g['channel'], 'labels', True) # show warning if not all channels are found and error if no channels are found if len(inds) != len(g['channel']): - print(f"Warning: {len(g['channel']) - len(inds)} channels not found") + logger.warning("%s channels not found", len(g['channel']) - len(inds)) if len(inds) == 0: raise ValueError(f"Channels not found: {g['channel']}") chan_selected_flag[:] = False @@ -199,7 +203,7 @@ def _numeric_channel_indices(values): chan_selected_flag[np.array(inds, dtype=int)] = False # show warning if not all channels are found and error if no channels are found if len(inds) != len(g['nochannel']): - print(f"Warning: {len(g['nochannel']) - len(inds)} channels not found") + logger.warning("%s channels not found", len(g['nochannel']) - len(inds)) else: # by type @@ -226,7 +230,7 @@ def _normalize_range_matrix(x): if x.size <= 2: return np.array(x).reshape(1, 2) # vector form → [first last] - print('Warning: vector format for point/time range is deprecated') + logger.warning("Vector format for point/time range is deprecated") return np.array([x[0], x[-1]], dtype=float).reshape(1, 2) if x.shape[1] != 2: raise ValueError('Time/point range must contain exactly 2 columns') @@ -289,14 +293,14 @@ def _clip_time_matrix(mat): # 4) Informational prints (optional) if len(g['trial']) != trials: - print(f"Removing {trials - len(g['trial'])} trial(s)...") + logger.info("Removing %s trial(s)...", trials - len(g['trial'])) if len(g['channel']) != nbchan: - print(f"Removing {nbchan - len(g['channel'])} channel(s)...") + logger.info("Removing %s channel(s)...", nbchan - len(g['channel'])) # 5) Recompute event epoch indices and latencies when trials are dropped if len(g['trial']) != trials and (EEG.get('event') is not None and len(EEG.get('event', [])) > 0): if not any('epoch' in ev for ev in EEG['event']): - print('Pop_epoch warning: bad event format with epoch dataset, removing events') + logger.warning("Bad event format with epoch dataset, removing events") EEG['event'] = [] else: keepevent = [] @@ -317,7 +321,7 @@ def _clip_time_matrix(mat): ev['epoch'] = int(newindex[0] + 1) # back to 1-based for consistency diffevent = np.setdiff1d(np.arange(len(EEG['event'])), np.array(keepevent, dtype=int)) if diffevent.size: - print(f"Pop_select: removing {diffevent.size} unreferenced events") + logger.info("Removing %s unreferenced events", diffevent.size) EEG['event'] = [EEG['event'][i] for i in range(len(EEG['event'])) if i in keepevent] # 6) Apply time selection @@ -434,7 +438,7 @@ def _clip_time_matrix(mat): # erase dipfit if channels removed if len(chan_idx) != nbchan and _has_content(EEG.get('dipfit')): - print('warning: erasing dipole information since channels have been removed') + logger.warning("Erasing dipole information since channels have been removed") EEG['dipfit'] = np.array([]) EEG['roi'] = {} @@ -769,11 +773,3 @@ def _history_command(options): for key, value in options.items(): parts.extend([f"'{key}'", format_history_value(value, empty_sequence="{}")]) return f"EEG = pop_select( EEG, {', '.join(parts)});" - - -if __name__ == '__main__': - from eegprep.functions.popfunc.pop_loadset import pop_loadset - - EEG = pop_loadset('sample_data/eeglab_data.set') - EEG2 = pop_select(EEG, channel=['FP1', 'FP2']) - print(EEG2) diff --git a/src/eegprep/plugins/ICLabel/eeg_autocorr.py b/src/eegprep/plugins/ICLabel/eeg_autocorr.py index 5bb01a22..ee8cd208 100644 --- a/src/eegprep/plugins/ICLabel/eeg_autocorr.py +++ b/src/eegprep/plugins/ICLabel/eeg_autocorr.py @@ -56,27 +56,3 @@ def eeg_autocorr(EEG, pct_data=None): ac = ac[:, 1:] return ac - - -def test_eeg_autocorr(): - """Test the eeg_autocorr function.""" - EEG = { - 'srate': 256, - 'icaweights': np.random.randn(10, 256), - 'pnts': 1000, - 'trials': 5, - 'icaact': np.random.randn(10, 1000, 5), - } - - eeg_autocorr(EEG, 100) - - # print information about psdmed - # print(psdmed.shape) - - # print(psdmed) - - # assert psdmed.shape == (10, 100) - # assert np.all(np.isfinite(psdmed)) - - -# test_eeg_autocorr() diff --git a/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py b/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py index 6c77a3bb..d00242cc 100644 --- a/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py +++ b/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py @@ -7,7 +7,6 @@ import numpy as np from scipy.fft import fft, ifft, next_fast_len from scipy.signal import resample_poly -from ...functions.popfunc.pop_loadset import pop_loadset def eeg_autocorr_fftw(EEG, pct_data=100): @@ -60,29 +59,3 @@ def eeg_autocorr_fftw(EEG, pct_data=100): ac = ac[:, 1:101] return ac - - -def test_eeg_autocorr_fftw(): - """Test function for eeg_autocorr_fftw.""" - EEG = { - 'srate': 256, - 'icaweights': np.random.randn(10, 256), - 'pnts': 1000, - 'trials': 5, - 'icaact': np.random.randn(10, 1000, 5), - } - EEG = pop_loadset('/System/Volumes/Data/data/data/STUDIES/STERN/S01/Memorize.set') - - # reshape the last two dimensions of EEG['icaact'] - # EEG['icaact'] = EEG['icaact'].reshape(EEG['icaact'].shape[0], -1) - - # convert EEG['icaact'] to double precision - - psdmed = eeg_autocorr_fftw(EEG, 100) - - # print information about psdmed - print(psdmed.shape) - print(psdmed) - - -# test_eeg_autocorr_fftw() diff --git a/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py b/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py index 62ed2c61..6dfaaf15 100644 --- a/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py +++ b/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py @@ -7,7 +7,6 @@ import numpy as np from scipy.signal import resample_poly import random -from ...functions.popfunc.pop_loadset import pop_loadset from numpy.fft import fft, ifft @@ -83,18 +82,3 @@ def eeg_autocorr_welch(EEG, pct_data=100): ac = ac[:, 1:101] return ac - - -def test_eeg_autocorr_welch(): - """Test function for eeg_autocorr_welch.""" - eeglab_file_path = './eeglab_data_with_ica_tmp.set' - EEG = pop_loadset(eeglab_file_path) - - eeg_autocorr_welch(EEG, 100) - - # print information about psdmed - # print(psdmed.shape) - # print(psdmed) - - -# test_eeg_autocorr_welch() diff --git a/src/eegprep/plugins/ICLabel/eeg_rpsd.py b/src/eegprep/plugins/ICLabel/eeg_rpsd.py index a4b52cc0..39e532a8 100644 --- a/src/eegprep/plugins/ICLabel/eeg_rpsd.py +++ b/src/eegprep/plugins/ICLabel/eeg_rpsd.py @@ -70,21 +70,3 @@ def eeg_rpsd(EEG, nfreqs=None, pct_data=100): psdmed[it, :] = 20 * np.log10(np.median(temp, axis=2)) return psdmed - - -def test_eeg_rpsd(): - """Test the eeg_rpsd function with sample data.""" - EEG = { - 'srate': 256, - 'icaweights': np.random.randn(10, 256), - 'pnts': 1000, - 'trials': 5, - 'icaact': np.random.randn(10, 1000, 5), - } - - psdmed = eeg_rpsd(EEG, 100) - assert psdmed.shape == (10, 100) - assert np.all(np.isfinite(psdmed)) - - -# test_eeg_rpsd() diff --git a/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py b/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py index f3b20c80..e4f01895 100644 --- a/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py +++ b/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py @@ -1,11 +1,14 @@ """ICLabel neural network model loading utilities.""" +import logging from pathlib import Path import scipy import scipy.io import torch +logger = logging.getLogger(__name__) + class Reshape(torch.nn.Module): """Reshape layer for PyTorch.""" @@ -42,11 +45,11 @@ def __init__(self, mat_path): iclabel_matlab = scipy.io.loadmat(mat_path) params = iclabel_matlab['params'][0] i = 11 - print('shape of param', i, torch.tensor(params[i][1]).shape) + logger.debug("shape of param %s: %s", i, torch.tensor(params[i][1]).shape) self.discriminator_image_layer1_conv = torch.nn.Conv2d( in_channels=1, out_channels=128, kernel_size=4, stride=2, padding=1, dilation=1 ) - print(self.discriminator_image_layer1_conv.weight.shape) + logger.debug("image layer 1 conv weight shape: %s", self.discriminator_image_layer1_conv.weight.shape) self.discriminator_image_layer1_conv.weight = torch.nn.Parameter(torch.tensor(params[0][1]).permute(3, 2, 0, 1)) self.discriminator_image_layer1_conv.bias = torch.nn.Parameter(torch.tensor(params[1][1]).squeeze()) self.discriminator_image_layer1_relu = torch.nn.LeakyReLU(0.2) @@ -132,7 +135,7 @@ def forward(self, image, psdmed, autocorr): x_image = self.discriminator_image_layer2_relu(x_image) x_image = self.discriminator_image_layer3_conv(x_image) x_image = self.discriminator_image_layer3_relu(x_image) - print('x_image', x_image.shape) + logger.debug("x_image shape: %s", x_image.shape) x_psdmed = self.discriminator_psdmed_layer1_conv_conv(psdmed) x_psdmed = self.discriminator_psdmed_layer1_conv_relu(x_psdmed) @@ -143,7 +146,7 @@ def forward(self, image, psdmed, autocorr): x_psdmed = self.discriminator_psdmed_reshape(x_psdmed) x_psdmed = self.discriminator_psdmed_concat1([x_psdmed] * 4) x_psdmed = self.discriminator_psdmed_concat2([x_psdmed] * 4) - print('x_psdmed', x_psdmed.shape) + logger.debug("x_psdmed shape: %s", x_psdmed.shape) x_autocorr = self.discriminator_autocorr_layer1_conv_conv(autocorr) x_autocorr = self.discriminator_autocorr_layer1_conv_relu(x_autocorr) @@ -154,11 +157,11 @@ def forward(self, image, psdmed, autocorr): x_autocorr = self.discriminator_autocorr_reshape(x_autocorr) x_autocorr = self.discriminator_autocorr_concat1([x_autocorr] * 4) x_autocorr = self.discriminator_autocorr_concat2([x_autocorr] * 4) - print('x_autocorr', x_autocorr.shape) + logger.debug("x_autocorr shape: %s", x_autocorr.shape) x = self.discriminator_concat([x_image, x_psdmed, x_autocorr]) x = self.discriminator_conv(x) - print('x', x.shape) + logger.debug("x shape: %s", x.shape) # subtract max value to avoid overflow x = x - torch.max(x, dim=1, keepdim=True).values x = self.discriminator_softmax(x) @@ -174,13 +177,13 @@ def forward(self, image, psdmed, autocorr): autocorr_mat = data['grid'][0][2] # assuming third dimension is trivial and last dimension is channel. First two dimensions (32 x 32) are size of topoplot image = torch.tensor(image_mat).permute(-1, 2, 0, 1) - print('image shape', image.shape) + logger.debug("image shape: %s", image.shape) psdmed = torch.tensor(psdmed_mat).permute(-1, 2, 0, 1) - print('psd shape', psdmed.shape) + logger.debug("psd shape: %s", psdmed.shape) autocorr = torch.tensor(autocorr_mat).permute(-1, 2, 0, 1) - print('autocorr shape', autocorr.shape) + logger.debug("autocorr shape: %s", autocorr.shape) output = model(image, psdmed, autocorr) - print(output.shape) + logger.debug("output shape: %s", output.shape) # save the output to a mat file scipy.io.savemat('output4_py.mat', {'output': output.detach().numpy()}) diff --git a/tests/test_eeg_eegrej.py b/tests/test_eeg_eegrej.py index 23cf5fee..13e56c4a 100644 --- a/tests/test_eeg_eegrej.py +++ b/tests/test_eeg_eegrej.py @@ -2,7 +2,6 @@ import tempfile import unittest import numpy as np -from unittest.mock import patch # Assume eeg_eegrej is defined as in your module that imports: from eegrej import eegrej from eegprep import eeg_eegrej @@ -253,10 +252,12 @@ def test_eeg_eegrej_overlapping_regions(self): # Overlapping regions: [3, 7] and [5, 10] should merge to [3, 10] regions = np.array([[3, 7], [5, 10]]) - with patch('builtins.print') as mock_print: + with self.assertLogs("eegprep.functions.popfunc.eeg_eegrej", level="WARNING") as captured: result = eeg_eegrej(EEG, regions) - # Should print warning about overlapping regions - mock_print.assert_called_with("Warning: overlapping regions detected and fixed in eeg_eegrej") + + self.assertTrue( + any("Overlapping regions detected and fixed in eeg_eegrej" in message for message in captured.output) + ) # Should have 20 - 8 = 12 samples remaining (removed samples 3-10) self.assertEqual(result['pnts'], 12) diff --git a/tests/test_eeglabcompat.py b/tests/test_eeglabcompat.py index 8ffd7e79..910cb7f9 100644 --- a/tests/test_eeglabcompat.py +++ b/tests/test_eeglabcompat.py @@ -8,6 +8,7 @@ import os import unittest from copy import deepcopy +from pathlib import Path import numpy as np @@ -22,11 +23,51 @@ from eegprep import clean_artifacts, pop_loadset from eegprep.functions.adminfunc.eeg_checkset import eeg_checkset from eegprep.utils.testing import DebuggableTestCase +import eegprep.functions.adminfunc.eeglabcompat as eeglabcompat # Path to test data LOCAL_DATA_PATH = os.path.join(os.path.dirname(__file__), '../sample_data/') +def test_eeglab_clean_artifacts_roundtrip_uses_private_tempdir(monkeypatch, tmp_path): + paths: dict[str, list[Path]] = {"save": [], "matlab_load": [], "matlab_save": [], "load": []} + + class DummyEeglab: + def pop_loadset(self, filename): + paths["matlab_load"].append(Path(filename)) + return {"loaded": filename} + + def clean_artifacts(self, EEG, *_args): + return {"cleaned": EEG} + + def pop_saveset(self, EEG, filename): + paths["matlab_save"].append(Path(filename)) + Path(filename).write_text("cleaned", encoding="utf-8") + return EEG + + def fake_pop_saveset(EEG, filename): + paths["save"].append(Path(filename)) + Path(filename).write_text("input", encoding="utf-8") + return EEG + + def fake_pop_loadset(filename): + paths["load"].append(Path(filename)) + return {"loaded_from": str(filename)} + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(eeglabcompat, "get_eeglab", lambda auto_file_roundtrip=False: DummyEeglab()) + monkeypatch.setattr(eeglabcompat, "pop_saveset", fake_pop_saveset) + monkeypatch.setattr(eeglabcompat, "pop_loadset", fake_pop_loadset) + + result = eeglabcompat.clean_artifacts({"data": np.zeros((1, 4))}, BurstCriterion="off") + + assert result["loaded_from"].endswith("output.set") + assert not (tmp_path / "tmp.set").exists() + assert not (tmp_path / "tmp2.set").exists() + assert all(path.parent != tmp_path for values in paths.values() for path in values) + assert {path.name for values in paths.values() for path in values} == {"input.set", "output.set"} + + class TestMatlabWrapper(DebuggableTestCase): """Test cases for MatlabWrapper class.""" diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py index 957d2cbb..4a7400b9 100644 --- a/tests/test_gui_main_window.py +++ b/tests/test_gui_main_window.py @@ -982,6 +982,26 @@ def test_pop_topoplot_menu_actions_record_history_without_replacing_dataset(self self.assertIs(session.ALLEEG[0], original_eeg) self.assertEqual(session.ALLCOM[-1], "pop_topoplot(EEG, typeplot=1, items=[0])") + def test_dipfit_mutating_menu_action_updates_session_and_history(self): + session = EEGPrepSession() + session.store_current(_demo_eeg(), new=True) + dispatcher = MenuActionDispatcher(session) + original = session.EEG + fitted = dict(original, setname="dipfit updated") + fitted["dipfit"] = {"model": [{"rv": 0.01}]} + + with mock.patch( + "eegprep.plugins.dipfit.pop_dipfit_gridsearch.pop_dipfit_gridsearch", + return_value=(fitted, "EEG = pop_dipfit_gridsearch(EEG, select=[1]);"), + ) as gridsearch: + dispatcher.dispatch("pop_dipfit_gridsearch") + + gridsearch.assert_called_once_with(original, return_com=True) + self.assertEqual(session.EEG["setname"], "dipfit updated") + self.assertEqual(session.ALLEEG[0]["dipfit"]["model"][0]["rv"], 0.01) + self.assertEqual(session.LASTCOM, "EEG = pop_dipfit_gridsearch(EEG, select=[1]);") + self.assertEqual(session.ALLCOM[-1], "EEG = pop_dipfit_gridsearch(EEG, select=[1]);") + def test_copyset_menu_updates_alleeg_eeg_currentset_and_history(self): session = EEGPrepSession() session.store_current(_demo_eeg(), new=True) diff --git a/tests/test_processing_logging_contract.py b/tests/test_processing_logging_contract.py new file mode 100644 index 00000000..2619b065 --- /dev/null +++ b/tests/test_processing_logging_contract.py @@ -0,0 +1,70 @@ +import logging + +import numpy as np +import pytest + +from eegprep.functions.popfunc.eeg_eegrej import _combine_regions +from eegprep.functions.popfunc.eeg_lat2point import eeg_lat2point +from eegprep.functions.popfunc.pop_select import pop_select + + +def _minimal_eeg(): + return { + "data": np.zeros((2, 20), dtype=np.float32), + "nbchan": 2, + "pnts": 20, + "trials": 1, + "srate": 100, + "xmin": 0, + "xmax": 0.19, + "times": np.arange(20), + "chanlocs": [{"labels": "Cz"}, {"labels": "Pz"}], + "event": [], + "urevent": [], + "epoch": [], + "history": "", + "icaact": np.array([]), + "icaweights": np.array([]), + "icasphere": np.array([]), + "icawinv": np.array([]), + "icachansind": np.array([], dtype=int), + "chaninfo": {}, + "reject": {}, + } + + +def test_pop_select_warnings_use_logging_not_stdout(capsys, caplog): + caplog.set_level(logging.WARNING) + + with pytest.raises(ValueError, match="Channels not found"): + pop_select(_minimal_eeg(), channel=["Missing"]) + + captured = capsys.readouterr() + assert captured.out == "" + assert captured.err == "" + assert "channels not found" in caplog.text + + +def test_latency_range_warning_uses_logging_not_stdout(capsys, caplog): + caplog.set_level(logging.WARNING) + + newlat, flag = eeg_lat2point([2], [1], 1, [0, 0], outrange=1) + + captured = capsys.readouterr() + assert captured.out == "" + assert captured.err == "" + assert flag == 1 + np.testing.assert_array_equal(newlat, np.array([1.0])) + assert "Points out of range detected" in caplog.text + + +def test_eegrej_overlap_warning_uses_logging_not_stdout(capsys, caplog): + caplog.set_level(logging.WARNING) + + combined = _combine_regions(np.array([[1, 3], [3, 5], [10, 12]])) + + captured = capsys.readouterr() + assert captured.out == "" + assert captured.err == "" + np.testing.assert_array_equal(combined, np.array([[1, 5], [10, 12]])) + assert "Overlapping regions detected" in caplog.text diff --git a/tests/test_visual_parity.py b/tests/test_visual_parity.py index fd45bc7a..db6bd9a9 100644 --- a/tests/test_visual_parity.py +++ b/tests/test_visual_parity.py @@ -11,7 +11,10 @@ from tools.visual_parity.config import load_manifest from tools.visual_parity.export_eegprep_menu_inventory import export_inventory from tools.visual_parity.menu_inventory import compare_menu_trees -from eegprep.functions.guifunc.visual_capture import _main_window_menu_state as _eegprep_main_window_menu_state +from eegprep.functions.guifunc.visual_capture import ( + _capture_case_handlers, + _main_window_menu_state as _eegprep_main_window_menu_state, +) ONE_PIXEL_PNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=" @@ -131,6 +134,18 @@ def test_load_manifest_parses_cases(self): self.assertEqual(cases["pop_interp_dataset_index_dialog"].targets["eeglab"].action, "inputdlg2:dataset_index") self.assertEqual(cases["pop_reref_help_dialog"].targets["eeglab"].action, "pophelp:pop_reref") + def test_eegprep_visual_capture_registry_covers_manifest_cases(self): + handlers = _capture_case_handlers() + cases = load_manifest() + + for case_id, case in cases.items(): + eegprep_target = case.targets.get("eegprep") + if eegprep_target is None: + continue + if "eegprep.functions.guifunc.visual_capture" in eegprep_target.command: + with self.subTest(case_id=case_id): + self.assertIn(case_id, handlers) + class VisualParityCaptureTests(unittest.TestCase): def test_capture_command_receives_output_environment(self): From 7df77bede25ed76f562a1dae61845cf2b0bc3a3f Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Thu, 11 Jun 2026 00:00:25 -0700 Subject: [PATCH 10/16] Keep console eegh bound to session history --- src/eegprep/functions/adminfunc/console.py | 4 ++++ tests/test_console_workspace.py | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/eegprep/functions/adminfunc/console.py b/src/eegprep/functions/adminfunc/console.py index 1793f743..88a9b8b9 100644 --- a/src/eegprep/functions/adminfunc/console.py +++ b/src/eegprep/functions/adminfunc/console.py @@ -241,6 +241,8 @@ def __init__(self, bridge: EEGPrepConsoleWorkspace) -> None: def __getattr__(self, name: str) -> Any: if name.startswith("pop_"): return self._bridge.pop_wrapper(name) + if name == "eegh": + return self._bridge.namespace["eegh"] return getattr(eegprep, name) def __dir__(self) -> list[str]: @@ -451,6 +453,8 @@ def _bind_exports(self, exports: Mapping[str, Any] | None) -> None: for name in export_names: if name == "__version__": self.namespace[name] = eegprep.__version__ + elif name == "eegh": + continue elif name.startswith("pop_"): wrapped = ConsolePopFunction(name, self, None if exports is None else exports[name]) self._wrapped_pop_exports[name] = wrapped diff --git a/tests/test_console_workspace.py b/tests/test_console_workspace.py index 53780922..b936d1b7 100644 --- a/tests/test_console_workspace.py +++ b/tests/test_console_workspace.py @@ -367,6 +367,16 @@ def test_console_eegh_displays_and_finds_session_history(): workspace.close() +def test_default_console_eegh_uses_session_history_after_public_exports_bind(): + session = EEGPrepSession() + workspace = EEGPrepConsoleWorkspace(session) + session.add_history("EEG = pop_fileio('sample.set');") + + assert workspace.namespace["eegh"]() == "1. EEG = pop_fileio('sample.set');" + assert workspace.namespace["eegprep"].eegh() == "1. EEG = pop_fileio('sample.set');" + workspace.close() + + def test_console_eegh_positive_index_replays_command_through_workspace(): session = EEGPrepSession() session.store_current(_demo_eeg(), new=True) From ba83d88207509486be90948c97b90b5226881b3a Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Thu, 11 Jun 2026 00:16:19 -0700 Subject: [PATCH 11/16] Rename CLI migration command --- docs/source/user_guide/agent_cli.rst | 19 ++++++------- .../cli/commands/{eeglab.py => migrate.py} | 24 ++++++++--------- src/eegprep/cli/discovery.py | 14 +++++----- src/eegprep/cli/main.py | 4 +-- ...s.py => test_cli_bids_migrate_commands.py} | 27 ++++++++++--------- tests/test_cli_main.py | 4 +++ 6 files changed, 50 insertions(+), 42 deletions(-) rename src/eegprep/cli/commands/{eeglab.py => migrate.py} (90%) rename tests/{test_cli_bids_eeglab_commands.py => test_cli_bids_migrate_commands.py} (89%) diff --git a/docs/source/user_guide/agent_cli.rst b/docs/source/user_guide/agent_cli.rst index b4bef338..f97fb9df 100644 --- a/docs/source/user_guide/agent_cli.rst +++ b/docs/source/user_guide/agent_cli.rst @@ -123,18 +123,19 @@ QC results include stable recommendation codes that an agent can reason over. HTML reports are for human review; the paired JSON and manifests are for automation. -BIDS And EEGLAB Migration -========================= +BIDS And Migration +================== .. code-block:: bash eegprep bids validate bids_root --json eegprep bids import bids_root --subject 01 --task rest --output sub-01.set --json eegprep bids export clean.set --bids-root bids_out --subject 01 --task rest --json - eegprep eeglab history old_pipeline.set --json - eegprep eeglab compare matlab_output.set eegprep_output.set --json - eegprep eeglab convert-script old_pipeline.m --output preprocess.yaml --json - -The EEGLAB helpers are migration aids. Script conversion is intentionally -best-effort and reports unsupported commands instead of silently inventing -behavior. + eegprep migrate history old_pipeline.set --json + eegprep migrate compare matlab_output.set eegprep_output.set --json + eegprep migrate convert-script old_pipeline.m --output preprocess.yaml --json + +Migration helpers can inspect EEGLAB command histories and compare datasets +without making normal EEGPrep CLI usage depend on MATLAB or an EEGLAB checkout. +Script conversion is intentionally best-effort and reports unsupported commands +instead of silently inventing behavior. diff --git a/src/eegprep/cli/commands/eeglab.py b/src/eegprep/cli/commands/migrate.py similarity index 90% rename from src/eegprep/cli/commands/eeglab.py rename to src/eegprep/cli/commands/migrate.py index 51da4bf7..3cbd6e34 100644 --- a/src/eegprep/cli/commands/eeglab.py +++ b/src/eegprep/cli/commands/migrate.py @@ -1,4 +1,4 @@ -"""EEGLAB migration and compatibility commands for the EEGPrep CLI.""" +"""Migration and compatibility commands for the EEGPrep CLI.""" from __future__ import annotations @@ -33,22 +33,22 @@ def register(subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: - """Register ``eeglab`` compatibility commands.""" - parser = subparsers.add_parser("eeglab", help="Inspect EEGLAB history and migration compatibility.") - eeglab_sub = parser.add_subparsers(dest="eeglab_command", required=True) + """Register migration compatibility commands.""" + parser = subparsers.add_parser("migrate", help="Inspect old EEGLAB histories and migration compatibility.") + migrate_sub = parser.add_subparsers(dest="migrate_command", required=True) - history_parser = eeglab_sub.add_parser("history", help="Inspect mapped EEGLAB history operations.") + history_parser = migrate_sub.add_parser("history", help="Inspect mapped EEGLAB history operations.") history_parser.add_argument("input") history_parser.add_argument("--json", action="store_true") history_parser.set_defaults(handler=lambda args: history(args.input)) - compare_parser = eeglab_sub.add_parser("compare", help="Compare two EEGLAB .set datasets.") + compare_parser = migrate_sub.add_parser("compare", help="Compare two EEGLAB .set datasets.") compare_parser.add_argument("left") compare_parser.add_argument("right") compare_parser.add_argument("--json", action="store_true") compare_parser.set_defaults(handler=lambda args: compare(args.left, args.right)) - convert = eeglab_sub.add_parser("convert-script", help="Best-effort conversion of simple EEGLAB scripts to YAML.") + convert = migrate_sub.add_parser("convert-script", help="Best-effort conversion of simple EEGLAB scripts to YAML.") convert.add_argument("script") convert.add_argument("--to", choices=("eegprep-yaml",), default="eegprep-yaml") convert.add_argument("--output") @@ -95,7 +95,7 @@ def history(input_path: str | Path) -> dict[str, Any]: operations.append(record) return { "status": "ok", - "schema_version": "eegprep.eeglab.history.v1", + "schema_version": "eegprep.migrate.history.v1", "input": str(input_path), "history_detected": bool(operations), "operations": operations, @@ -148,7 +148,7 @@ def compare(left: str | Path, right: str | Path) -> dict[str, Any]: differences.append({"path": "data", "code": "DATA_VALUE_MISMATCH", "max_abs_diff": max_abs_diff}) return { "status": "ok", - "schema_version": "eegprep.eeglab.compare.v1", + "schema_version": "eegprep.migrate.compare.v1", "left": str(left), "right": str(right), "equivalent": not differences, @@ -207,7 +207,7 @@ def convert_script( output_path_value = str(destination) return { "status": "ok", - "schema_version": "eegprep.eeglab.convert_script.v1", + "schema_version": "eegprep.migrate.convert_script.v1", "source": str(source), "target": target, "output": output_path_value, @@ -219,10 +219,10 @@ def convert_script( def main(argv: list[str] | None = None) -> int: """Standalone module harness for tests and local debugging.""" - parser = argparse.ArgumentParser(prog="eegprep eeglab") + parser = argparse.ArgumentParser(prog="eegprep migrate") subparsers = parser.add_subparsers(dest="command", required=True) register(subparsers) - args = parser.parse_args(["eeglab", *(sys.argv[1:] if argv is None else argv)]) + args = parser.parse_args(["migrate", *(sys.argv[1:] if argv is None else argv)]) result = args.handler(args) print(json.dumps(json_safe(result), sort_keys=True)) return 0 if result.get("status") in {"ok", "warning"} else 1 diff --git a/src/eegprep/cli/discovery.py b/src/eegprep/cli/discovery.py index 25b49268..932649d4 100644 --- a/src/eegprep/cli/discovery.py +++ b/src/eegprep/cli/discovery.py @@ -70,7 +70,7 @@ def capabilities() -> dict[str, Any]: "supports_json": True, "supports_dry_run": False, }, - "eeglab": { + "migrate": { "description": "Inspect EEGLAB history, compare datasets, and convert simple MATLAB histories.", "inputs": ["eeglab_set", "matlab_script"], "outputs": ["json", "eegprep_pipeline_yaml"], @@ -149,9 +149,9 @@ def command_schema(command: str) -> dict[str, Any]: "task": {"type": "string"}, }, }, - "eeglab": { - "schema_version": "eegprep.schema.command.eeglab.v1", - "syntax": "eegprep eeglab ... --json", + "migrate": { + "schema_version": "eegprep.schema.command.migrate.v1", + "syntax": "eegprep migrate ... --json", "required": ["subcommand"], "properties": { "left": {"type": "string"}, @@ -238,9 +238,9 @@ def examples(name: str) -> dict[str, Any]: "eegprep bids validate bids_root --json", "eegprep bids export input.set --bids-root bids_out --subject 01 --task rest --json", ], - "eeglab": [ - "eegprep eeglab history sample_data/eeglab_data.set --json", - "eegprep eeglab compare left.set right.set --json", + "migrate": [ + "eegprep migrate history sample_data/eeglab_data.set --json", + "eegprep migrate compare left.set right.set --json", ], "skills": ["eegprep skills list --json", "eegprep skills get eegprep-cli"], } diff --git a/src/eegprep/cli/main.py b/src/eegprep/cli/main.py index 265af365..9b728f03 100644 --- a/src/eegprep/cli/main.py +++ b/src/eegprep/cli/main.py @@ -13,7 +13,7 @@ from eegprep.cli.dataset import inspect_channels, inspect_dataset, inspect_events, inspect_ica, validate_dataset from eegprep.cli.commands import batch as batch_commands from eegprep.cli.commands import bids as bids_commands -from eegprep.cli.commands import eeglab as eeglab_commands +from eegprep.cli.commands import migrate as migrate_commands from eegprep.cli.commands import pipeline as pipeline_commands from eegprep.cli.commands import qc as qc_commands from eegprep.cli.commands import report as report_commands @@ -126,7 +126,7 @@ def build_parser() -> EEGPrepArgumentParser: report_commands.register(subparsers) batch_commands.register(subparsers) bids_commands.register(subparsers) - eeglab_commands.register(subparsers) + migrate_commands.register(subparsers) parser.set_defaults(handler=_handle_root) return parser diff --git a/tests/test_cli_bids_eeglab_commands.py b/tests/test_cli_bids_migrate_commands.py similarity index 89% rename from tests/test_cli_bids_eeglab_commands.py rename to tests/test_cli_bids_migrate_commands.py index 86a9cec0..403cd176 100644 --- a/tests/test_cli_bids_eeglab_commands.py +++ b/tests/test_cli_bids_migrate_commands.py @@ -103,8 +103,8 @@ def test_bids_export_refuses_non_empty_root_without_overwrite(tmp_path): assert existing.read_text(encoding="utf-8") == "existing" -def test_eeglab_history_maps_supported_and_unsupported_commands(tmp_path): - from eegprep.cli.commands import eeglab as eeglab_cli +def test_migrate_history_maps_supported_and_unsupported_commands(tmp_path): + from eegprep.cli.commands import migrate as migrate_cli set_file = tmp_path / "history.set" eeg = _eeg() @@ -117,9 +117,10 @@ def test_eeglab_history_maps_supported_and_unsupported_commands(tmp_path): ) pop_saveset(eeg, set_file) - payload = eeglab_cli.history(set_file) + payload = migrate_cli.history(set_file) assert payload["status"] == "ok" + assert payload["schema_version"] == "eegprep.migrate.history.v1" assert [operation["eeglab_command"] for operation in payload["operations"]] == [ "pop_loadset", "pop_resample", @@ -131,8 +132,8 @@ def test_eeglab_history_maps_supported_and_unsupported_commands(tmp_path): assert payload["operations"][2]["unsupported"]["code"] == "COMMAND_NOT_IMPLEMENTED" -def test_eeglab_compare_reports_structured_differences(tmp_path): - from eegprep.cli.commands import eeglab as eeglab_cli +def test_migrate_compare_reports_structured_differences(tmp_path): + from eegprep.cli.commands import migrate as migrate_cli left = tmp_path / "left.set" right = tmp_path / "right.set" @@ -144,9 +145,10 @@ def test_eeglab_compare_reports_structured_differences(tmp_path): pop_saveset(eeg_left, left) pop_saveset(eeg_right, right) - payload = eeglab_cli.compare(left, right) + payload = migrate_cli.compare(left, right) assert payload["status"] == "ok" + assert payload["schema_version"] == "eegprep.migrate.compare.v1" assert payload["equivalent"] is False differences_by_path = {difference["path"]: difference for difference in payload["differences"]} assert differences_by_path["srate"]["code"] == "VALUE_MISMATCH" @@ -154,8 +156,8 @@ def test_eeglab_compare_reports_structured_differences(tmp_path): assert payload["data"]["max_abs_diff"] == 1.25 -def test_eeglab_compare_reports_nan_placement_differences(tmp_path): - from eegprep.cli.commands import eeglab as eeglab_cli +def test_migrate_compare_reports_nan_placement_differences(tmp_path): + from eegprep.cli.commands import migrate as migrate_cli left = tmp_path / "left_nan.set" right = tmp_path / "right_nan.set" @@ -168,14 +170,14 @@ def test_eeglab_compare_reports_nan_placement_differences(tmp_path): pop_saveset(eeg_left, left) pop_saveset(eeg_right, right) - payload = eeglab_cli.compare(left, right) + payload = migrate_cli.compare(left, right) assert payload["equivalent"] is False assert any(difference["code"] == "DATA_FINITE_MASK_MISMATCH" for difference in payload["differences"]) -def test_eeglab_convert_script_reports_best_effort_conversion(tmp_path): - from eegprep.cli.commands import eeglab as eeglab_cli +def test_migrate_convert_script_reports_best_effort_conversion(tmp_path): + from eegprep.cli.commands import migrate as migrate_cli script = tmp_path / "pipeline.m" output = tmp_path / "pipeline.yaml" @@ -190,9 +192,10 @@ def test_eeglab_convert_script_reports_best_effort_conversion(tmp_path): encoding="utf-8", ) - payload = eeglab_cli.convert_script(script, output=output) + payload = migrate_cli.convert_script(script, output=output) assert payload["status"] == "ok" + assert payload["schema_version"] == "eegprep.migrate.convert_script.v1" assert payload["target"] == "eegprep-yaml" assert payload["converted_steps"][1]["name"] == "resample" assert payload["unsupported_commands"][0]["command"] == "topoplot" diff --git a/tests/test_cli_main.py b/tests/test_cli_main.py index 32dd4cb8..6deb05c8 100644 --- a/tests/test_cli_main.py +++ b/tests/test_cli_main.py @@ -37,6 +37,8 @@ def test_help_has_agent_start_section(): assert result.returncode == 0 assert "Start here (for AI agents):" in result.stdout assert "eegprep skills get eegprep-cli" in result.stdout + assert "migrate" in result.stdout + assert "eeglab Inspect EEGLAB history" not in result.stdout def test_capabilities_schema_examples_and_skill_are_json_readable(): @@ -49,6 +51,8 @@ def test_capabilities_schema_examples_and_skill_are_json_readable(): commands = _json_stdout(capabilities)["commands"] assert "filter" in commands assert "batch" in commands + assert "migrate" in commands + assert "eeglab" not in commands assert schema.returncode == 0 assert _json_stdout(schema)["schema"]["schema_version"] == "eegprep.schema.command.filter.v1" assert examples.returncode == 0 From 974ae6b123bf30eaca784cb8c735c8a28be246bb Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Thu, 11 Jun 2026 00:44:05 -0700 Subject: [PATCH 12/16] Enable pop_newset description editing --- src/eegprep/functions/guifunc/qt.py | 19 ++++++ src/eegprep/functions/popfunc/pop_newset.py | 24 +++++-- tests/test_pop_newset.py | 71 +++++++++++++++++++++ 3 files changed, 110 insertions(+), 4 deletions(-) diff --git a/src/eegprep/functions/guifunc/qt.py b/src/eegprep/functions/guifunc/qt.py index 3dc2ac8b..cecf1d49 100644 --- a/src/eegprep/functions/guifunc/qt.py +++ b/src/eegprep/functions/guifunc/qt.py @@ -470,6 +470,11 @@ def _connect_callback(self, callback: CallbackSpec | None, widgets: dict[str, An source = widgets.get(params["button"]) if source is not None: source.clicked.connect(lambda: self._show_callback_message(source, params)) + elif callback.name == "edit_text": + source = widgets.get(params["button"]) + target = widgets.get(params.get("target", params["button"])) + if source is not None and target is not None: + source.clicked.connect(lambda: self._edit_text(source, target, params)) elif callback.name == "open_eegplot": source = widgets.get(params["button"]) if source is not None: @@ -1114,6 +1119,20 @@ def _show_callback_message(parent: Any, params: Mapping[str, Any]) -> None: _qt_core, qt_widgets = _require_qt() qt_widgets.QMessageBox.information(parent, str(params.get("title", "EEGPrep")), str(params.get("message", ""))) + @staticmethod + def _edit_text(parent: Any, target: Any, params: Mapping[str, Any]) -> None: + _qt_core, qt_widgets = _require_qt() + stored_value = target.property(_VALUE_PROPERTY) + current = stored_value if stored_value is not None else params.get("value", "") + value, accepted = qt_widgets.QInputDialog.getMultiLineText( + parent, + str(params.get("title", "Edit text")), + str(params.get("label", "Text")), + str(current), + ) + if accepted: + target.setProperty(_VALUE_PROPERTY, str(value)) + @staticmethod def _select_interp_channels(button: Any, target: Any, params: Mapping[str, Any]) -> None: source = str(params.get("source", "")).lower() diff --git a/src/eegprep/functions/popfunc/pop_newset.py b/src/eegprep/functions/popfunc/pop_newset.py index 22c2b545..121304ed 100644 --- a/src/eegprep/functions/popfunc/pop_newset.py +++ b/src/eegprep/functions/popfunc/pop_newset.py @@ -72,6 +72,7 @@ def pop_newset( def pop_newset_dialog_spec(EEG: dict[str, Any], CURRENTSET: Any = None, *, guistring: str = "") -> DialogSpec: """Return the EEGLAB-like dialog spec for ``pop_newset``.""" dataset_name = str(EEG.get("setname") or "") + comments = _comments_text(EEG.get("comments", "")) prompt = guistring or "What do you want to do with the new dataset?" old_prompt = "What do you want to do with the old dataset (not modified since last saved)?" return DialogSpec( @@ -93,11 +94,13 @@ def pop_newset_dialog_spec(EEG: dict[str, Any], CURRENTSET: Any = None, *, guist "Edit description", tag="editdescription", callback=CallbackSpec( - "show_message", + "edit_text", params={ "button": "editdescription", + "target": "editdescription", "title": "Edit description", - "message": "Dataset description editing is available from the command line using the comments option.", + "label": "Dataset description:", + "value": comments, }, ), ), @@ -149,8 +152,11 @@ def _run_gui( "overwrite": "on" if overwrite else "off", "gui": "off", } - if "comments" in result: - gui_options["comments"] = str(result.get("comments") or "") + comments = result.get("comments") + if comments is None: + comments = result.get("editdescription") + if comments is not None: + gui_options["comments"] = str(comments) if _is_on(result.get("savenew")): gui_options["savenew"] = str(result.get("savefile") or "on").strip() or "on" return gui_options @@ -272,3 +278,13 @@ def _currentset_label(CURRENTSET: Any) -> str: if isinstance(CURRENTSET, (list, tuple)): return ", ".join(str(item) for item in CURRENTSET) if CURRENTSET else "0" return str(CURRENTSET or 0) + + +def _comments_text(value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value + if isinstance(value, (list, tuple)): + return "\n".join(str(item) for item in value) + return str(value) diff --git a/tests/test_pop_newset.py b/tests/test_pop_newset.py index ec3172ce..f4f56664 100644 --- a/tests/test_pop_newset.py +++ b/tests/test_pop_newset.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from eegprep.functions.guifunc.qt import QtDialogRenderer +from eegprep.functions.guifunc.spec import controls_by_tag from eegprep.functions.popfunc.eeg_emptyset import eeg_emptyset from eegprep.functions.popfunc.pop_newset import pop_newset, pop_newset_dialog_spec @@ -100,6 +102,75 @@ def test_pop_newset_dialog_old_dataset_prompt_hides_currentset_index(): assert "What do you want to do with the old dataset 1 (not modified since last saved)?" not in labels +def test_pop_newset_dialog_edit_description_opens_multiline_editor(): + eeg = _eeg(name="processed") + eeg["comments"] = ["first line", "second line"] + + control = controls_by_tag(pop_newset_dialog_spec(eeg, 1))["editdescription"] + + assert control.callback is not None + assert control.callback.name == "edit_text" + assert control.callback.params == { + "button": "editdescription", + "target": "editdescription", + "title": "Edit description", + "label": "Dataset description:", + "value": "first line\nsecond line", + } + + +def test_qt_edit_text_callback_stores_accepted_text(monkeypatch): + class QInputDialog: + @staticmethod + def getMultiLineText(_parent, title, label, text): + calls.append((title, label, text)) + return "", True + + class Widget: + def __init__(self): + self.properties = {} + + def property(self, name): + return self.properties.get(name) + + def setProperty(self, name, value): + self.properties[name] = value + + calls = [] + target = Widget() + QtWidgets = type("QtWidgets", (), {"QInputDialog": QInputDialog}) + monkeypatch.setattr("eegprep.functions.guifunc.qt._require_qt", lambda: (None, QtWidgets)) + + QtDialogRenderer._edit_text( + object(), + target, + {"title": "Edit description", "label": "Dataset description:", "value": "old notes"}, + ) + + assert calls == [("Edit description", "Dataset description:", "old notes")] + assert QtDialogRenderer._read_widget(target) == "" + + +def test_pop_newset_gui_description_button_value_updates_comments(): + class Renderer: + def run(self, _spec, initial_values=None): + return {"setname": "processed", "editdescription": "edited notes", "overwrite": 1} + + alleeg, current, current_set, _command = pop_newset([], _eeg(name="original"), 0) + + alleeg, current, current_set, command = pop_newset( + alleeg, _eeg(name="processed"), current_set, "gui", "on", renderer=Renderer() + ) + + assert len(alleeg) == 2 + assert current_set == 2 + assert current["comments"] == "edited notes" + assert command == ( + "[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, " + "'setname', 'processed', 'comments', 'edited notes', 'overwrite', 'off');" + ) + + def test_pop_newset_gui_choice_can_overwrite_current_dataset(): class Renderer: def run(self, _spec, initial_values=None): From 48fad4a5d2c916e83540ae9113c041fa5f2aa836 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Thu, 11 Jun 2026 01:04:41 -0700 Subject: [PATCH 13/16] Run GUI ICA in background task --- .notes/implementation-notes.html | 24 ++++ .../source/user_guide/gui_console_session.rst | 8 ++ src/eegprep/functions/guifunc/long_task.py | 121 ++++++++++++++++++ src/eegprep/functions/guifunc/menu_actions.py | 63 ++++++++- src/eegprep/functions/popfunc/pop_runica.py | 17 ++- src/eegprep/resources/help/pop_runica.md | 7 + tests/conftest.py | 1 + tests/test_gui_long_task.py | 73 +++++++++++ tests/test_gui_main_window.py | 88 +++++++++++++ 9 files changed, 397 insertions(+), 5 deletions(-) create mode 100644 src/eegprep/functions/guifunc/long_task.py create mode 100644 tests/test_gui_long_task.py diff --git a/.notes/implementation-notes.html b/.notes/implementation-notes.html index f2d38960..0a23504f 100644 --- a/.notes/implementation-notes.html +++ b/.notes/implementation-notes.html @@ -542,5 +542,29 @@

Verification Notes

EEGLAB-facing 1-based public QC indices, and compare nonfinite data masks in eegprep eeglab compare. +

Async GUI ICA Progress Notes

+

Design Decisions

+
    +
  • Kept pop_runica itself synchronous for scripts, tests, CLI, + and console calls. The background worker is owned by the Qt menu action + layer because only the GUI needs to keep repainting while the computation + runs.
  • +
  • Split pop_runica GUI option collection from ICA execution + so the options dialog always opens on the main Qt thread and only the pure + computation runs in the worker.
  • +
  • Session mutation remains on the main thread. The worker returns an + updated EEG object and command string; EEGPrepSession is updated + only from the success callback.
  • +
+

Tradeoffs

+
    +
  • The progress dialog is indeterminate because runica progress is + iteration/log-message based rather than a reliable percentage. Cancellation + is intentionally not exposed until the ICA backends support safe + interruption.
  • +
  • The reusable long-task helper captures EEGPrep log messages for the + dialog, while the console action boundary keeps command echo and progress + output ordered for mixed GUI-plus-console workflows.
  • +
diff --git a/docs/source/user_guide/gui_console_session.rst b/docs/source/user_guide/gui_console_session.rst index 77acda0b..16f33382 100644 --- a/docs/source/user_guide/gui_console_session.rst +++ b/docs/source/user_guide/gui_console_session.rst @@ -101,6 +101,14 @@ GUI actions should update state through session helpers such as ``notify_changed()``. They should not mutate a GUI-only copy of ``EEG`` that the console cannot see. +Long-running GUI actions use the same session boundary. For example, +GUI-launched ICA opens the EEGLAB-like ``pop_runica`` options dialog on the main +thread, runs the ICA computation behind a progress dialog, then stores the +updated dataset and history only after the worker finishes successfully. While +the worker is running, progress messages are buffered safely for +``eegprep-console`` so the replayable command remains visible before related +output. + ``eegprep-console`` wraps registered ``pop_*`` functions. When a bare call such as ``pop_resample(EEG, 64)`` returns a dataset and command string, the wrapper stores the returned dataset, updates ``LASTCOM`` and ``ALLCOM``, and tells the diff --git a/src/eegprep/functions/guifunc/long_task.py b/src/eegprep/functions/guifunc/long_task.py new file mode 100644 index 00000000..09bf32b1 --- /dev/null +++ b/src/eegprep/functions/guifunc/long_task.py @@ -0,0 +1,121 @@ +"""Qt helper for running long GUI actions off the UI thread.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +import logging +from typing import Any + +try: # pragma: no cover - depends on optional GUI dependency + from PySide6 import QtCore, QtWidgets +except ImportError: # pragma: no cover - depends on optional GUI dependency + QtCore = None + QtWidgets = None + + +logger = logging.getLogger(__name__) + + +@dataclass +class LongTaskHandle: + """Keep Qt task objects alive until their worker thread finishes.""" + + thread: Any + worker: Any + dialog: Any + + +def run_long_task( + *, + parent: Any | None, + title: str, + label: str, + task: Callable[[], Any], + on_success: Callable[[Any], None], + on_error: Callable[[Exception], None], + on_finished: Callable[[LongTaskHandle], None] | None = None, +) -> LongTaskHandle: + """Run ``task`` in a Qt worker thread with an indeterminate progress dialog.""" + qt_core, qt_widgets = _require_qt() + + progress = qt_widgets.QProgressDialog(label, None, 0, 0, parent) + progress.setWindowTitle(title) + progress.setCancelButton(None) + progress.setAutoClose(False) + progress.setAutoReset(False) + progress.setMinimumDuration(0) + progress.setWindowModality(qt_core.Qt.WindowModal) + + class Worker(qt_core.QObject): + succeeded = qt_core.Signal(object) + failed = qt_core.Signal(object) + message = qt_core.Signal(str) + finished = qt_core.Signal() + + def run(self) -> None: + handler = _SignalLogHandler(self.message) + handler.setFormatter(logging.Formatter("%(message)s")) + handler.setLevel(logging.INFO) + eegprep_logger = logging.getLogger("eegprep") + old_level = eegprep_logger.level + if old_level == logging.NOTSET or old_level > logging.INFO: + eegprep_logger.setLevel(logging.INFO) + eegprep_logger.addHandler(handler) + try: + self.message.emit(label) + result = task() + self.succeeded.emit(result) + except Exception as exc: # noqa: BLE001 - forwarded to GUI error handler. + self.failed.emit(exc) + finally: + eegprep_logger.removeHandler(handler) + eegprep_logger.setLevel(old_level) + self.finished.emit() + + thread = qt_core.QThread() + worker = Worker() + handle = LongTaskHandle(thread=thread, worker=worker, dialog=progress) + + worker.moveToThread(thread) + thread.started.connect(worker.run) + worker.message.connect(lambda message: _update_progress_label(progress, label, message)) + worker.succeeded.connect(on_success) + worker.failed.connect(on_error) + worker.finished.connect(thread.quit) + thread.finished.connect(worker.deleteLater) + thread.finished.connect(thread.deleteLater) + thread.finished.connect(progress.close) + if on_finished is not None: + thread.finished.connect(lambda: on_finished(handle)) + + progress._eegprep_long_task = handle + progress.show() + qt_core.QTimer.singleShot(0, thread.start) + return handle + + +class _SignalLogHandler(logging.Handler): + def __init__(self, signal: Any): + super().__init__(level=logging.INFO) + self.signal = signal + + def emit(self, record: logging.LogRecord) -> None: + try: + self.signal.emit(self.format(record)) + except Exception: + logger.exception("Failed to forward long-task log message") + + +def _update_progress_label(progress: Any, label: str, message: str) -> None: + message = str(message).strip() + progress.setLabelText(label if not message or message == label else f"{label}\n{message}") + + +def _require_qt() -> tuple[Any, Any]: + if QtCore is None or QtWidgets is None: + raise RuntimeError( + "PySide6 is required for EEGPrep GUI progress dialogs. Install it with " + "`pip install -e .[gui]` or `pip install eegprep[gui]`." + ) + return QtCore, QtWidgets diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py index 4e74e90c..3defe10a 100644 --- a/src/eegprep/functions/guifunc/menu_actions.py +++ b/src/eegprep/functions/guifunc/menu_actions.py @@ -10,6 +10,7 @@ from typing import Any from eegprep.extension_runtime import ExtensionRuntime +from eegprep.functions.guifunc.long_task import LongTaskHandle, run_long_task from eegprep.functions.guifunc.menu_placeholders import PLACEHOLDER_ACTIONS, is_placeholder_action, placeholder_message from eegprep.functions.guifunc.pophelp import pophelp from eegprep.functions.guifunc.session import EEGPrepSession, has_eeg_data @@ -231,6 +232,7 @@ def __init__( self.refresh = refresh self.native_file_dialogs = native_file_dialogs self.extension_runtime = extension_runtime or ExtensionRuntime.empty() + self._long_tasks: list[LongTaskHandle] = [] def dispatch_gui(self, action: str, parent: Any | None = None) -> None: """Run a menu action from Qt and show user-facing errors.""" @@ -1156,8 +1158,14 @@ def _run_pop_function(self, name: str, parent: Any | None, *, variant: str = "") out = pop_rmdat(selection, return_com=True) elif name == "pop_runica": - from eegprep.functions.popfunc.pop_runica import pop_runica + from eegprep.functions.popfunc.pop_runica import pop_runica, pop_runica_gui_options + if parent is not None: + gui_options = pop_runica_gui_options(selection) + if gui_options is None: + return + self._run_pop_runica_long_task(selection, gui_options, parent) + return out = pop_runica(selection, return_com=True) elif name == "pop_select": from eegprep.functions.popfunc.pop_select import pop_select @@ -1300,6 +1308,59 @@ def commit_component_rejection(eeg_out: Any, _states: dict[int, bool]) -> None: self._store_current_from_gui(eeg_out, command=command) self._refresh() + def _run_pop_runica_long_task( + self, + selection: Any, + gui_options: Mapping[str, Any], + parent: Any, + ) -> None: + from eegprep.functions.popfunc.pop_runica import pop_runica + + self.session.begin_gui_action("pop_runica") + + def task() -> Any: + return pop_runica(selection, gui=False, return_com=True, **dict(gui_options)) + + def on_success(out: Any) -> None: + try: + if isinstance(out, tuple): + eeg_out, command = out[0], out[1] if len(out) > 1 else "" + else: + eeg_out, command = out, "" + if command: + self._store_current_from_gui(eeg_out, command=command) + self._refresh() + except Exception as exc: + logger.exception("EEGPrep GUI menu action failed: pop_runica") + self._warn(parent, str(exc)) + + def on_error(exc: Exception) -> None: + logger.error( + "EEGPrep GUI menu action failed: pop_runica", + exc_info=(type(exc), exc, exc.__traceback__), + ) + self._warn(parent, str(exc)) + + def on_finished(handle: LongTaskHandle) -> None: + if handle in self._long_tasks: + self._long_tasks.remove(handle) + self.session.end_gui_action("pop_runica") + + try: + handle = run_long_task( + parent=parent, + title="Running ICA decomposition", + label="Running ICA decomposition. This may take several minutes.", + task=task, + on_success=on_success, + on_error=on_error, + on_finished=on_finished, + ) + except Exception: + self.session.end_gui_action("pop_runica") + raise + self._long_tasks.append(handle) + def _run_browser_accept_pop_action( self, name: str, diff --git a/src/eegprep/functions/popfunc/pop_runica.py b/src/eegprep/functions/popfunc/pop_runica.py index 537f2e36..66f0c876 100644 --- a/src/eegprep/functions/popfunc/pop_runica.py +++ b/src/eegprep/functions/popfunc/pop_runica.py @@ -60,7 +60,7 @@ def pop_runica( elif gui is None: gui = options is None and not has_programmatic_options and chanind is None and dataset is None if gui: - gui_result = _run_gui(EEG, renderer=renderer, initial_values=_selectamica_initial_values(selectamica)) + gui_result = pop_runica_gui_options(EEG, renderer=renderer, selectamica=selectamica) if gui_result is None: return (EEG, "") if return_com else EEG icatype = gui_result["icatype"] @@ -70,9 +70,6 @@ def pop_runica( dataset = gui_result["dataset"] concatenate = gui_result["concatenate"] concatcond = gui_result["concatcond"] - if icatype == "runica": - options = dict(options) - options.setdefault("interrupt", "on") ica_options = _normalise_ica_options(icatype, options, parsed) if isinstance(EEG, list): @@ -202,6 +199,18 @@ def _run_gui(EEG, renderer=None, initial_values=None): } +def pop_runica_gui_options(EEG, *, renderer=None, selectamica: str | None = None) -> dict[str, Any] | None: + """Collect ``pop_runica`` GUI options without running the ICA backend.""" + gui_result = _run_gui(EEG, renderer=renderer, initial_values=_selectamica_initial_values(selectamica)) + if gui_result is None: + return None + if gui_result["icatype"] == "runica": + options = dict(gui_result["options"]) + options.setdefault("interrupt", "on") + gui_result["options"] = options + return gui_result + + def _runica_on_dataset(EEG, icatype, options, *, reorder, chanind): logger.info("Attempting to convert data matrix to double precision...") prepared = _prepare_ica_dataset(EEG) diff --git a/src/eegprep/resources/help/pop_runica.md b/src/eegprep/resources/help/pop_runica.md index f3498281..fd1abe72 100644 --- a/src/eegprep/resources/help/pop_runica.md +++ b/src/eegprep/resources/help/pop_runica.md @@ -30,6 +30,10 @@ Calling `pop_runica(EEG)` opens an EEGLAB-style dialog with: - Channel type/index selection controls. - For multiple datasets, a dataset selector and concatenate controls. +When `pop_runica` is started from the main EEGPrep GUI, the ICA computation +runs behind an indeterminate progress dialog so the window can continue +repainting while the decomposition is being computed. + Behavior: - Supplying a non-default `icatype` programmatically, for example @@ -41,6 +45,9 @@ Behavior: - Existing ICLabel classifications are removed when ICA is recomputed because they no longer describe the active components. - `EEG.icaweights`, `EEG.icasphere`, `EEG.icawinv`, `EEG.icaact`, and `EEG.icachansind` are updated. - GUI-launched runica adds `'interrupt', 'on'` to the history command, matching EEGLAB's GUI path. +- GUI-launched ICA stores the updated dataset only after the background + computation finishes successfully. Failed runs leave the current dataset and + history unchanged. Examples: diff --git a/tests/conftest.py b/tests/conftest.py index b132c768..a246c980 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -50,6 +50,7 @@ def _preload_matlab_libstdcxx() -> None: "tests/test_gui_pop_runica.py", "tests/test_gui_pop_select.py", "tests/test_gui_pop_study.py", + "tests/test_gui_long_task.py", "tests/test_gui_main_window.py", "tests/test_eegplot_gui.py", ) diff --git a/tests/test_gui_long_task.py b/tests/test_gui_long_task.py new file mode 100644 index 00000000..c00fdb5e --- /dev/null +++ b/tests/test_gui_long_task.py @@ -0,0 +1,73 @@ +import logging +import os + +import pytest + +from eegprep.functions.guifunc.long_task import run_long_task + + +@pytest.fixture +def qapp(): + os.environ.setdefault("QT_QPA_PLATFORM", "offscreen") + pytest.importorskip("PySide6") + from PySide6 import QtWidgets + + app = QtWidgets.QApplication.instance() or QtWidgets.QApplication([]) + yield app + + +def test_run_long_task_returns_result_and_forwards_progress(qapp): + from PySide6 import QtCore + + loop = QtCore.QEventLoop() + results = [] + errors = [] + finished = [] + + def task(): + logging.getLogger("eegprep.tests").info("worker progress") + return "done" + + handle = run_long_task( + parent=None, + title="Running test task", + label="Running test task.", + task=task, + on_success=results.append, + on_error=errors.append, + on_finished=lambda task_handle: (finished.append(task_handle), loop.quit()), + ) + QtCore.QTimer.singleShot(3000, loop.quit) + loop.exec() + + assert results == ["done"] + assert errors == [] + assert finished == [handle] + assert "worker progress" in handle.dialog.labelText() + + +def test_run_long_task_reports_errors(qapp): + from PySide6 import QtCore + + loop = QtCore.QEventLoop() + results = [] + errors = [] + + def task(): + raise ValueError("task failed") + + run_long_task( + parent=None, + title="Running test task", + label="Running test task.", + task=task, + on_success=results.append, + on_error=errors.append, + on_finished=lambda _handle: loop.quit(), + ) + QtCore.QTimer.singleShot(3000, loop.quit) + loop.exec() + + assert results == [] + assert len(errors) == 1 + assert str(errors[0]) == "task failed" diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py index 4a7400b9..99e9819f 100644 --- a/tests/test_gui_main_window.py +++ b/tests/test_gui_main_window.py @@ -12,6 +12,7 @@ MenuActionDispatcher, action_kind, ) +from eegprep.functions.guifunc.long_task import LongTaskHandle from eegprep.functions.guifunc.menu_placeholders import is_placeholder_action, placeholder_message from eegprep.functions.guifunc.menu_spec import menu_enabled from eegprep.functions.guifunc.session import EEGPrepSession @@ -855,6 +856,93 @@ def test_new_main_window_pop_actions_dispatch_to_real_wrappers(self): else: self.assertEqual(session.ALLCOM[-1], f"EEG = {action}(EEG);") + def test_gui_pop_runica_runs_ica_in_long_task_before_committing_result(self): + session = EEGPrepSession() + session.store_current(_demo_eeg(), new=True) + refresh = mock.Mock() + dispatcher = MenuActionDispatcher(session, refresh=refresh) + output = dict(session.EEG, setname="ica") + options = { + "icatype": "runica", + "options": {"extended": 1, "interrupt": "on"}, + "reorder": "on", + "chanind": None, + "dataset": None, + "concatenate": "off", + "concatcond": "off", + } + captured = {} + handle = LongTaskHandle(thread=object(), worker=object(), dialog=object()) + events = [] + original_selection = session.EEG + session.add_gui_action_listener(lambda event, action: events.append((event, action))) + + def fake_run_long_task(**kwargs): + captured.update(kwargs) + return handle + + with ( + mock.patch("eegprep.functions.popfunc.pop_runica.pop_runica_gui_options", return_value=options), + mock.patch( + "eegprep.functions.popfunc.pop_runica.pop_runica", + return_value=(output, "EEG = pop_runica(EEG, 'icatype', 'runica', 'extended', 1, 'interrupt', 'on');"), + ) as pop_func, + mock.patch("eegprep.functions.guifunc.menu_actions.run_long_task", side_effect=fake_run_long_task), + ): + dispatcher.dispatch("pop_runica", parent=object()) + self.assertEqual(session.EEG["setname"], "demo") + + result = captured["task"]() + captured["on_success"](result) + captured["on_finished"](handle) + + pop_func.assert_called_once_with(original_selection, gui=False, return_com=True, **options) + self.assertEqual(session.EEG["setname"], "ica") + self.assertEqual(session.ALLEEG[0]["setname"], "ica") + self.assertEqual( + session.ALLCOM[-1], + "EEG = pop_runica(EEG, 'icatype', 'runica', 'extended', 1, 'interrupt', 'on');", + ) + refresh.assert_called_once() + self.assertEqual(events, [("begin", "pop_runica"), ("end", "pop_runica")]) + self.assertEqual(dispatcher._long_tasks, []) + + def test_gui_pop_runica_long_task_error_does_not_mutate_session(self): + session = EEGPrepSession() + session.store_current(_demo_eeg(), new=True) + dispatcher = MenuActionDispatcher(session) + options = { + "icatype": "runica", + "options": {"extended": 1}, + "reorder": "on", + "chanind": None, + "dataset": None, + "concatenate": "off", + "concatcond": "off", + } + captured = {} + handle = LongTaskHandle(thread=object(), worker=object(), dialog=object()) + warnings = [] + + def fake_run_long_task(**kwargs): + captured.update(kwargs) + return handle + + with ( + mock.patch("eegprep.functions.popfunc.pop_runica.pop_runica_gui_options", return_value=options), + mock.patch("eegprep.functions.guifunc.menu_actions.run_long_task", side_effect=fake_run_long_task), + mock.patch.object(dispatcher, "_warn", side_effect=lambda _parent, message: warnings.append(message)), + ): + dispatcher.dispatch("pop_runica", parent=object()) + captured["on_error"](ValueError("runica failed")) + captured["on_finished"](handle) + + self.assertEqual(session.EEG["setname"], "demo") + self.assertEqual(session.ALLEEG[0]["setname"], "demo") + self.assertEqual(session.ALLCOM, []) + self.assertEqual(warnings, ["runica failed"]) + self.assertEqual(dispatcher._long_tasks, []) + def test_gui_transform_action_can_commit_processed_dataset_as_new_set(self): session = EEGPrepSession() session.store_current(_demo_eeg(), new=True) From 8ba958e0a1d51cab03cebda6166b66b3d3822e7b Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Thu, 11 Jun 2026 01:14:58 -0700 Subject: [PATCH 14/16] Marshal long task callbacks to UI thread --- src/eegprep/functions/guifunc/long_task.py | 42 +++++++++++++----- tests/test_gui_long_task.py | 51 +++++++++++++++++++++- 2 files changed, 80 insertions(+), 13 deletions(-) diff --git a/src/eegprep/functions/guifunc/long_task.py b/src/eegprep/functions/guifunc/long_task.py index 09bf32b1..0cefc811 100644 --- a/src/eegprep/functions/guifunc/long_task.py +++ b/src/eegprep/functions/guifunc/long_task.py @@ -14,9 +14,6 @@ QtWidgets = None -logger = logging.getLogger(__name__) - - @dataclass class LongTaskHandle: """Keep Qt task objects alive until their worker thread finishes.""" @@ -24,6 +21,7 @@ class LongTaskHandle: thread: Any worker: Any dialog: Any + receiver: Any | None = None def run_long_task( @@ -75,19 +73,39 @@ def run(self) -> None: thread = qt_core.QThread() worker = Worker() - handle = LongTaskHandle(thread=thread, worker=worker, dialog=progress) + + class Receiver(qt_core.QObject): + @qt_core.Slot(str) + def handle_message(self, message: str) -> None: + _update_progress_label(progress, label, message) + + @qt_core.Slot(object) + def handle_success(self, result: Any) -> None: + on_success(result) + + @qt_core.Slot(object) + def handle_error(self, exc: Exception) -> None: + on_error(exc) + + @qt_core.Slot() + def handle_finished(self) -> None: + progress.close() + if on_finished is not None: + on_finished(handle) + + receiver = Receiver() + handle = LongTaskHandle(thread=thread, worker=worker, dialog=progress, receiver=receiver) worker.moveToThread(thread) thread.started.connect(worker.run) - worker.message.connect(lambda message: _update_progress_label(progress, label, message)) - worker.succeeded.connect(on_success) - worker.failed.connect(on_error) + worker.message.connect(receiver.handle_message) + worker.succeeded.connect(receiver.handle_success) + worker.failed.connect(receiver.handle_error) worker.finished.connect(thread.quit) - thread.finished.connect(worker.deleteLater) + worker.finished.connect(worker.deleteLater) thread.finished.connect(thread.deleteLater) - thread.finished.connect(progress.close) - if on_finished is not None: - thread.finished.connect(lambda: on_finished(handle)) + thread.finished.connect(receiver.handle_finished) + thread.finished.connect(receiver.deleteLater) progress._eegprep_long_task = handle progress.show() @@ -104,7 +122,7 @@ def emit(self, record: logging.LogRecord) -> None: try: self.signal.emit(self.format(record)) except Exception: - logger.exception("Failed to forward long-task log message") + self.handleError(record) def _update_progress_label(progress: Any, label: str, message: str) -> None: diff --git a/tests/test_gui_long_task.py b/tests/test_gui_long_task.py index c00fdb5e..25761f4f 100644 --- a/tests/test_gui_long_task.py +++ b/tests/test_gui_long_task.py @@ -1,8 +1,10 @@ import logging import os +import threading import pytest +import eegprep.functions.guifunc.long_task as long_task_module from eegprep.functions.guifunc.long_task import run_long_task @@ -46,23 +48,69 @@ def task(): assert "worker progress" in handle.dialog.labelText() +def test_run_long_task_callbacks_are_delivered_on_main_thread(qapp, monkeypatch): + from PySide6 import QtCore + + loop = QtCore.QEventLoop() + main_thread_id = threading.get_ident() + task_thread_ids = [] + message_thread_ids = [] + success_thread_ids = [] + original_update = long_task_module._update_progress_label + + def update_progress_label(progress, label, message): + message_thread_ids.append(threading.get_ident()) + original_update(progress, label, message) + + def task(): + task_thread_ids.append(threading.get_ident()) + logging.getLogger("eegprep.tests").info("worker progress") + return "done" + + monkeypatch.setattr(long_task_module, "_update_progress_label", update_progress_label) + + run_long_task( + parent=None, + title="Running test task", + label="Running test task.", + task=task, + on_success=lambda _result: success_thread_ids.append(threading.get_ident()), + on_error=lambda _exc: None, + on_finished=lambda _handle: loop.quit(), + ) + QtCore.QTimer.singleShot(3000, loop.quit) + loop.exec() + + assert task_thread_ids + assert task_thread_ids[0] != main_thread_id + assert message_thread_ids + assert all(thread_id == main_thread_id for thread_id in message_thread_ids) + assert success_thread_ids == [main_thread_id] + + def test_run_long_task_reports_errors(qapp): from PySide6 import QtCore loop = QtCore.QEventLoop() results = [] errors = [] + error_thread_ids = [] + main_thread_id = threading.get_ident() def task(): raise ValueError("task failed") + def on_error(exc): + error_thread_ids.append(threading.get_ident()) + errors.append(exc) + run_long_task( parent=None, title="Running test task", label="Running test task.", task=task, on_success=results.append, - on_error=errors.append, + on_error=on_error, on_finished=lambda _handle: loop.quit(), ) QtCore.QTimer.singleShot(3000, loop.quit) @@ -71,3 +119,4 @@ def task(): assert results == [] assert len(errors) == 1 assert str(errors[0]) == "task failed" + assert error_thread_ids == [main_thread_id] From 91563111df8ffd025e4a86d5e734fc635d8c56da Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Thu, 11 Jun 2026 18:22:42 -0700 Subject: [PATCH 15/16] Install console deps in dev environment --- docs/source/contributing.rst | 1 + docs/source/development.rst | 6 ++++-- docs/source/user_guide/installation.rst | 4 +++- pyproject.toml | 5 +++++ tests/test_public_api_examples.py | 9 +++++++++ uv.lock | 7 +++++++ 6 files changed, 29 insertions(+), 3 deletions(-) diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 147deecf..cd2d957c 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -58,6 +58,7 @@ If you only need documentation dependencies, sync the docs extra: - The eegprep package in editable mode - Repo tooling dependencies +- GUI and ``eegprep-console`` runtime dependencies - Documentation dependencies when ``--extra docs`` is used Code Style Guidelines diff --git a/docs/source/development.rst b/docs/source/development.rst index f04c9171..ed86b16b 100644 --- a/docs/source/development.rst +++ b/docs/source/development.rst @@ -58,8 +58,10 @@ Install the default development environment: uv sync --group dev ``uv sync`` creates ``.venv/`` and installs EEGPrep in editable mode from the -locked dependency set. Use ``uv run`` for commands so they execute inside this -environment. +locked dependency set. The development environment includes the GUI and +``eegprep-console`` runtime dependencies so ``uv run eegprep-console --full`` +works from a fresh checkout. Use ``uv run`` for commands so they execute inside +this environment. Install Documentation Dependencies ---------------------------------- diff --git a/docs/source/user_guide/installation.rst b/docs/source/user_guide/installation.rst index 25f4a0ce..11d9893d 100644 --- a/docs/source/user_guide/installation.rst +++ b/docs/source/user_guide/installation.rst @@ -75,7 +75,9 @@ To install eegprep from source for development: uv sync --group dev ``uv sync`` creates the project environment, installs EEGPrep in editable mode, -and uses ``uv.lock`` for reproducible dependency resolution. +and uses ``uv.lock`` for reproducible dependency resolution. The development +environment includes the GUI and console runtime dependencies, so a fresh +checkout can immediately launch ``uv run eegprep-console --full``. To develop or build documentation from source, include the docs extra: diff --git a/pyproject.toml b/pyproject.toml index 5fb8d58f..a2515e9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,12 @@ eegprep-validate-extension-catalog = "eegprep.extension_catalog:main" [dependency-groups] dev = [ + # Keep `uv run eegprep-console --full` working from a fresh source checkout. + # Published installs still keep GUI/console dependencies behind extras. + "ipython>=8.0", "pytest>=8.0", + "pyqtgraph>=0.13.7", + "PySide6>=6.6", "ruff>=0.15.14", "tomli>=2.0; python_version < '3.11'", "ty>=0.0.39", diff --git a/tests/test_public_api_examples.py b/tests/test_public_api_examples.py index af4b524e..525b5ad9 100644 --- a/tests/test_public_api_examples.py +++ b/tests/test_public_api_examples.py @@ -72,6 +72,15 @@ def test_project_entry_points_cover_gui_and_console() -> None: assert pyproject["project"]["scripts"]["eegprep"] == "eegprep.cli.main:main" +def test_development_dependencies_cover_console_runtime() -> None: + pyproject = _read_pyproject() + dev_dependencies = set(pyproject["dependency-groups"]["dev"]) + + assert "ipython>=8.0" in dev_dependencies + assert "pyqtgraph>=0.13.7" in dev_dependencies + assert "PySide6>=6.6" in dev_dependencies + + def test_setuptools_package_data_covers_runtime_resources() -> None: pyproject = _read_pyproject() package_root = REPO_ROOT / "src/eegprep" diff --git a/uv.lock b/uv.lock index 4a78a0a0..f100f602 100644 --- a/uv.lock +++ b/uv.lock @@ -753,6 +753,10 @@ torch = [ [package.dev-dependencies] dev = [ + { name = "ipython", version = "8.39.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "ipython", version = "9.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pyqtgraph" }, + { name = "pyside6" }, { name = "pytest" }, { name = "ruff" }, { name = "tomli", marker = "python_full_version < '3.11'" }, @@ -808,6 +812,9 @@ provides-extras = ["torch", "eeglabio", "gui", "console", "docs", "all"] [package.metadata.requires-dev] dev = [ + { name = "ipython", specifier = ">=8.0" }, + { name = "pyqtgraph", specifier = ">=0.13.7" }, + { name = "pyside6", specifier = ">=6.6" }, { name = "pytest", specifier = ">=8.0" }, { name = "ruff", specifier = ">=0.15.14" }, { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" }, From 6cb7e14bd56b6e7a8fcaeecd5722368882019286 Mon Sep 17 00:00:00 2001 From: suraj-ranganath Date: Thu, 11 Jun 2026 18:48:49 -0700 Subject: [PATCH 16/16] Address console stack review findings --- src/eegprep/functions/guifunc/long_task.py | 54 +++++++++++++++------ src/eegprep/functions/guifunc/session.py | 3 ++ src/eegprep/functions/popfunc/pop_newset.py | 5 +- tests/test_gui_long_task.py | 31 ++++++++++++ tests/test_gui_main_window.py | 35 +++++++++++++ tests/test_gui_pop_runica.py | 15 +++++- tests/test_pop_newset.py | 20 ++++++++ 7 files changed, 145 insertions(+), 18 deletions(-) diff --git a/src/eegprep/functions/guifunc/long_task.py b/src/eegprep/functions/guifunc/long_task.py index 0cefc811..a7aa9889 100644 --- a/src/eegprep/functions/guifunc/long_task.py +++ b/src/eegprep/functions/guifunc/long_task.py @@ -5,6 +5,7 @@ from collections.abc import Callable from dataclasses import dataclass import logging +import threading from typing import Any try: # pragma: no cover - depends on optional GUI dependency @@ -24,6 +25,11 @@ class LongTaskHandle: receiver: Any | None = None +_LOGGER_LOCK = threading.Lock() +_LOGGER_DEPTH = 0 +_LOGGER_OLD_LEVEL: int | None = None + + def run_long_task( *, parent: Any | None, @@ -54,22 +60,15 @@ class Worker(qt_core.QObject): def run(self) -> None: handler = _SignalLogHandler(self.message) handler.setFormatter(logging.Formatter("%(message)s")) - handler.setLevel(logging.INFO) - eegprep_logger = logging.getLogger("eegprep") - old_level = eegprep_logger.level - if old_level == logging.NOTSET or old_level > logging.INFO: - eegprep_logger.setLevel(logging.INFO) - eegprep_logger.addHandler(handler) - try: + with _ForwardEegprepLogs(handler): self.message.emit(label) - result = task() - self.succeeded.emit(result) - except Exception as exc: # noqa: BLE001 - forwarded to GUI error handler. - self.failed.emit(exc) - finally: - eegprep_logger.removeHandler(handler) - eegprep_logger.setLevel(old_level) - self.finished.emit() + try: + result = task() + self.succeeded.emit(result) + except Exception as exc: # noqa: BLE001 - forwarded to GUI error handler. + self.failed.emit(exc) + finally: + self.finished.emit() thread = qt_core.QThread() worker = Worker() @@ -125,6 +124,31 @@ def emit(self, record: logging.LogRecord) -> None: self.handleError(record) +class _ForwardEegprepLogs: + def __init__(self, handler: logging.Handler) -> None: + self.handler = handler + self.logger = logging.getLogger("eegprep") + + def __enter__(self) -> None: + global _LOGGER_DEPTH, _LOGGER_OLD_LEVEL + with _LOGGER_LOCK: + if _LOGGER_DEPTH == 0: + _LOGGER_OLD_LEVEL = self.logger.level + if self.logger.level == logging.NOTSET or self.logger.level > logging.INFO: + self.logger.setLevel(logging.INFO) + _LOGGER_DEPTH += 1 + self.logger.addHandler(self.handler) + + def __exit__(self, _exc_type: Any, _exc: Any, _tb: Any) -> None: + global _LOGGER_DEPTH, _LOGGER_OLD_LEVEL + with _LOGGER_LOCK: + self.logger.removeHandler(self.handler) + _LOGGER_DEPTH -= 1 + if _LOGGER_DEPTH == 0: + self.logger.setLevel(logging.NOTSET if _LOGGER_OLD_LEVEL is None else _LOGGER_OLD_LEVEL) + _LOGGER_OLD_LEVEL = None + + def _update_progress_label(progress: Any, label: str, message: str) -> None: message = str(message).strip() progress.setLabelText(label if not message or message == label else f"{label}\n{message}") diff --git a/src/eegprep/functions/guifunc/session.py b/src/eegprep/functions/guifunc/session.py index 6c8df35f..951081ab 100644 --- a/src/eegprep/functions/guifunc/session.py +++ b/src/eegprep/functions/guifunc/session.py @@ -253,6 +253,9 @@ def apply_workspace_state( if resolved_currentset and max(resolved_currentset) > len(resolved_alleeg): raise ValueError("CURRENTSET contains indices outside ALLEEG") resolved_eeg = self._resolve_workspace_eeg(eeg, resolved_alleeg, resolved_currentset) + current = resolved_eeg if isinstance(resolved_eeg, list) else [resolved_eeg] + if resolved_currentset and len(current) != len(resolved_currentset): + raise ValueError("EEG selection length must match CURRENTSET") self.ALLEEG = resolved_alleeg self.EEG = resolved_eeg self.CURRENTSET = resolved_currentset diff --git a/src/eegprep/functions/popfunc/pop_newset.py b/src/eegprep/functions/popfunc/pop_newset.py index 121304ed..0f43bac2 100644 --- a/src/eegprep/functions/popfunc/pop_newset.py +++ b/src/eegprep/functions/popfunc/pop_newset.py @@ -153,8 +153,9 @@ def _run_gui( "gui": "off", } comments = result.get("comments") - if comments is None: - comments = result.get("editdescription") + edited_comments = result.get("editdescription") + if comments is None and isinstance(edited_comments, str): + comments = edited_comments if comments is not None: gui_options["comments"] = str(comments) if _is_on(result.get("savenew")): diff --git a/tests/test_gui_long_task.py b/tests/test_gui_long_task.py index 25761f4f..33b33091 100644 --- a/tests/test_gui_long_task.py +++ b/tests/test_gui_long_task.py @@ -48,6 +48,37 @@ def task(): assert "worker progress" in handle.dialog.labelText() +def test_run_long_task_restores_eegprep_logger_level_after_forwarding_progress(qapp): + from PySide6 import QtCore + + loop = QtCore.QEventLoop() + logger = logging.getLogger("eegprep") + original_level = logger.level + logger.setLevel(logging.WARNING) + + def task(): + logging.getLogger("eegprep.tests").info("worker progress") + return "done" + + try: + handle = run_long_task( + parent=None, + title="Running test task", + label="Running test task.", + task=task, + on_success=lambda _result: None, + on_error=lambda _exc: None, + on_finished=lambda _handle: loop.quit(), + ) + QtCore.QTimer.singleShot(3000, loop.quit) + loop.exec() + + assert "worker progress" in handle.dialog.labelText() + assert logger.level == logging.WARNING + finally: + logger.setLevel(original_level) + + def test_run_long_task_callbacks_are_delivered_on_main_thread(qapp, monkeypatch): from PySide6 import QtCore diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py index 99e9819f..2e21d3a2 100644 --- a/tests/test_gui_main_window.py +++ b/tests/test_gui_main_window.py @@ -360,6 +360,41 @@ def test_session_stores_multiple_selected_datasets_back_to_same_indices(self): self.assertEqual(session.CURRENTSET, [1, 2]) self.assertEqual([item["ref"] for item in session.ALLEEG], ["average", "average"]) + def test_apply_workspace_state_rejects_currentset_outside_alleeg_before_mutating(self): + session = EEGPrepSession() + session.store_current(_demo_eeg(), new=True) + original_eeg = session.EEG + original_alleeg = list(session.ALLEEG) + original_currentset = list(session.CURRENTSET) + + with self.assertRaisesRegex(ValueError, "CURRENTSET contains indices outside ALLEEG"): + session.apply_workspace_state(alleeg=[_demo_eeg()], currentset=2) + + self.assertIs(session.EEG, original_eeg) + self.assertEqual(len(session.ALLEEG), len(original_alleeg)) + self.assertIs(session.ALLEEG[0], original_alleeg[0]) + self.assertEqual(session.CURRENTSET, original_currentset) + + def test_apply_workspace_state_rejects_eeg_list_length_mismatch_before_mutating(self): + session = EEGPrepSession() + first = _demo_eeg() + second = _demo_eeg() + second["setname"] = "second" + session.store_current(first, new=True) + session.store_current(second, new=True) + original_eeg = session.EEG + original_alleeg = list(session.ALLEEG) + original_currentset = list(session.CURRENTSET) + + with self.assertRaisesRegex(ValueError, "EEG selection length must match CURRENTSET"): + session.apply_workspace_state(alleeg=[first, second], eeg=[first], currentset=[1, 2]) + + self.assertIs(session.EEG, original_eeg) + self.assertEqual(len(session.ALLEEG), len(original_alleeg)) + self.assertIs(session.ALLEEG[0], original_alleeg[0]) + self.assertIs(session.ALLEEG[1], original_alleeg[1]) + self.assertEqual(session.CURRENTSET, original_currentset) + def test_session_delete_current_selects_remaining_dataset(self): session = EEGPrepSession() first = _demo_eeg() diff --git a/tests/test_gui_pop_runica.py b/tests/test_gui_pop_runica.py index 0b05c8af..d31004e2 100644 --- a/tests/test_gui_pop_runica.py +++ b/tests/test_gui_pop_runica.py @@ -10,7 +10,7 @@ from eegprep.functions.guifunc.spec import controls_by_tag from eegprep.functions.guifunc.qt import QtDialogRenderer from eegprep.functions.popfunc.pop_loadset import pop_loadset -from eegprep.functions.popfunc.pop_runica import pop_runica, pop_runica_dialog_spec +from eegprep.functions.popfunc.pop_runica import pop_runica, pop_runica_dialog_spec, pop_runica_gui_options def _eeg(): @@ -96,6 +96,19 @@ def run(self, spec, initial_values=None): "EEG = pop_runica(EEG, 'icatype', 'runica', 'extended', 1, 'maxsteps', 2, 'interrupt', 'on');", ) + def test_gui_options_do_not_inject_interrupt_for_non_runica_algorithms(self): + class Renderer: + def run(self, spec, initial_values=None): + return {"icatype": 4, "params": "'maxiter', 7", "reorder": True, "chantype": ""} + + options = pop_runica_gui_options(_eeg(), renderer=Renderer()) + + self.assertIsNotNone(options) + assert options is not None + self.assertEqual(options["icatype"], "picard") + self.assertEqual(options["options"], {"maxiter": 7}) + self.assertNotIn("interrupt", options["options"]) + def test_gui_result_runs_runica_and_returns_history(self): class Renderer: def run(self, spec, initial_values=None): diff --git a/tests/test_pop_newset.py b/tests/test_pop_newset.py index f4f56664..d135041f 100644 --- a/tests/test_pop_newset.py +++ b/tests/test_pop_newset.py @@ -171,6 +171,26 @@ def run(self, _spec, initial_values=None): ) +def test_pop_newset_gui_untouched_description_button_preserves_comments(): + class Renderer: + def run(self, _spec, initial_values=None): + return {"setname": "processed", "editdescription": False, "overwrite": 1} + + alleeg, current, current_set, _command = pop_newset([], _eeg(name="original"), 0) + current["comments"] = "old notes" + processed = _eeg(name="processed") + processed["comments"] = "old notes" + + alleeg, current, current_set, command = pop_newset(alleeg, processed, current_set, "gui", "on", renderer=Renderer()) + + assert len(alleeg) == 2 + assert current_set == 2 + assert current["comments"] == "old notes" + assert command == ( + "[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, 'setname', 'processed', 'overwrite', 'off');" + ) + + def test_pop_newset_gui_choice_can_overwrite_current_dataset(): class Renderer: def run(self, _spec, initial_values=None):