From 9dc4af415e5e74a95ca041754176e523382e0dc8 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Mon, 8 Jun 2026 20:46:25 -0700
Subject: [PATCH 01/16] Improve EEGPrep autoreview skill

---
 .agents/skills/oc-autoreview-adapted/SKILL.md |  190 +--
 .../oc-autoreview-adapted/agents/openai.yaml  |    4 +-
 .../oc-autoreview-adapted/scripts/autoreview  | 1064 +++++++++++++----
 .../scripts/test-review-harness.ps1           |   51 +
 .../scripts/test-review-harness.py            |   62 +-
 5 files changed, 974 insertions(+), 397 deletions(-)
 create mode 100644 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1

diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md
index a0810028..763a9f4b 100644
--- a/.agents/skills/oc-autoreview-adapted/SKILL.md
+++ b/.agents/skills/oc-autoreview-adapted/SKILL.md
@@ -1,166 +1,108 @@
 ---
 name: oc-autoreview-adapted
-description: Run an autonomous EEGPrep-focused structured autoreview on local changes, branches, commits, or PRs using the bundled Codex helper. Use when the user asks for autoreview, OC autoreview, closeout review, second-pass review, final review before commit/push/PR, or when non-trivial EEGPrep code changes need a high-signal correctness, EEGLAB parity, GUI/session, tests, and repo-instruction check.
+description: Run autonomous EEGPrep-focused structured autoreview on dirty changes, branches, commits, PR stacks, or the whole EEGPrep-owned codebase; verify and fix real findings from first principles using AGENTS.md, EEGLAB parity, GUI/console, tests, docs, and security constraints.
 ---
 
 # OC Autoreview Adapted
 
-Run the bundled structured review helper as an autonomous closeout check for
-EEGPrep. This skill adapts the OpenClaw autoreview principles to this project:
-one frozen diff bundle, one structured JSON result, validated changed-file
-findings, read-only inspection, heartbeat progress, optional parallel tests, and
-repeat-until-clean behavior.
+Use the bundled helper for high-signal closeout review or whole-codebase bug hunts. It builds one bounded review bundle, runs one or more read-only reviewer engines, validates structured JSON, prints heartbeats for long runs, and exits nonzero when actionable findings remain.
 
 ## Contract
 
-- Run the helper for real unless the user explicitly asks for a plan or manual
-  review only.
-- Treat review output as advisory. Verify every accepted finding by reading the
-  real code path and adjacent files before fixing or reporting it.
-- Keep going until the helper exits cleanly with no accepted/actionable findings
-  or until you consciously reject a remaining finding with a concrete reason.
-- If a review-triggered fix changes code, rerun focused tests and rerun the
-  helper on the same target.
-- Do not run nested review tools from inside a review. The helper builds one
-  bundle, calls Codex in read-only mode, validates the result, and exits.
-- Do not push, stage, commit, or open a PR just to run autoreview. Do those only
-  when the user requested that action.
-- Be patient. The helper prints heartbeat lines such as
-  `review still running: codex elapsed=... pid=...`; those are healthy progress.
-
-## Helper
-
-Use the repo-local helper:
+- Run it for real unless the user asked only for a plan.
+- Treat output as advisory. Verify every accepted finding in the real code path before fixing or reporting it.
+- Accept concrete bugs, regressions, EEGLAB parity breaks, unsafe I/O/security risks, missing tests tied to behavior, and maintainability issues that cause real future defects.
+- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, and subjective MATLAB/Python style comments.
+- If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason.
+- Do not invoke nested review tools from inside review. The helper already runs one structured review path.
+- Do not push, stage, commit, or open a PR unless the user separately requested that action.
 
-```bash
-.agents/skills/oc-autoreview-adapted/scripts/autoreview --help
-```
-
-The helper:
-
-- defaults to Codex with read-only sandboxing and web search enabled;
-- chooses dirty local changes first in `--mode auto`;
-- otherwise uses the current PR base when discoverable, then `origin/develop`;
-- accepts `--mode local`, `--mode branch --base origin/develop`, and
-  `--mode commit --commit HEAD`;
-- includes root/scoped `AGENTS.md` instructions in the review bundle;
-- validates structured JSON against an EEGPrep-specific schema;
-- filters findings to changed files only;
-- exits nonzero when accepted/actionable findings remain;
-- supports `--prompt`, `--prompt-file`, `--dataset`, `--json-output`,
-  `--output`, `--parallel-tests`, `--require-finding`, `--expect-findings`,
-  `--no-web-search`, `--model`, and `--thinking`.
-
-The smoke harness creates a temporary EEG-style fixture repo:
-
-```bash
-.agents/skills/oc-autoreview-adapted/scripts/test-review-harness --dry-run
-```
+## Commands
 
-Run the full harness only when it is acceptable to spend a real Codex review:
+Set paths once:
 
 ```bash
-.agents/skills/oc-autoreview-adapted/scripts/test-review-harness --fixture buggy
+export AUTOREVIEW=".agents/skills/oc-autoreview-adapted/scripts/autoreview"
+export AUTOREVIEW_HARNESS=".agents/skills/oc-autoreview-adapted/scripts/test-review-harness"
 ```
 
-## Pick Target
-
-Use the smallest target that covers the request.
-
 Dirty local work:
 
 ```bash
-.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode local
+"$AUTOREVIEW" --mode local
 ```
 
-Branch or PR work:
+Branch or stacked PR work:
 
 ```bash
-.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode branch --base origin/develop
+base=$(gh pr view --json baseRefName --jq .baseRefName 2>/dev/null || echo develop)
+"$AUTOREVIEW" --mode branch --base "origin/$base"
 ```
 
-If an open PR exists, prefer its actual base:
+Single committed change:
 
 ```bash
-base=$(gh pr view --json baseRefName --jq .baseRefName)
-.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode branch --base "origin/$base"
+"$AUTOREVIEW" --mode commit --commit HEAD
 ```
 
-Committed single change:
+Whole EEGPrep-owned codebase audit:
 
 ```bash
-.agents/skills/oc-autoreview-adapted/scripts/autoreview --mode commit --commit HEAD
+"$AUTOREVIEW" --mode codebase --thinking codex=xhigh
 ```
 
-Do not force local mode after committing. A clean local review only proves there
-is no dirty patch.
+The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files, excluding the vendored EEGLAB reference tree (`src/eegprep/eeglab/`) and `sample_data/` by default; the reviewer may inspect files read-only and report real bugs anywhere in scope.
+
+## Useful Options
 
-## Parallel Closeout
+- `--engine codex|claude|droid|copilot`; default is Codex.
+- `--reviewers codex,claude` or `--panel` for a multi-reviewer pass.
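+- `--model codex=gpt-5.1 --thinking codex=xhigh` to set the model and thinking level for an engine; Codex accepts `low`, `medium`, `high`, and `xhigh`, and Claude also accepts `max`.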
+- `--stream-engine-output` to see compact live engine activity.
+- `--parallel-tests "uv run pytest tests/test_file.py"` to run tests while review runs.
+- `--prompt` / `--prompt-file` / `--dataset` to add evidence.
+- `--json-output /tmp/review.json` and `--output /tmp/review.txt` for artifacts.
+- `--mode uncommitted` is an alias for `local`; use branch/commit modes after committing, because a clean local review only proves there is no dirty patch.
+- `--skip-fetch` avoids fetching before branch diffs.
+- `--heartbeat-seconds 60` controls long-run heartbeat cadence.
 
-It is OK to run focused tests concurrently with review after formatting-sensitive
-work is done:
+Smoke check:
 
 ```bash
-.agents/skills/oc-autoreview-adapted/scripts/autoreview \
-  --parallel-tests "uv run pytest tests/test_pop_select.py"
+"$AUTOREVIEW_HARNESS" --dry-run
+"$AUTOREVIEW_HARNESS" --fixture buggy --engine codex
 ```
 
-If tests or review findings lead to edits, rerun the affected tests and rerun
-autoreview. Stop when the final helper run exits 0 with no accepted/actionable
-findings. Do not run another review only for cleaner wording.
+On Windows, use:
+
+```powershell
+python .agents\skills\oc-autoreview-adapted\scripts\autoreview --help
+.agents\skills\oc-autoreview-adapted\scripts\test-review-harness.ps1 -Fixture buggy -Engine codex
+```
 
 ## EEGPrep Review Surface
 
-The helper prompt asks Codex to prioritize:
-
-- correctness bugs, import/runtime failures, wrong numerical results, and broken
-  common workflows;
-- EEGLAB parity in APIs, `pop_*` wrappers, history commands, GUI behavior, event
-  semantics, and expected data structures;
-- EEG dict fields including `data`, `nbchan`, `pnts`, `trials`, `srate`,
-  `xmin`, `xmax`, `times`, `chanlocs`, `event`, `urevent`, `epoch`, `history`,
-  `icaact`, `icawinv`, `icasphere`, `icaweights`, and `icachansind`;
-- MATLAB/Python indexing boundaries, especially 1-based EEGLAB latencies and
-  user-facing indices versus 0-based Python arrays;
-- channel-major shape assumptions: continuous `(nbchan, pnts)` and epoched
-  `(nbchan, pnts, trials)`;
+Prioritize:
+
+- correctness, runtime/import failures, bad numerical results, broken common workflows;
+- EEGLAB parity in APIs, `pop_*` wrappers, history commands, GUI layout/behavior, events, and data structures;
+- EEG dict invariants: `data`, `nbchan`, `pnts`, `trials`, `srate`, `xmin`, `xmax`, `times`, `chanlocs`, `event`, `urevent`, `epoch`, `history`, ICA fields;
+- 1-based EEGLAB user indices/latencies versus 0-based Python array indices;
+- channel-major continuous `(nbchan, pnts)` and epoched `(nbchan, pnts, trials)` data;
 - GUI plus `eegprep-console` synchronization through `EEGPrepSession`;
-- `return_com=True`, `(EEG, com)` returns, history strings, and session update
-  paths for user-facing `pop_*` functions;
-- runtime independence from `src/eegprep/eeglab/`;
-- packaged Markdown help resources for GUI Help or `pophelp`;
-- missing tests tied to changed behavior;
-- concrete security, path, file I/O, and dependency risks;
-- realistic EEG-size performance regressions.
-
-## Triage Findings
-
-Accept findings only when they are concrete and introduced or exposed by the
-reviewed change. Reject:
-
-- pre-existing issues outside the diff;
-- generic linter/formatter comments;
-- broad refactors and speculative abstractions;
-- unlikely edge cases that would complicate the code without protecting real
-  workflows;
-- subjective MATLAB-vs-Python style preferences that do not break EEGPrep's
-  parity contract.
-
-For each accepted finding, fix the smallest ownership boundary that addresses
-the bug. For each rejected finding, record the reason briefly in the final
-report. Add an inline code comment only when it documents a real invariant that
-future reviewers need to know.
-
-## Final Report
-
-Include:
-
-- review command used;
-- tests/proof run;
-- findings accepted, fixed, or rejected, briefly why;
-- the clean result from the final helper run, or the exact remaining risk if a
-  finding was consciously left open.
-
-If the final helper run exits 0 and prints
-`autoreview clean: no accepted/actionable findings reported`, report that run as
-clean and stop.
+- `return_com=True`, `(EEG, com)` returns, history replay, and session update paths;
+- runtime independence from `src/eegprep/eeglab`;
+- packaged Markdown help for GUI Help / `pophelp`;
+- realistic EEG-size performance and concrete security/path/I/O risks.
+
+## Loop
+
+1. Format first if formatting can change line locations.
+2. Run autoreview on the smallest sufficient target.
+3. Verify each finding against code and AGENTS.md.
+4. Fix accepted findings at the right ownership boundary.
+5. Run focused tests, then broader tests if risk warrants.
+6. Rerun the same autoreview target.
+7. Final response: command used, tests run, findings fixed/rejected, and final clean result or remaining risk.
+
+If the helper prints `autoreview clean: no accepted/actionable findings reported` and exits 0, report that as clean and stop.
diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
index 08ca5ab1..7c1c3100 100644
--- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
+++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
@@ -1,4 +1,4 @@
 interface:
   display_name: "OC Autoreview Adapted"
-  short_description: "EEGPrep-focused autoreview workflow"
-  default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep changes before closeout."
+  short_description: "EEGPrep autoreview for diffs or codebase audits"
+  default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real findings."
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview
index 751fb963..5fdee362 100755
--- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview
+++ b/.agents/skills/oc-autoreview-adapted/scripts/autoreview
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
-from __future__ import annotations
-
-# Adapted for EEGPrep from OpenClaw's MIT-licensed autoreview helper.
+# Adapted for EEGPrep from OpenClaw's MIT-licensed autoreview helper:
+# https://github.com/openclaw/agent-skills/tree/main/skills/autoreview
+#
 # Original copyright (c) 2026 openclaw.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -21,35 +21,31 @@ from __future__ import annotations
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+from __future__ import annotations
 
 import argparse
+import concurrent.futures
+import copy
 import json
 import os
+import queue
 import subprocess
 import sys
 import tempfile
 import textwrap
+import threading
 import time
 from pathlib import Path
-from typing import Any
+from typing import Any, Callable
 
 
+ENGINES = ("codex", "claude", "droid", "copilot")
 DEFAULT_BASE = "origin/develop"
 TRUNK_BRANCHES = {"develop", "main", "master"}
-REPORT_KEYS = {
-    "findings",
-    "overall_correctness",
-    "overall_explanation",
-    "overall_confidence",
-}
-FINDING_KEYS = {
-    "title",
-    "body",
-    "priority",
-    "confidence",
-    "category",
-    "code_location",
-}
+CODEBASE_EXCLUDED_PREFIXES = (
+    "src/eegprep/eeglab/",
+    "sample_data/",
+)
 CATEGORIES = {
     "bug",
     "security",
@@ -62,6 +58,13 @@ CATEGORIES = {
     "docs_help",
     "performance",
 }
+THINKING_LEVELS_BY_ENGINE = {
+    "codex": {"low", "medium", "high", "xhigh"},
+    "claude": {"low", "medium", "high", "xhigh", "max"},
+    "droid": set(),
+    "copilot": set(),
+}
+
 
 SCHEMA: dict[str, Any] = {
     "type": "object",
@@ -91,7 +94,10 @@ SCHEMA: dict[str, Any] = {
                     "body": {"type": "string", "minLength": 1, "maxLength": 2400},
                     "priority": {"type": "string", "enum": ["P0", "P1", "P2", "P3"]},
                     "confidence": {"type": "number", "minimum": 0, "maximum": 1},
-                    "category": {"type": "string", "enum": sorted(CATEGORIES)},
+                    "category": {
+                        "type": "string",
+                        "enum": sorted(CATEGORIES),
+                    },
                     "code_location": {
                         "type": "object",
                         "additionalProperties": False,
@@ -115,11 +121,7 @@ SCHEMA: dict[str, Any] = {
 
 
 def run(
-    args: list[str],
-    cwd: Path,
-    *,
-    input_text: str | None = None,
-    check: bool = True,
+    args: list[str], cwd: Path, *, input_text: str | None = None, check: bool = True
 ) -> subprocess.CompletedProcess[str]:
     result = subprocess.run(
         args,
@@ -130,11 +132,8 @@ def run(
         stderr=subprocess.PIPE,
     )
     if check and result.returncode != 0:
-        command = " ".join(args)
-        raise SystemExit(
-            f"command failed ({result.returncode}): {command}\n"
-            f"{result.stderr or result.stdout}"
-        )
+        cmd = " ".join(args)
+        raise SystemExit(f"command failed ({result.returncode}): {cmd}\n{result.stderr or result.stdout}")
     return result
 
 
@@ -142,15 +141,26 @@ def run_with_heartbeat(
     args: list[str],
     cwd: Path,
     *,
-    input_text: str,
+    input_text: str | None = None,
     label: str,
-    heartbeat_seconds: int,
+    heartbeat_seconds: int = 60,
+    stream_output: bool = False,
+    stream_display: Callable[[str, str], str | None] | None = None,
 ) -> subprocess.CompletedProcess[str]:
+    if stream_output:
+        return run_with_stream(
+            args,
+            cwd,
+            input_text=input_text,
+            label=label,
+            heartbeat_seconds=heartbeat_seconds,
+            stream_display=stream_display,
+        )
     started = time.monotonic()
     proc = subprocess.Popen(
         args,
         cwd=cwd,
-        stdin=subprocess.PIPE,
+        stdin=subprocess.PIPE if input_text is not None else None,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         text=True,
@@ -166,11 +176,87 @@ def run_with_heartbeat(
         except subprocess.TimeoutExpired:
             first_communicate = False
             elapsed = int(time.monotonic() - started)
-            print(
-                f"review still running: {label} elapsed={elapsed}s pid={proc.pid}",
-                file=sys.stderr,
-                flush=True,
-            )
+            print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True)
+
+
+def run_with_stream(
+    args: list[str],
+    cwd: Path,
+    *,
+    input_text: str | None,
+    label: str,
+    heartbeat_seconds: int,
+    stream_display: Callable[[str, str], str | None] | None,
+) -> subprocess.CompletedProcess[str]:
+    started = time.monotonic()
+    proc = subprocess.Popen(
+        args,
+        cwd=cwd,
+        stdin=subprocess.PIPE if input_text is not None else None,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        bufsize=1,
+    )
+    events: queue.Queue[tuple[str, str | None]] = queue.Queue()
+    stdout_parts: list[str] = []
+    stderr_parts: list[str] = []
+
+    def read_stream(name: str, stream: Any) -> None:
+        try:
+            for line in iter(stream.readline, ""):
+                events.put((name, line))
+        finally:
+            events.put((name, None))
+
+    def write_stdin() -> None:
+        if proc.stdin is None or input_text is None:
+            return
+        try:
+            proc.stdin.write(input_text)
+            proc.stdin.close()
+        except BrokenPipeError:
+            return
+
+    threads = [
+        threading.Thread(target=read_stream, args=("stdout", proc.stdout), daemon=True),
+        threading.Thread(target=read_stream, args=("stderr", proc.stderr), daemon=True),
+    ]
+    for thread in threads:
+        thread.start()
+    stdin_thread = threading.Thread(target=write_stdin, daemon=True)
+    stdin_thread.start()
+
+    open_streams = 2
+    while open_streams:
+        try:
+            name, line = events.get(timeout=heartbeat_seconds)
+        except queue.Empty:
+            elapsed = int(time.monotonic() - started)
+            print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True)
+            continue
+        if line is None:
+            open_streams -= 1
+            continue
+        if name == "stdout":
+            stdout_parts.append(line)
+        else:
+            stderr_parts.append(line)
+        display = stream_display(name, line) if stream_display else line
+        if display:
+            target = sys.stdout if name == "stdout" else sys.stderr
+            target.write(display)
+            target.flush()
+
+    for thread in threads:
+        thread.join()
+    stdin_thread.join(timeout=1)
+    returncode = proc.wait()
+    return subprocess.CompletedProcess(args, returncode, "".join(stdout_parts), "".join(stderr_parts))
+
+
+def git(repo: Path, *args: str, check: bool = True) -> str:
+    return run([resolve_command("git", repo), *args], repo, check=check).stdout
 
 
 def repo_root() -> Path:
@@ -200,13 +286,8 @@ def discover_repo_root(start: Path) -> Path | None:
         current = current.parent
 
 
-def git(repo: Path, *args: str, check: bool = True) -> str:
-    return run([resolve_command("git", repo), *args], repo, check=check).stdout
-
-
 def current_branch(repo: Path) -> str:
-    branch = git(repo, "branch", "--show-current", check=False).strip()
-    return branch or "detached"
+    return git(repo, "branch", "--show-current", check=False).strip() or "detached"
 
 
 def is_dirty(repo: Path) -> bool:
@@ -214,17 +295,19 @@ def is_dirty(repo: Path) -> bool:
 
 
 def choose_target(repo: Path, mode: str, base_ref: str | None) -> tuple[str, str | None]:
-    normalized = "local" if mode == "uncommitted" else mode
+    mode = "local" if mode == "uncommitted" else mode
     branch = current_branch(repo)
-    if normalized == "local" or (normalized == "auto" and is_dirty(repo)):
+    if mode == "local" or (mode == "auto" and is_dirty(repo)):
         return "local", None
-    if normalized == "commit":
+    if mode == "codebase":
+        return "codebase", None
+    if mode == "commit":
         return "commit", None
-    if normalized == "branch" or (normalized == "auto" and branch not in TRUNK_BRANCHES):
+    if mode == "branch" or (mode == "auto" and branch not in TRUNK_BRANCHES):
         return "branch", base_ref or detect_pr_base(repo) or DEFAULT_BASE
     raise SystemExit(
         "no review target: clean trunk checkout and no forced mode. "
-        "Pass --mode branch --base <ref> or --mode commit --commit <ref>."
+        "Pass --mode codebase, --mode branch --base <ref>, or --mode commit --commit <ref>."
     )
 
 
@@ -232,24 +315,16 @@ def detect_pr_base(repo: Path) -> str | None:
     gh_bin = find_command("gh", repo)
     if not gh_bin:
         return None
-    result = run(
-        [gh_bin, "pr", "view", "--json", "baseRefName", "--jq", ".baseRefName"],
-        repo,
-        check=False,
-    )
+    result = run([gh_bin, "pr", "view", "--json", "baseRefName", "--jq", ".baseRefName"], repo, check=False)
     base = result.stdout.strip()
-    if result.returncode != 0 or not base:
-        return None
-    return f"origin/{base}"
+    return f"origin/{base}" if result.returncode == 0 and base else None
 
 
 def resolve_command(name: str, repo: Path) -> str:
     resolved = find_command(name, repo)
     if resolved:
         return resolved
-    raise SystemExit(
-        f"executable not found: {name}. Install it or pass an explicit trusted path."
-    )
+    raise SystemExit(f"executable not found: {name}. Install it or pass an explicit trusted path when supported.")
 
 
 def find_command(name: str, repo: Path) -> str | None:
@@ -276,18 +351,18 @@ def find_command(name: str, repo: Path) -> str | None:
     return None
 
 
+def is_within(path: Path, root: Path) -> bool:
+    return path == root or path.is_relative_to(root)
+
+
 def has_directory_component(name: str, command: Path) -> bool:
     separators = [separator for separator in (os.sep, os.altsep) if separator]
-    return command.is_absolute() or bool(command.drive) or any(
-        separator in name for separator in separators
-    )
+    return command.is_absolute() or bool(command.drive) or any(separator in name for separator in separators)
 
 
 def first_executable_candidate(path: Path, *, reject_root: Path | None = None) -> str | None:
     if os.name == "nt" and not path.suffix:
-        extensions = [
-            ext for ext in os.environ.get("PATHEXT", ".COM;.EXE;.BAT;.CMD").split(";") if ext
-        ]
+        extensions = [ext for ext in os.environ.get("PATHEXT", ".COM;.EXE;.BAT;.CMD").split(";") if ext]
         candidates = [path.with_suffix(ext.lower()) for ext in extensions]
         candidates.extend(path.with_suffix(ext.upper()) for ext in extensions)
         candidates.append(path)
@@ -305,11 +380,7 @@ def first_executable_candidate(path: Path, *, reject_root: Path | None = None) -
     return None
 
 
-def is_within(path: Path, root: Path) -> bool:
-    return path == root or path.is_relative_to(root)
-
-
-def bounded(text: str, limit: int = 200_000) -> str:
+def bounded(text: str, limit: int = 180_000) -> str:
     if len(text) <= limit:
         return text
     return text[:limit] + f"\n\n[truncated at {limit} characters]\n"
@@ -322,14 +393,15 @@ def bounded_field(text: str, limit: int) -> str:
     return text[: max(0, limit - len(suffix))] + suffix
 
 
-def read_text(path: Path, limit: int = 50_000) -> str:
+def read_text(path: Path, limit: int = 40_000) -> str:
     try:
         data = path.read_bytes()
     except OSError as exc:
         return f"[unreadable: {exc}]"
     if b"\0" in data:
         return "[binary file omitted]"
-    return bounded(data.decode("utf-8", errors="replace"), limit)
+    text = data.decode("utf-8", errors="replace")
+    return bounded(text, limit)
 
 
 def local_bundle(repo: Path) -> str:
@@ -343,13 +415,12 @@ def local_bundle(repo: Path) -> str:
         git(repo, "diff", "--stat"),
         bounded(git(repo, "diff", "--patch", "--find-renames")),
     ]
-    untracked = [
-        line for line in git(repo, "ls-files", "--others", "--exclude-standard").splitlines() if line
-    ]
+    untracked = [line for line in git(repo, "ls-files", "--others", "--exclude-standard").splitlines() if line]
     if untracked:
         parts.append("# Untracked Files")
         for rel in untracked:
-            parts.append(f"## {rel}\n{read_text(repo / rel)}")
+            path = repo / rel
+            parts.append(f"## {rel}\n{read_text(path)}")
     return "\n\n".join(parts)
 
 
@@ -366,6 +437,37 @@ def branch_bundle(repo: Path, base_ref: str, *, skip_fetch: bool) -> str:
     )
 
 
+def codebase_paths(repo: Path, scopes: list[str] | None = None) -> set[str]:
+    prefixes = [scope.strip().rstrip("/") for scope in scopes or [] if scope.strip()]
+    paths: set[str] = set()
+    for rel in git(repo, "ls-files").splitlines():
+        if not rel or any(rel.startswith(prefix) for prefix in CODEBASE_EXCLUDED_PREFIXES):
+            continue
+        if prefixes and not any(rel == prefix or rel.startswith(f"{prefix}/") for prefix in prefixes):
+            continue
+        paths.add(rel)
+    return paths
+
+
+def codebase_bundle(repo: Path, paths: set[str]) -> str:
+    grouped: dict[str, int] = {}
+    for rel in paths:
+        top = rel.split("/", 1)[0]
+        grouped[top] = grouped.get(top, 0) + 1
+    inventory = "\n".join(f"- {path}" for path in sorted(paths))
+    summary = "\n".join(f"- {name}: {count} tracked files" for name, count in sorted(grouped.items()))
+    return "\n\n".join(
+        [
+            "# Codebase Audit",
+            "This is a repository-wide audit, not a diff review. The reviewer may inspect in-scope files with read-only tools.",
+            "# Scope Summary",
+            summary or "[no tracked files]",
+            "# In-Scope Tracked Files",
+            bounded(inventory, 120_000),
+        ]
+    )
+
+
 def commit_bundle(repo: Path, commit_ref: str) -> str:
     return "\n\n".join(
         [
@@ -379,6 +481,8 @@ def commit_bundle(repo: Path, commit_ref: str) -> str:
 
 def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: str) -> set[str]:
     names: set[str] = set()
+    if target == "codebase":
+        return codebase_paths(repo)
     if target == "local":
         sources = [
             git(repo, "diff", "--name-only", "--cached"),
@@ -386,8 +490,7 @@ def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: st
             git(repo, "ls-files", "--others", "--exclude-standard"),
         ]
     elif target == "branch":
-        if target_ref is None:
-            raise SystemExit("internal error: branch target missing base ref")
+        assert target_ref
         sources = [git(repo, "diff", "--name-only", f"{target_ref}...HEAD")]
     else:
         sources = [git(repo, "show", "--name-only", "--format=", commit_ref)]
@@ -399,9 +502,28 @@ def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: st
     return names
 
 
-def instruction_paths(repo: Path, changed_paths: set[str]) -> list[Path]:
+def load_extra_prompt(args: argparse.Namespace) -> str:
+    chunks: list[str] = []
+    for value in args.prompt or []:
+        chunks.append(value)
+    for path in args.prompt_file or []:
+        chunks.append(Path(path).read_text())
+    return "\n\n".join(chunks)
+
+
+def load_datasets(args: argparse.Namespace) -> str:
+    chunks: list[str] = []
+    for spec in args.dataset or []:
+        path = Path(spec)
+        if path.is_dir():
+            raise SystemExit(f"--dataset must be a file, got directory: {path}")
+        chunks.append(f"# Dataset: {path}\n{read_text(path)}")
+    return "\n\n".join(chunks)
+
+
+def instruction_paths(repo: Path, scope_paths: set[str]) -> list[Path]:
     paths = {repo / "AGENTS.md"}
-    for rel in changed_paths:
+    for rel in scope_paths:
         rel_path = Path(rel)
         if rel_path.is_absolute() or ".." in rel_path.parts:
             continue
@@ -416,106 +538,76 @@ def instruction_paths(repo: Path, changed_paths: set[str]) -> list[Path]:
     return sorted(path for path in paths if path.exists())
 
 
-def instruction_bundle(repo: Path, changed_paths: set[str]) -> str:
-    paths = instruction_paths(repo, changed_paths)
+def instruction_bundle(repo: Path, scope_paths: set[str]) -> str:
+    paths = instruction_paths(repo, scope_paths)
     if not paths:
         return "# Repository Instructions\n[no AGENTS.md files found]"
     parts = ["# Repository Instructions"]
     for path in paths:
         rel = path.relative_to(repo)
-        parts.append(f"## {rel}\n{read_text(path)}")
+        parts.append(f"## {rel}\n{read_text(path, limit=80_000)}")
     return "\n\n".join(parts)
 
 
-def load_extra_prompt(args: argparse.Namespace) -> str:
-    chunks: list[str] = []
-    for value in args.prompt or []:
-        chunks.append(value)
-    for path in args.prompt_file or []:
-        chunks.append(Path(path).read_text())
-    return "\n\n".join(chunks)
-
-
-def load_datasets(args: argparse.Namespace) -> str:
-    chunks: list[str] = []
-    for spec in args.dataset or []:
-        path = Path(spec)
-        if path.is_dir():
-            raise SystemExit(f"--dataset must be a file, got directory: {path}")
-        chunks.append(f"# Dataset: {path}\n{read_text(path)}")
-    return "\n\n".join(chunks)
-
-
 def build_prompt(
     repo: Path,
     target: str,
     target_ref: str | None,
-    changed_paths: set[str],
+    scope_paths: set[str],
     instructions: str,
     bundle: str,
     extra_prompt: str,
     datasets: str,
 ) -> str:
     target_line = f"{target} {target_ref}" if target_ref else target
-    changed = "\n".join(f"- {path}" for path in sorted(changed_paths)) or "[no changed paths]"
+    scope_label = "Changed Paths" if target != "codebase" else "Review Scope"
+    scope_list = "\n".join(f"- {path}" for path in sorted(scope_paths)) or "[no paths]"
+    scope_rule = (
+        "Report only actionable defects introduced or exposed by this change."
+        if target != "codebase"
+        else (
+            "Report concrete actionable defects in the in-scope EEGPrep-owned codebase. "
+            "Do not require diff provenance, but do avoid stale vendored/reference code and vague wishlist items."
+        )
+    )
     return textwrap.dedent(
         f"""
-        You are a senior EEGPrep code reviewer. Review the provided git change bundle only.
-        Be autonomous: inspect files as needed, reason through the changed behavior, and return a
-        structured result without asking follow-up questions.
+        You are a senior EEGPrep code reviewer. Review the provided bundle and inspect files as needed.
 
         Hard rules:
         - Return exactly one JSON object and nothing else. Do not wrap it in Markdown.
         - The JSON object must match this schema exactly:
         {json.dumps(SCHEMA, indent=2)}
         - Do not modify files.
-        - Do not invoke nested reviewers or review tools. Forbidden commands include:
-          codex review, autoreview, oracle review, and any reviewer-panel workflow.
-        - You may use read-only tools and web search to inspect source files, dependency docs,
-          EEGLAB reference behavior, current APIs, and security implications.
-        - Shell commands, if available, must be read-only inspection commands. Do not run tests,
-          formatters, package installs, generators, git mutation commands, or commands that write files.
-        - Report only actionable defects introduced or exposed by this change.
-        - Prefer high-signal findings over style feedback. False positives waste maintainer time.
+        - Do not invoke nested reviewers or review tools.
+        - Forbidden nested review commands include: codex review, autoreview, claude review, oracle review.
+        - You may use read-only tools and web search to inspect files, dependency contracts, upstream docs, current behavior, and security implications.
+        - Shell commands, if available, must be read-only inspection commands. Do not run tests, formatters, package installs, generators, network mutation commands, git mutation commands, or commands that write files.
+        - {scope_rule}
+        - Prefer high-signal findings over style feedback.
+        - Include security findings only for concrete risks: injection, secret leaks, authz/authn bypass, path traversal, unsafe deserialization, unsafe filesystem/shell use, privacy leaks, and credential handling.
+        - Do not reject legitimate functionality merely because it touches shell, filesystem, network, auth, or sensitive data. Report a security finding only when the patch creates a concrete exploitable risk, removes an important safety check, or lacks validation at a trust boundary.
         - For each finding, use the smallest file/line location that demonstrates the issue.
         - If there are no actionable findings, return an empty findings array and mark the patch correct.
 
         EEGPrep review priorities:
         - Correctness bugs, import/runtime failures, wrong numerical results, and broken common workflows.
-        - EEGLAB parity regressions in public API behavior, pop_* wrappers, history commands,
-          GUI layout/behavior, event semantics, and expected data structures.
-        - EEG dict semantics for data, nbchan, pnts, trials, srate, xmin, xmax, times,
-          chanlocs, event, urevent, epoch, history, icaact, icawinv, icasphere,
-          icaweights, and icachansind.
-        - MATLAB/Python boundary mistakes, especially 1-based EEGLAB event latencies and
-          user-facing indices versus 0-based Python arrays.
-        - Channel-major data shape assumptions: continuous data is usually (nbchan, pnts),
-          and epoched data is usually (nbchan, pnts, trials).
-        - GUI plus eegprep-console session sync: EEG, ALLEEG, CURRENTSET, LASTCOM, ALLCOM,
-          STUDY, and CURRENTSTUDY must stay synchronized through EEGPrepSession helpers.
-        - User-facing pop_* contracts: return_com=True, (EEG, com) returns, history strings,
-          and GUI/session update paths.
-        - Runtime code must not depend on src/eegprep/eeglab existing. Use it only as a
-          development reference.
-        - User-facing GUI Help or pophelp behavior needs packaged Markdown help resources.
-        - Tests should cover realistic regressions. Suggest exact missing tests only when the
-          gap is tied to changed behavior.
-        - Security findings must be concrete: path traversal, unsafe shell/filesystem use,
-          unsafe deserialization, credential/privacy leaks, or trust-boundary validation loss.
-        - Performance findings must be realistic for EEG data sizes.
-
-        Do not flag:
-        - Pre-existing issues outside the reviewed change.
-        - Generic linter/formatter comments.
-        - Broad refactors or speculative abstractions.
-        - Unlikely edge cases that would complicate the code without protecting real workflows.
-        - Subjective MATLAB-vs-Python style preferences unless they break EEGPrep's parity contract.
+        - EEGLAB parity in APIs, pop_* wrappers, history commands, GUI behavior, event semantics, and expected data structures.
+        - EEG dict fields: data, nbchan, pnts, trials, srate, xmin, xmax, times, chanlocs, event, urevent, epoch, history, icaact, icawinv, icasphere, icaweights, icachansind.
+        - MATLAB/Python indexing boundaries: EEGLAB event latencies and user-facing indices are usually 1-based; Python arrays are 0-based.
+        - Channel-major data shape: continuous (nbchan, pnts), epoched (nbchan, pnts, trials).
+        - GUI plus eegprep-console session sync through EEGPrepSession.
+        - User-facing pop_* contracts: return_com=True, (EEG, com) returns, history strings, and GUI/session update paths.
+        - Runtime code must not depend on src/eegprep/eeglab existing; use it only as a development reference.
+        - GUI Help/pophelp needs EEGPrep-owned packaged Markdown resources.
+        - Missing tests only when tied to a concrete changed or audited behavior.
+        - Realistic EEG-size performance regressions.
 
         Review target: {target_line}
         Repository: {repo}
 
-        # Changed Paths
-        {changed}
+        # {scope_label}
+        {scope_list}
 
         {extra_prompt}
 
@@ -537,6 +629,10 @@ def write_json_temp(data: dict[str, Any]) -> Path:
 
 
 def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
+    if not args.tools:
+        raise SystemExit(
+            "--no-tools is not supported by the Codex engine; use --engine claude --no-tools for a no-tools run"
+        )
     schema_path = write_json_temp(SCHEMA)
     output_path = Path(tempfile.NamedTemporaryFile("w", suffix=".json", delete=False).name)
     cmd = [resolve_command(args.codex_bin, repo), "--ask-for-approval", "never"]
@@ -546,9 +642,11 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
         cmd.extend(["--model", args.model])
     if args.thinking:
         cmd.extend(["-c", f'model_reasoning_effort="{args.thinking}"'])
+    cmd.append("exec")
+    if args.stream_engine_output:
+        cmd.append("--json")
     cmd.extend(
         [
-            "exec",
             "--ephemeral",
             "-C",
             str(repo),
@@ -567,6 +665,8 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
         input_text=prompt,
         label="codex",
         heartbeat_seconds=args.heartbeat_seconds,
+        stream_output=args.stream_engine_output,
+        stream_display=CodexStreamDisplay() if args.stream_engine_output else None,
     )
     try:
         output = output_path.read_text()
@@ -578,6 +678,258 @@ def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
     return output or result.stdout
 
 
+def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str:
+    """Invoke the Claude CLI in --print mode with the review schema and return its stdout; exit on failure."""
+    streaming = args.stream_engine_output
+    cmd = [
+        resolve_command(args.claude_bin, repo),
+        "--print",
+        "--no-session-persistence",
+        "--output-format",
+        "stream-json" if streaming else "json",
+        "--json-schema",
+        json.dumps(SCHEMA),
+    ]
+    # With tools enabled pass the filtered allow-list; otherwise pass an empty tool set.
+    cmd += ["--allowedTools", claude_allowed_tools(args)] if args.tools else ["--tools", ""]
+    if streaming:
+        cmd.append("--verbose")
+    if args.model:
+        cmd += ["--model", args.model]
+    if args.thinking:
+        cmd += ["--effort", args.thinking]
+    outcome = run_with_heartbeat(
+        cmd,
+        repo,
+        input_text=prompt,
+        label="claude",
+        heartbeat_seconds=args.heartbeat_seconds,
+        stream_output=streaming,
+        stream_display=ClaudeStreamDisplay() if streaming else None,
+    )
+    if outcome.returncode != 0:
+        raise SystemExit(f"claude engine failed ({outcome.returncode})\n{outcome.stderr or outcome.stdout}")
+    return outcome.stdout
+
+
+def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str:
+    """Run `droid exec` with the prompt in a temporary file and return its stdout.
+
+    The prompt file is removed even if command resolution or the engine run raises.
+    """
+    if args.thinking:
+        raise SystemExit("--thinking is not supported by the droid engine")
+    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as handle:
+        handle.write(prompt)  # written and closed before droid is given the path
+    prompt_path = Path(handle.name)
+    try:
+        cmd = [resolve_command(args.droid_bin, repo), "exec", "--cwd", str(repo)]
+        cmd += ["--output-format", "json", "-f", str(prompt_path)]
+        if args.model:
+            cmd += ["--model", args.model]
+        if not args.tools:
+            cmd += ["--disabled-tools", "*"]
+        result = run_with_heartbeat(
+            cmd,
+            repo,
+            label="droid",
+            heartbeat_seconds=args.heartbeat_seconds,
+            stream_output=args.stream_engine_output,
+        )
+    finally:
+        # The prompt can contain the whole review bundle; never leave it behind in the temp dir.
+        prompt_path.unlink(missing_ok=True)
+    if result.returncode != 0:
+        raise SystemExit(f"droid engine failed ({result.returncode})\n{result.stderr or result.stdout}")
+    return result.stdout
+
+
+def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str:
+    """Run the Copilot CLI against a prompt file and return its stdout.
+
+    The prompt is written to prompt.txt inside a temporary directory that is handed to the CLI via -C.
+    """
+    if args.thinking:
+        raise SystemExit("--thinking is not supported by the copilot engine")
+    if not args.tools:
+        raise SystemExit(
+            "--no-tools is not supported by the copilot engine; copilot requires a read-only file view tool to load the review bundle without exposing it in argv"
+        )
+    with tempfile.TemporaryDirectory(prefix="autoreview-copilot.") as tempdir:
+        workdir = Path(tempdir)
+        prompt_file = workdir / "prompt.txt"
+        prompt_file.write_text(prompt)
+        os.chmod(prompt_file, 0o600)
+        # argv carries only a pointer to the prompt file, never the review bundle itself.
+        cmd = [
+            resolve_command(args.copilot_bin, repo),
+            "-C",
+            tempdir,
+            "-p",
+            "Read ./prompt.txt and follow it exactly. Return only the requested JSON object.",
+            "--output-format",
+            "json",
+            "--stream",
+            "on" if args.stream_engine_output else "off",
+            "--no-ask-user",
+            "--disable-builtin-mcps",
+        ]
+        if args.model:
+            cmd += ["--model", args.model]
+        # The same four tools are listed as available and individually allowed.
+        cmd.append("--available-tools=read_agent,rg,view,web_fetch")
+        cmd.extend(f"--allow-tool={tool}" for tool in ("read_agent", "rg", "view", "web_fetch"))
+        if args.web_search:
+            cmd.append("--allow-all-urls")
+        outcome = run_with_heartbeat(
+            cmd,
+            workdir,
+            label="copilot",
+            heartbeat_seconds=args.heartbeat_seconds,
+            stream_output=args.stream_engine_output,
+        )
+    if outcome.returncode != 0:
+        raise SystemExit(f"copilot engine failed ({outcome.returncode})\n{outcome.stderr or outcome.stdout}")
+    return outcome.stdout
+
+
+class CodexStreamDisplay:
+    """Reduce Codex JSON stream lines to turn status, agent messages, and hidden-activity counts."""
+
+    def __init__(self, *, activity_seconds: int = 20) -> None:
+        self.activity_seconds = activity_seconds
+        self.hidden_events = 0
+        self.last_visible = time.monotonic()
+
+    def __call__(self, name: str, line: str) -> str | None:
+        if name != "stdout":
+            return line
+        try:
+            event = json.loads(line)
+        except json.JSONDecodeError:
+            event = None
+        if not isinstance(event, dict):  # plain text or a bare JSON scalar/array: pass through
+            return self.visible(line)
+        event_type = event.get("type")
+        if event_type == "thread.started":
+            return self.visible(f"codex thread: {event.get('thread_id', '<unknown>')}\n")
+        if event_type == "turn.started":
+            return self.visible("codex turn started\n")
+        if event_type == "turn.completed":
+            usage = event.get("usage")
+            message = format_codex_usage(usage) + "\n" if isinstance(usage, dict) else "codex turn completed\n"
+            return self.visible(self.flush_hidden() + message)
+        item = event.get("item")
+        if isinstance(item, dict) and item.get("type") == "agent_message" and isinstance(item.get("text"), str):
+            return self.visible(self.flush_hidden() + item["text"].rstrip() + "\n")
+        return self.hidden_activity()
+
+    def hidden_activity(self) -> str | None:
+        self.hidden_events += 1
+        quiet = time.monotonic() - self.last_visible < self.activity_seconds
+        return None if quiet else self.visible(self.flush_hidden())
+
+    def flush_hidden(self) -> str:
+        count, self.hidden_events = self.hidden_events, 0
+        return f"codex activity: {count} hidden tool/status events\n" if count else ""
+
+    def visible(self, text: str) -> str:
+        self.last_visible = time.monotonic()
+        return text
+
+
+class ClaudeStreamDisplay:
+    """Reduce Claude stream-json lines to turn status, assistant text, usage, and hidden-activity counts."""
+
+    def __init__(self, *, activity_seconds: int = 20) -> None:
+        self.activity_seconds = activity_seconds
+        self.hidden_events = 0
+        self.last_visible = time.monotonic()
+        self.started = False
+
+    def __call__(self, name: str, line: str) -> str | None:
+        if name != "stdout":
+            return line
+        try:
+            event = json.loads(line)
+        except json.JSONDecodeError:
+            event = None
+        if not isinstance(event, dict):  # plain text or a bare JSON scalar/array: pass through
+            return self.visible(line)
+        event_type = event.get("type")
+        # Only the first system event is announced; later ones count as hidden activity.
+        if event_type == "system" and not self.started:
+            self.started = True
+            return self.visible("claude turn started\n")
+        if event_type == "assistant":
+            return self.assistant_message(event)
+        if event_type == "result":
+            return self.visible(self.flush_hidden() + self.result_summary(event))
+        return self.hidden_activity()
+
+    def assistant_message(self, event: dict[str, Any]) -> str | None:
+        message = event.get("message")
+        content = message.get("content") if isinstance(message, dict) else None
+        blocks = content if isinstance(content, list) else []  # tolerate missing or non-list content
+        chunks = [
+            block["text"].rstrip()
+            for block in blocks
+            if isinstance(block, dict) and block.get("type") == "text" and isinstance(block.get("text"), str)
+        ]
+        if chunks:
+            return self.visible(self.flush_hidden() + "\n".join(chunks) + "\n")
+        return self.hidden_activity()
+
+    def result_summary(self, event: dict[str, Any]) -> str:
+        usage = event.get("usage")
+        fields: list[str] = []
+        if isinstance(usage, dict):
+            for key in (
+                "input_tokens",
+                "cache_read_input_tokens",
+                "cache_creation_input_tokens",
+                "output_tokens",
+            ):
+                value = usage.get(key)
+                if isinstance(value, int):
+                    fields.append(f"{key}={value}")
+        cost = event.get("total_cost_usd")
+        if isinstance(cost, (int, float)) and not isinstance(cost, bool):
+            fields.append(f"cost_usd={cost:.6f}")
+        return "claude usage: " + " ".join(fields) + "\n" if fields else "claude turn completed\n"
+
+    def hidden_activity(self) -> str | None:
+        self.hidden_events += 1
+        quiet = time.monotonic() - self.last_visible < self.activity_seconds
+        return None if quiet else self.visible(self.flush_hidden())
+
+    def flush_hidden(self) -> str:
+        count, self.hidden_events = self.hidden_events, 0
+        return f"claude activity: {count} hidden tool/status events\n" if count else ""
+
+    def visible(self, text: str) -> str:
+        self.last_visible = time.monotonic()
+        return text
+
+
+def format_codex_usage(usage: dict[str, Any]) -> str:
+    """Format the integer token counters found in a Codex usage mapping as one status line."""
+    counters = ("input_tokens", "cached_input_tokens", "output_tokens", "reasoning_output_tokens")
+    rendered: list[str] = []
+    for counter in counters:
+        amount = usage.get(counter)
+        if isinstance(amount, int):
+            rendered.append(f"{counter}={amount}")
+    return "codex usage: " + " ".join(rendered) if rendered else "codex usage: unavailable"
+
+
+def claude_allowed_tools(args: argparse.Namespace) -> str:
+    """Return the comma-joined Claude tool allow-list, minus web tools when web search is disabled."""
+    blocked = set() if args.web_search else {"WebSearch", "WebFetch"}
+    names = (raw.strip() for raw in args.claude_allowed_tools.split(","))
+    return ",".join(name for name in names if name and name not in blocked)
+
+
 def extract_json(text: str) -> dict[str, Any]:
     stripped = text.strip()
     if not stripped:
@@ -585,9 +937,9 @@ def extract_json(text: str) -> dict[str, Any]:
     try:
         parsed = json.loads(stripped)
     except json.JSONDecodeError as exc:
-        candidate = parse_json_candidate(stripped)
-        if isinstance(candidate, dict) and "findings" in candidate:
-            return candidate
+        fenced_report = parse_json_candidate(stripped)
+        if isinstance(fenced_report, dict) and "findings" in fenced_report:
+            return fenced_report
         jsonl_report = extract_json_from_jsonl(stripped)
         if jsonl_report:
             return jsonl_report
@@ -656,12 +1008,9 @@ def parse_json_candidate(text: str) -> Any | None:
     return parsed
 
 
-def validate_report(
-    report: dict[str, Any],
-    changed_paths: set[str],
-    required: list[str],
-) -> None:
-    extra_top = set(report) - REPORT_KEYS
+def validate_report(report: dict[str, Any], repo: Path, changed_paths: set[str], required: list[str]) -> None:
+    allowed_top = {"findings", "overall_correctness", "overall_explanation", "overall_confidence"}
+    extra_top = set(report) - allowed_top
     if extra_top:
         raise SystemExit(f"review JSON has unexpected top-level keys: {sorted(extra_top)}")
     for key in SCHEMA["required"]:
@@ -677,21 +1026,47 @@ def validate_report(
         raise SystemExit("review JSON overall_explanation is too long")
     if not number_in_range(report.get("overall_confidence")):
         raise SystemExit("review JSON overall_confidence must be numeric")
-
+    finding_text = ""
     kept_findings: list[dict[str, Any]] = []
     ignored_findings: list[tuple[int, dict[str, Any], str, int]] = []
-    finding_text = ""
     for index, finding in enumerate(report["findings"]):
-        validate_finding(index, finding)
-        location = finding["code_location"]
-        rel = str(location["file_path"]).strip()
-        line = int(location["line"])
+        if not isinstance(finding, dict):
+            raise SystemExit(f"finding {index} must be an object")
+        allowed_finding = {"title", "body", "priority", "confidence", "category", "code_location"}
+        extra_finding = set(finding) - allowed_finding
+        if extra_finding:
+            raise SystemExit(f"finding {index} has unexpected keys: {sorted(extra_finding)}")
+        for key in allowed_finding:
+            if key not in finding:
+                raise SystemExit(f"finding {index} missing required key: {key}")
+        title = finding.get("title")
+        if not isinstance(title, str) or not title or len(title) > 140:
+            raise SystemExit(f"finding {index} has invalid title")
+        body = finding.get("body")
+        if not isinstance(body, str) or not body or len(body) > 2400:
+            raise SystemExit(f"finding {index} has invalid body")
+        priority = finding.get("priority")
+        if priority not in {"P0", "P1", "P2", "P3"}:
+            raise SystemExit(f"finding {index} has invalid priority: {priority}")
+        if not number_in_range(finding.get("confidence")):
+            raise SystemExit(f"finding {index} has invalid confidence")
+        category = finding.get("category")
+        if category not in CATEGORIES:
+            raise SystemExit(f"finding {index} has invalid category: {category}")
+        location = finding.get("code_location")
+        if not isinstance(location, dict):
+            raise SystemExit(f"finding {index} missing code_location")
+        rel = str(location.get("file_path", "")).strip()
+        line = location.get("line")
+        if not rel or not isinstance(line, int) or line < 1:
+            raise SystemExit(f"finding {index} has invalid location: {location}")
+        if Path(rel).is_absolute() or ".." in Path(rel).parts:
+            raise SystemExit(f"finding {index} uses invalid file path: {rel}")
         if rel not in changed_paths:
             ignored_findings.append((index, finding, rel, line))
             continue
         kept_findings.append(finding)
         finding_text += "\n" + json.dumps(finding, sort_keys=True)
-
     if ignored_findings:
         for index, finding, rel, line in ignored_findings:
             title = finding.get("title", "<untitled>")
@@ -706,118 +1081,317 @@ def validate_report(
             explanation = report["overall_explanation"].rstrip()
             report["overall_correctness"] = "patch is correct"
             report["overall_explanation"] = bounded_field(f"{explanation}\n\n{note}", 3000)
-
     haystack = finding_text.lower()
     for needle in required:
         if needle.lower() not in haystack:
             raise SystemExit(f"required finding text not found: {needle}")
 
 
-def validate_finding(index: int, finding: Any) -> None:
-    if not isinstance(finding, dict):
-        raise SystemExit(f"finding {index} must be an object")
-    extra_finding = set(finding) - FINDING_KEYS
-    if extra_finding:
-        raise SystemExit(f"finding {index} has unexpected keys: {sorted(extra_finding)}")
-    for key in FINDING_KEYS:
-        if key not in finding:
-            raise SystemExit(f"finding {index} missing required key: {key}")
-    title = finding.get("title")
-    if not isinstance(title, str) or not title or len(title) > 140:
-        raise SystemExit(f"finding {index} has invalid title")
-    body = finding.get("body")
-    if not isinstance(body, str) or not body or len(body) > 2400:
-        raise SystemExit(f"finding {index} has invalid body")
-    priority = finding.get("priority")
-    if priority not in {"P0", "P1", "P2", "P3"}:
-        raise SystemExit(f"finding {index} has invalid priority: {priority}")
-    if not number_in_range(finding.get("confidence")):
-        raise SystemExit(f"finding {index} has invalid confidence")
-    category = finding.get("category")
-    if category not in CATEGORIES:
-        raise SystemExit(f"finding {index} has invalid category: {category}")
-    location = finding.get("code_location")
-    if not isinstance(location, dict):
-        raise SystemExit(f"finding {index} missing code_location")
-    rel = str(location.get("file_path", "")).strip()
-    line = location.get("line")
-    if not rel or not isinstance(line, int) or line < 1:
-        raise SystemExit(f"finding {index} has invalid location: {location}")
-    path = Path(rel)
-    if path.is_absolute() or ".." in path.parts:
-        raise SystemExit(f"finding {index} uses invalid file path: {rel}")
-
-
 def number_in_range(value: Any) -> bool:
     return isinstance(value, (int, float)) and not isinstance(value, bool) and 0 <= value <= 1
 
 
-def print_report(report: dict[str, Any]) -> None:
+def print_report(report: dict[str, Any], *, label: str = "autoreview") -> None:
     findings = report["findings"]
     if findings:
-        print(f"autoreview findings: {len(findings)}")
+        print(f"{label} findings: {len(findings)}")
     elif report["overall_correctness"] == "patch is incorrect":
-        print("autoreview verdict: patch is incorrect without discrete findings")
+        print(f"{label} verdict: patch is incorrect without discrete findings")
     else:
-        print("autoreview clean: no accepted/actionable findings reported")
+        print(f"{label} clean: no accepted/actionable findings reported")
     for finding in findings:
         loc = finding["code_location"]
-        print(f"[{finding['priority']}] {finding['title']} ({finding['category']})")
+        print(f"[{finding['priority']}] {finding['title']}")
         print(f"{loc['file_path']}:{loc['line']}")
-        print(finding["body"])
+        print(f"{finding['body']}")
         print()
     print(f"overall: {report['overall_correctness']} ({report['overall_confidence']})")
     print(report["overall_explanation"])
 
 
-def start_parallel_tests(command: str, repo: Path) -> tuple[subprocess.Popen[Any], float]:
+def start_parallel_tests(command: str, repo: Path, shell_kind: str) -> tuple[subprocess.Popen, float]:
     print(f"tests: {command}")
-    return subprocess.Popen(command, cwd=repo, shell=True), time.time()
-
-
-def finish_parallel_tests(proc: subprocess.Popen[Any], started: float) -> int:
+    if shell_kind == "default" or shell_kind == "cmd":
+        return subprocess.Popen(command, cwd=repo, shell=True), time.time()
+    if shell_kind == "powershell":
+        powershell = resolve_command("powershell", repo)
+        return subprocess.Popen(
+            [powershell, "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command],
+            cwd=repo,
+        ), time.time()
+    if shell_kind == "pwsh":
+        pwsh = resolve_command("pwsh", repo)
+        return subprocess.Popen(
+            [pwsh, "-NoProfile", "-Command", command],
+            cwd=repo,
+        ), time.time()
+    raise SystemExit(f"invalid --parallel-tests-shell/AUTOREVIEW_PARALLEL_TESTS_SHELL: {shell_kind}")
+
+
+def finish_parallel_tests(proc: subprocess.Popen, started: float) -> int:
     proc.wait()
     print(f"tests exit: {proc.returncode} after {int(time.time() - started)}s")
     return int(proc.returncode or 0)
 
 
-def parse_args(argv: list[str]) -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="EEGPrep bundle-driven autonomous code review.")
-    parser.add_argument("--mode", choices=["auto", "local", "uncommitted", "branch", "commit"], default="auto")
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Bundle-driven AI code review.")
+    parser.add_argument(
+        "--mode",
+        choices=["auto", "local", "uncommitted", "branch", "commit", "codebase"],
+        default="auto",
+    )
     parser.add_argument("--base")
     parser.add_argument("--commit", default="HEAD")
-    parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex"))
-    parser.add_argument("--model", default=os.environ.get("AUTOREVIEW_MODEL"))
+    parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch origin before branch diffs.")
+    parser.add_argument("--engine", choices=ENGINES, default=os.environ.get("AUTOREVIEW_ENGINE", "codex"))
+    parser.add_argument("--reviewers", help="Comma-separated review panel, e.g. codex,claude or codex:gpt-5:high.")
+    parser.add_argument(
+        "--panel",
+        action="store_true",
+        help="Run a Codex/Claude review panel unless --engine changes the first reviewer.",
+    )
+    parser.add_argument("--model", action="append", help="Model for all reviewers or engine=model. Repeatable.")
     parser.add_argument(
         "--thinking",
-        choices=["low", "medium", "high", "xhigh"],
-        default=os.environ.get("AUTOREVIEW_THINKING", "high"),
+        action="append",
+        help="Thinking/effort for all reviewers or engine=level. Repeatable. Codex: low, medium, high, xhigh. Claude: low, medium, high, xhigh, max.",
+    )
+    parser.add_argument(
+        "--allow-partial-panel", action="store_true", help="Continue panel output when one reviewer fails."
+    )
+    parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex"))
+    parser.add_argument("--claude-bin", default=os.environ.get("CLAUDE_BIN", "claude"))
+    parser.add_argument("--droid-bin", default=os.environ.get("DROID_BIN", "droid"))
+    parser.add_argument("--copilot-bin", default=os.environ.get("COPILOT_BIN", "copilot"))
+    parser.add_argument(
+        "--no-tools",
+        dest="tools",
+        action="store_false",
+        default=True,
+        help="Disable tools for engines that support it. Codex and copilot reject no-tools review.",
     )
     parser.add_argument("--no-web-search", dest="web_search", action="store_false", default=True)
+    parser.add_argument(
+        "--claude-allowed-tools",
+        default=os.environ.get(
+            "AUTOREVIEW_CLAUDE_TOOLS",
+            "Read,Grep,Glob,WebSearch,WebFetch",
+        ),
+    )
     parser.add_argument("--prompt", action="append", help="Additional review instruction text.")
     parser.add_argument("--prompt-file", action="append", help="Additional review instruction file.")
-    parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the bundle.")
+    parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.")
     parser.add_argument("--output", help="Write human output to a file as well as stdout.")
     parser.add_argument("--json-output", help="Write validated structured review JSON.")
-    parser.add_argument("--parallel-tests", help="Run a focused test command concurrently with review.")
-    parser.add_argument("--require-finding", action="append", default=[], help="Require finding text to contain this substring.")
-    parser.add_argument("--expect-findings", action="store_true", help="Treat findings as success for harness checks.")
-    parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch origin before branch diffs.")
     parser.add_argument("--heartbeat-seconds", type=int, default=60)
-    parser.add_argument("--dry-run", action="store_true", help="Resolve target and bundle context without calling Codex.")
-    return parser.parse_args(argv)
+    parser.add_argument(
+        "--stream-engine-output",
+        action="store_true",
+        default=os.environ.get("AUTOREVIEW_STREAM_ENGINE_OUTPUT") == "1",
+        help="Stream review engine output while preserving buffered output for validation. Codex output is filtered to hide tool/file chatter.",
+    )
+    parser.add_argument(
+        "--parallel-tests", help="Run a test command concurrently with review; failure fails the helper."
+    )
+    parser.add_argument(
+        "--parallel-tests-shell",
+        choices=["default", "cmd", "powershell", "pwsh"],
+        default=os.environ.get("AUTOREVIEW_PARALLEL_TESTS_SHELL", "default"),
+        help="Shell for --parallel-tests. Default preserves Python shell=True platform behavior; use powershell or pwsh for PowerShell-specific commands.",
+    )
+    parser.add_argument(
+        "--require-finding", action="append", default=[], help="Require finding text to contain this substring."
+    )
+    parser.add_argument(
+        "--expect-findings", action="store_true", help="Treat findings as success; for harness acceptance tests."
+    )
+    parser.add_argument("--dry-run", action="store_true")
+    args = parser.parse_args()
+    if args.engine not in ENGINES:
+        raise SystemExit(f"invalid --engine/AUTOREVIEW_ENGINE: {args.engine}")
+    return args
+
+
+def run_engine(args: argparse.Namespace, repo: Path, prompt: str) -> str:
+    if args.engine == "codex":
+        return run_codex(args, repo, prompt)
+    if args.engine == "claude":
+        return run_claude(args, repo, prompt)
+    if args.engine == "droid":
+        return run_droid(args, repo, prompt)
+    if args.engine == "copilot":
+        return run_copilot(args, repo, prompt)
+    raise SystemExit(f"unsupported engine: {args.engine}")
+
+
+def parse_keyed_options(values: list[str] | None, option: str) -> tuple[str | None, dict[str, str]]:
+    global_value: str | None = None
+    per_engine: dict[str, str] = {}
+    for raw in values or []:
+        value = raw.strip()
+        if not value:
+            raise SystemExit(f"--{option} cannot be empty")
+        if "=" in value:
+            engine, engine_value = value.split("=", 1)
+            engine = engine.strip()
+            engine_value = engine_value.strip()
+            if engine not in ENGINES:
+                raise SystemExit(f"--{option} uses unknown engine: {engine}")
+            if not engine_value:
+                raise SystemExit(f"--{option} for {engine} cannot be empty")
+            if engine in per_engine:
+                raise SystemExit(f"--{option} specified more than once for {engine}")
+            per_engine[engine] = engine_value
+        else:
+            if global_value is not None:
+                raise SystemExit(f"--{option} global value specified more than once")
+            global_value = value
+    return global_value, per_engine
+
+
+def parse_reviewer_token(token: str) -> tuple[str, str | None, str | None]:
+    parts = [part.strip() for part in token.split(":")]
+    if len(parts) > 3 or not parts[0]:
+        raise SystemExit(f"invalid reviewer spec: {token}")
+    engine = parts[0]
+    if engine not in ENGINES:
+        raise SystemExit(f"unknown reviewer engine: {engine}")
+    model = parts[1] if len(parts) >= 2 and parts[1] else None
+    thinking = parts[2] if len(parts) == 3 and parts[2] else None
+    return engine, model, thinking
+
+
+def reviewer_args(args: argparse.Namespace) -> list[argparse.Namespace]:
+    global_model, model_by_engine = parse_keyed_options(args.model, "model")
+    global_thinking, thinking_by_engine = parse_keyed_options(args.thinking, "thinking")
+    reviewers: list[tuple[str, str | None, str | None]] = []
+    if args.reviewers:
+        tokens = [token.strip() for token in args.reviewers.split(",") if token.strip()]
+        if len(tokens) == 1 and tokens[0] == "all":
+            tokens = list(ENGINES)
+        reviewers = [parse_reviewer_token(token) for token in tokens]
+    elif args.panel:
+        engines = [args.engine]
+        for engine in ("codex", "claude"):
+            if engine not in engines:
+                engines.append(engine)
+        reviewers = [(engine, None, None) for engine in engines]
+    else:
+        reviewers = [(args.engine, None, None)]
+
+    seen: set[str] = set()
+    result: list[argparse.Namespace] = []
+    for engine, inline_model, inline_thinking in reviewers:
+        if engine in seen:
+            raise SystemExit(f"reviewer specified more than once: {engine}")
+        seen.add(engine)
+        model = inline_model or model_by_engine.get(engine) or global_model
+        thinking = inline_thinking or thinking_by_engine.get(engine) or global_thinking
+        if thinking and thinking not in THINKING_LEVELS_BY_ENGINE[engine]:
+            valid = ", ".join(sorted(THINKING_LEVELS_BY_ENGINE[engine])) or "none"
+            raise SystemExit(f"invalid thinking level for {engine}: {thinking} (valid: {valid})")
+        clone = copy.copy(args)
+        clone.engine = engine
+        clone.model = model
+        clone.thinking = thinking
+        result.append(clone)
+    return result
 
 
-def main(argv: list[str]) -> int:
-    args = parse_args(argv)
+def reviewer_label(args: argparse.Namespace) -> str:
+    parts = [args.engine]
+    if args.model:
+        parts.append(f"model={args.model}")
+    if args.thinking:
+        parts.append(f"thinking={args.thinking}")
+    return " ".join(parts)
+
+
+def run_reviewer(
+    args: argparse.Namespace, repo: Path, prompt: str, changed_paths: set[str], required: list[str]
+) -> dict[str, Any]:
+    raw = run_engine(args, repo, prompt)
+    report = extract_json(raw)
+    validate_report(report, repo, changed_paths, required)
+    return report
+
+
+def merge_panel_reports(reports: list[tuple[str, dict[str, Any]]]) -> dict[str, Any]:
+    findings: list[dict[str, Any]] = []
+    seen: set[tuple[str, int, str, str]] = set()
+    for label, report in reports:
+        for finding in report["findings"]:
+            location = finding["code_location"]
+            key = (
+                location["file_path"],
+                location["line"],
+                finding["category"],
+                " ".join(finding["title"].lower().split()),
+            )
+            if key in seen:
+                continue
+            seen.add(key)
+            merged = copy.deepcopy(finding)
+            merged["body"] = bounded_field(f"Reviewer: {label}\n\n{merged['body']}", 2400)
+            findings.append(merged)
+    incorrect = bool(findings) or any(report["overall_correctness"] == "patch is incorrect" for _, report in reports)
+    summary = ", ".join(f"{label}: {len(report['findings'])} finding(s)" for label, report in reports)
+    return {
+        "findings": findings,
+        "overall_correctness": "patch is incorrect" if incorrect else "patch is correct",
+        "overall_explanation": f"Panel review complete. {summary}.",
+        "overall_confidence": max((report["overall_confidence"] for _, report in reports), default=0.5),
+    }
+
+
+def run_panel(
+    args: argparse.Namespace, reviewers: list[argparse.Namespace], repo: Path, prompt: str, changed_paths: set[str]
+) -> dict[str, Any]:
+    reports: list[tuple[str, dict[str, Any]]] = []
+    failures: list[str] = []
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(reviewers)) as executor:
+        future_by_label = {
+            executor.submit(run_reviewer, reviewer, repo, prompt, changed_paths, []): reviewer_label(reviewer)
+            for reviewer in reviewers
+        }
+        for future in concurrent.futures.as_completed(future_by_label):
+            label = future_by_label[future]
+            try:
+                reports.append((label, future.result()))
+            except SystemExit as exc:
+                failures.append(f"{label}: {exc}")
+            except Exception as exc:
+                failures.append(f"{label}: {exc}")
+    if failures and not args.allow_partial_panel:
+        raise SystemExit("autoreview panel failed\n" + "\n".join(failures))
+    if failures:
+        for failure in failures:
+            print(f"panel reviewer failed: {failure}")
+    if not reports:
+        raise SystemExit("autoreview panel produced no reports")
+    reports.sort(key=lambda item: item[0])
+    report = merge_panel_reports(reports)
+    validate_report(report, repo, changed_paths, args.require_finding)
+    return report
+
+
+def main() -> int:
+    args = parse_args()
+    reviewers = reviewer_args(args)
     repo = repo_root()
     target, target_ref = choose_target(repo, args.mode, args.base)
     print(f"autoreview target: {target}")
     print(f"branch: {current_branch(repo)}")
-    print("engine: codex")
-    if args.model:
-        print(f"model: {args.model}")
-    print(f"thinking: {args.thinking}")
+    if len(reviewers) == 1 and not args.reviewers and not args.panel:
+        print(f"engine: {reviewers[0].engine}")
+        if reviewers[0].model:
+            print(f"model: {reviewers[0].model}")
+        if reviewers[0].thinking:
+            print(f"thinking: {reviewers[0].thinking}")
+    else:
+        print(f"reviewers: {', '.join(reviewer_label(reviewer) for reviewer in reviewers)}")
+    print(f"tools: {'on' if args.tools else 'off'}")
     print(f"web_search: {'on' if args.web_search else 'off'}")
     display_ref = args.commit if target == "commit" else target_ref
     if display_ref:
@@ -826,13 +1400,17 @@ def main(argv: list[str]) -> int:
     if target == "local":
         bundle = local_bundle(repo)
     elif target == "branch":
-        if target_ref is None:
-            raise SystemExit("internal error: branch target missing base ref")
+        assert target_ref
         bundle = branch_bundle(repo, target_ref, skip_fetch=args.skip_fetch)
+    elif target == "codebase":
+        changed_paths = codebase_paths(repo)
+        bundle = codebase_bundle(repo, changed_paths)
     else:
         bundle = commit_bundle(repo, args.commit)
         target_ref = args.commit
-    changed_paths = review_paths(repo, target, target_ref, args.commit)
+        changed_paths = review_paths(repo, target, target_ref, args.commit)
+    if target != "codebase":
+        changed_paths = review_paths(repo, target, target_ref, args.commit)
     instructions = instruction_bundle(repo, changed_paths)
     prompt = build_prompt(
         repo,
@@ -844,28 +1422,32 @@ def main(argv: list[str]) -> int:
         load_extra_prompt(args),
         load_datasets(args),
     )
-    print(f"changed paths: {len(changed_paths)}")
+    print(f"scope paths: {len(changed_paths)}")
     print(f"bundle: {len(prompt)} chars")
     if args.dry_run:
         return 0
 
-    tests_proc: tuple[subprocess.Popen[Any], float] | None = None
+    tests_proc: tuple[subprocess.Popen, float] | None = None
     if args.parallel_tests:
-        tests_proc = start_parallel_tests(args.parallel_tests, repo)
+        tests_proc = start_parallel_tests(args.parallel_tests, repo, args.parallel_tests_shell)
     try:
-        raw = run_codex(args, repo, prompt)
-        report = extract_json(raw)
-        validate_report(report, changed_paths, args.require_finding)
+        if len(reviewers) == 1:
+            report = run_reviewer(reviewers[0], repo, prompt, changed_paths, args.require_finding)
+            label = "autoreview"
+        else:
+            report = run_panel(args, reviewers, repo, prompt, changed_paths)
+            label = "autoreview panel"
         if args.json_output:
             Path(args.json_output).write_text(json.dumps(report, indent=2) + "\n")
+
         if args.output:
             original_stdout = sys.stdout
             with Path(args.output).open("w") as handle:
                 sys.stdout = Tee(original_stdout, handle)
-                print_report(report)
+                print_report(report, label=label)
                 sys.stdout = original_stdout
         else:
-            print_report(report)
+            print_report(report, label=label)
     finally:
         tests_status = finish_parallel_tests(*tests_proc) if tests_proc else 0
 
@@ -892,4 +1474,4 @@ class Tee:
 
 
 if __name__ == "__main__":
-    raise SystemExit(main(sys.argv[1:]))
+    raise SystemExit(main())
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1
new file mode 100644
index 00000000..51007a64
--- /dev/null
+++ b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1
@@ -0,0 +1,51 @@
+[CmdletBinding()]
+param(
+    [ValidateSet('buggy', 'benign')]
+    [string] $Fixture,
+
+    [ValidateSet('codex', 'claude', 'droid', 'copilot')]
+    [string[]] $Engine,
+
+    [switch] $DryRun,
+
+    [Alias('h')]
+    [switch] $Help
+)
+# Thin wrapper: forwards the bound parameters to test-review-harness.py and returns its exit code.
+$ErrorActionPreference = 'Stop'
+
+$Harness = Join-Path $PSScriptRoot 'test-review-harness.py'
+$ForwardedArgs = @()
+# Translate only the parameters the caller actually supplied into harness command-line flags.
+if ($Help) {
+    $ForwardedArgs += '--help'
+}
+
+if ($PSBoundParameters.ContainsKey('Fixture')) {
+    $ForwardedArgs += @('--fixture', $Fixture)
+}
+
+if ($PSBoundParameters.ContainsKey('Engine')) {
+    foreach ($SelectedEngine in $Engine) {
+        $ForwardedArgs += @('--engine', $SelectedEngine)
+    }
+}
+
+if ($DryRun) {
+    $ForwardedArgs += '--dry-run'
+}
+# Prefer the `py` launcher (invoked with -3); otherwise fall back to `python` on PATH.
+$PyLauncher = Get-Command py -ErrorAction SilentlyContinue
+if ($null -ne $PyLauncher) {
+    & $PyLauncher.Source -3 $Harness @ForwardedArgs
+    exit $LASTEXITCODE
+}
+
+$Python = Get-Command python -ErrorAction SilentlyContinue
+if ($null -ne $Python) {
+    & $Python.Source $Harness @ForwardedArgs
+    exit $LASTEXITCODE
+}
+# -ErrorAction Continue keeps Write-Error non-terminating under 'Stop', so exit 127 is reached.
+Write-Error 'Python 3 is required to run test-review-harness.' -ErrorAction Continue
+exit 127
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py
index 8a7540d5..8035c42f 100755
--- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py
+++ b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py
@@ -12,6 +12,8 @@
 from pathlib import Path
 
 
+ENGINES = ("codex", "claude", "droid", "copilot")
+
 SAFE_INITIAL = """import numpy as np
 
 
@@ -63,20 +65,21 @@ def trim_eeg(eeg, start_sample, stop_sample):
     "Review normally and report only concrete defects introduced by the patch."
 )
 BENIGN_PROMPT = (
-    "Calibration fixture: this EEG change intentionally validates sample bounds "
-    "and preserves channel-major data. Do not flag it unless there is a concrete bug."
+    "Calibration fixture: this EEG change validates 1-based sample bounds and "
+    "preserves channel-major data. Do not flag it unless there is a concrete bug."
 )
 
 
 def parse_args(argv: list[str]) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description=(
-            "Create a temporary EEG-style git repo and run the adapted autoreview helper "
-            "against a buggy or benign patch."
-        )
+        prog="test-review-harness",
+        description="Create a temporary EEG-style repo and run the adapted autoreview helper.",
     )
     parser.add_argument("--fixture", choices=("buggy", "benign"), default="buggy")
-    parser.add_argument("--dry-run", action="store_true", help="Only verify helper target selection.")
+    parser.add_argument("--engine", action="append", choices=ENGINES, dest="engines")
+    parser.add_argument(
+        "--dry-run", action="store_true", help="Verify target/bundle setup without spending a model call."
+    )
     return parser.parse_args(argv)
 
 
@@ -98,38 +101,37 @@ def create_fixture_repo(repo: Path, fixture: str) -> None:
     write_fixture_file(repo, BUGGY_CHANGED if fixture == "buggy" else BENIGN_CHANGED)
 
 
-def run_review(repo: Path, script_dir: Path, fixture: str, *, dry_run: bool) -> None:
+def run_reviews(repo: Path, script_dir: Path, fixture: str, engines: list[str], *, dry_run: bool) -> None:
     autoreview = script_dir / "autoreview"
-    command = [
-        sys.executable,
-        str(autoreview),
-        "--mode",
-        "local",
-        "--prompt",
-        BUGGY_PROMPT if fixture == "buggy" else BENIGN_PROMPT,
-    ]
-    if fixture == "buggy":
-        command.extend(["--require-finding", "channel", "--expect-findings"])
-    if dry_run:
-        command.append("--dry-run")
-    run(command, repo)
+    for engine in engines:
+        print(f"== {engine} ==", flush=True)
+        command = [
+            sys.executable,
+            str(autoreview),
+            "--mode",
+            "local",
+            "--engine",
+            engine,
+            "--prompt",
+            BUGGY_PROMPT if fixture == "buggy" else BENIGN_PROMPT,
+        ]
+        if fixture == "buggy":
+            command.extend(["--require-finding", "channel", "--expect-findings"])
+        if dry_run:
+            command.append("--dry-run")
+        run(command, repo)
 
 
 def cleanup_repo(repo: Path) -> None:
-    def make_writable_and_retry(
-        function: Callable[[str], object],
-        path: str,
-        _exc_info: object,
-    ) -> None:
+    def make_writable_and_retry(function: Callable[[str], object], path: str, _exc_info: object) -> None:
         try:
             os.chmod(path, stat.S_IREAD | stat.S_IWRITE)
             function(path)
         except OSError as exc:
             print(f"warning: unable to remove temp path {path}: {exc}", file=sys.stderr)
 
-    if not repo.exists():
-        return
-    shutil.rmtree(repo, onerror=make_writable_and_retry)
+    if repo.exists():
+        shutil.rmtree(repo, onerror=make_writable_and_retry)
 
 
 def main(argv: list[str]) -> int:
@@ -138,7 +140,7 @@ def main(argv: list[str]) -> int:
     repo = Path(tempfile.mkdtemp(prefix="eegprep-autoreview-fixture."))
     try:
         create_fixture_repo(repo, args.fixture)
-        run_review(repo, script_dir, args.fixture, dry_run=args.dry_run)
+        run_reviews(repo, script_dir, args.fixture, args.engines or ["codex"], dry_run=args.dry_run)
     except subprocess.CalledProcessError as exc:
         return int(exc.returncode or 1)
     finally:

From 732d6e27ed9ce067e618c2097712c6c20eacaa0c Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Mon, 8 Jun 2026 20:49:17 -0700
Subject: [PATCH 02/16] Strengthen autoreview architecture checks

---
 .agents/skills/oc-autoreview-adapted/SKILL.md      | 14 +++++++++++++-
 .../oc-autoreview-adapted/agents/openai.yaml       |  4 ++--
 .../oc-autoreview-adapted/scripts/autoreview       | 11 +++++++++++
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md
index 763a9f4b..02891340 100644
--- a/.agents/skills/oc-autoreview-adapted/SKILL.md
+++ b/.agents/skills/oc-autoreview-adapted/SKILL.md
@@ -12,7 +12,8 @@ Use the bundled helper for high-signal closeout review or whole-codebase bug hun
 - Run it for real unless the user asked only for a plan.
 - Treat output as advisory. Verify every accepted finding in the real code path before fixing or reporting it.
 - Accept concrete bugs, regressions, EEGLAB parity breaks, unsafe I/O/security risks, missing tests tied to behavior, and maintainability issues that cause real future defects.
-- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, and subjective MATLAB/Python style comments.
+- Accept structural findings when the code becomes harder to ship: spaghetti branching, wrong ownership layer, duplicate canonical helpers, non-atomic state updates, file sprawl, weak data boundaries, or indirection that hides EEG invariants.
+- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, subjective MATLAB/Python style comments, and "cleaner someday" feedback without a concrete failure mode.
 - If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason.
 - Do not invoke nested review tools from inside review. The helper already runs one structured review path.
 - Do not push/stage/commit/open PR unless the user requested that separately.
@@ -95,6 +96,17 @@ Prioritize:
 - packaged Markdown help for GUI Help / `pophelp`;
 - realistic EEG-size performance and concrete security/path/I/O risks.
 
+## Architecture Bar
+
+Use this only for code that will make EEGPrep less reliable or maintainable, not for taste.
+
+- Look for a simpler "code judo" move that preserves behavior while deleting branches, modes, helper layers, or special cases.
+- Flag spaghetti growth: ad-hoc conditionals in busy flows, scattered feature checks, one-off booleans, nullable modes, and partial updates.
+- Keep logic in the canonical layer: signal processing in `sigprocfunc`, user wrappers in `popfunc`, GUI/session coordination in `guifunc`/`adminfunc`, plugin code in its plugin package, CLI orchestration outside core math.
+- Prefer existing helpers/contracts over near-duplicates; remove thin wrappers or generic magic that hide simple EEG data shapes.
+- Treat file-size growth past roughly 1000 lines as a warning in diff review; in whole-codebase audits, flag large modules only with a concrete bug-prone coupling or focused split.
+- Prefer fixes that remove concepts, collapse duplicate branches, clarify data boundaries, or make state/session/history updates atomic.
+
 ## Loop
 
 1. Format first if formatting can change line locations.
diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
index 7c1c3100..7b94c64a 100644
--- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
+++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
@@ -1,4 +1,4 @@
 interface:
   display_name: "OC Autoreview Adapted"
-  short_description: "EEGPrep autoreview for diffs or codebase audits"
-  default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real findings."
+  short_description: "EEGPrep autoreview for bugs, parity, and architecture"
+  default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real bugs, parity breaks, and structural issues."
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview
index 5fdee362..e623ea05 100755
--- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview
+++ b/.agents/skills/oc-autoreview-adapted/scripts/autoreview
@@ -52,6 +52,7 @@ CATEGORIES = {
     "regression",
     "test_gap",
     "maintainability",
+    "architecture",
     "eeglab_parity",
     "data_structure",
     "gui_session",
@@ -590,6 +591,16 @@ def build_prompt(
         - For each finding, use the smallest file/line location that demonstrates the issue.
         - If there are no actionable findings, return an empty findings array and mark the patch correct.
 
+        Structural quality bar:
+        - Do not approve code merely because it works. Flag architecture that makes EEGPrep harder to ship reliably.
+        - Look for a simpler "code judo" move: preserving behavior while deleting branches, modes, helper layers, or special cases.
+        - Flag spaghetti growth: ad-hoc conditionals inserted into busy flows, one-off booleans, nullable modes, scattered feature checks, and partial state updates.
+        - Flag logic in the wrong layer: GUI/session code doing signal processing, pop_* wrappers owning low-level math, CLI/console code bypassing session/history contracts, or runtime paths depending on vendored EEGLAB.
+        - Prefer canonical helpers and existing contracts over bespoke near-duplicates. Report duplicate helpers when a clear canonical home already exists.
+        - Be skeptical of thin wrappers, identity abstractions, cast-heavy/loosely-shaped boundaries, or generic magic that hides simple EEG data invariants.
+        - Treat file sprawl as a smell. In diff review, flag changes that push a file past roughly 1000 lines without a strong reason. In codebase audit, flag oversized modules only when there is a concrete bug-prone coupling or a focused decomposition path.
+        - Prefer remedies that remove concepts, collapse duplicate branches, make state updates atomic, clarify typed/data boundaries, or move logic to the owning module.
+
         EEGPrep review priorities:
         - Correctness bugs, import/runtime failures, wrong numerical results, and broken common workflows.
         - EEGLAB parity in APIs, pop_* wrappers, history commands, GUI behavior, event semantics, and expected data structures.

From a649906a9a58db14436c837cf4c3214b58755a26 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Tue, 9 Jun 2026 04:08:18 -0700
Subject: [PATCH 03/16] Add scoped autoreview campaign workflow

---
 .agents/skills/oc-autoreview-adapted/SKILL.md | 40 ++++++++++
 .../oc-autoreview-adapted/agents/openai.yaml  |  4 +-
 .../oc-autoreview-adapted/scripts/autoreview  | 74 +++++++++++++++++--
 3 files changed, 108 insertions(+), 10 deletions(-)

diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md
index 02891340..ff382e81 100644
--- a/.agents/skills/oc-autoreview-adapted/SKILL.md
+++ b/.agents/skills/oc-autoreview-adapted/SKILL.md
@@ -17,6 +17,7 @@ Use the bundled helper for high-signal closeout review or whole-codebase bug hun
 - If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason.
 - Do not invoke nested review tools from inside review. The helper already runs one structured review path.
 - Do not push/stage/commit/open PR unless the user requested that separately.
+- If the user asks to review the whole codebase and wants fixes, default to the campaign workflow below: branch/worktree per codebase area, review/fix/test/rerun, then open a PR before moving to the next area.
 
 ## Commands
 
@@ -54,6 +55,44 @@ Whole EEGPrep-owned codebase audit:
 
 The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files and excludes vendored EEGLAB/reference sample data by default; the reviewer may inspect files read-only and report real bugs anywhere in scope.
 
+Scoped codebase audit:
+
+```bash
+"$AUTOREVIEW" --mode codebase \
+  --path src/eegprep/functions/popfunc \
+  --path tests/test_pop_utils.py \
+  --thinking codex=xhigh
+```
+
+Use `--scope-file scopes.txt` when a slice has many paths.
+
+## Default Whole-Codebase Campaign
+
+When asked to review the whole codebase hands-off, do not make one giant PR. Split work into PR-sized areas, usually:
+
+- `popfunc`: `src/eegprep/functions/popfunc`, matching pop tests/help.
+- `sigproc`: `src/eegprep/functions/sigprocfunc`, numerical/parity tests.
+- `gui-session`: `src/eegprep/functions/guifunc`, `adminfunc`, console/session tests.
+- `plugins`: `src/eegprep/plugins`, bundled plugin tests/resources.
+- `io-bids-study`: file I/O, BIDS, STUDY, dataset/session persistence.
+- `cli-docs-tools`: CLI, docs, skills, tools, workflows.
+
+For each area:
+
+1. Create a fresh worktree/branch from the requested base, e.g. `autoreview/popfunc`.
+2. Run `autoreview --mode codebase --path ... --thinking codex=xhigh`.
+3. Verify each finding from first principles. Fix real issues even when the fix touches a related helper outside the initial path scope; keep the PR conceptually tied to that area.
+4. Run focused tests, lint/type checks when relevant, then rerun the same scoped autoreview until clean or until remaining findings are rejected with reasons.
+5. Commit, push, and open a PR before starting the next area.
+
+PR body must list every finding reviewed:
+
+- **Fixed:** finding, root cause, files changed, tests run.
+- **Rejected:** finding, why it is not real or not worth changing.
+- **Follow-up:** only when real but intentionally outside this PR's area.
+
+Do not auto-merge. The human reviews each PR normally.
+
 ## Useful Options
 
 - `--engine codex|claude|droid|copilot`; default is Codex.
@@ -62,6 +101,7 @@ The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files and
 - `--stream-engine-output` to see compact live engine activity.
 - `--parallel-tests "uv run pytest tests/test_file.py"` to run tests while review runs.
 - `--prompt` / `--prompt-file` / `--dataset` to add evidence.
+- `--path` / `--scope-file` to constrain a codebase, branch, local, or commit review to a PR-sized area.
 - `--json-output /tmp/review.json` and `--output /tmp/review.txt` for artifacts.
 - `--mode uncommitted` is an alias for `local`; use branch/commit modes after committing.
 - `--skip-fetch` avoids fetching before branch diffs.
diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
index 7b94c64a..55464109 100644
--- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
+++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
@@ -1,4 +1,4 @@
 interface:
   display_name: "OC Autoreview Adapted"
-  short_description: "EEGPrep autoreview for bugs, parity, and architecture"
-  default_prompt: "Use $oc-autoreview-adapted to review the current EEGPrep branch or run a whole-codebase audit, then verify and fix real bugs, parity breaks, and structural issues."
+  short_description: "EEGPrep autoreview loops and PR campaigns"
+  default_prompt: "Use $oc-autoreview-adapted to run scoped autoreview loops or a whole-codebase PR campaign, then verify and fix real bugs, parity breaks, and structural issues."
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview
index e623ea05..2c0c8e9b 100755
--- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview
+++ b/.agents/skills/oc-autoreview-adapted/scripts/autoreview
@@ -26,6 +26,7 @@ from __future__ import annotations
 import argparse
 import concurrent.futures
 import copy
+import fnmatch
 import json
 import os
 import queue
@@ -438,13 +439,53 @@ def branch_bundle(repo: Path, base_ref: str, *, skip_fetch: bool) -> str:
     )
 
 
+def normalize_scope_spec(scope: str) -> str:
+    normalized = scope.strip().replace("\\", "/").removeprefix("./").rstrip("/")
+    if not normalized:
+        raise SystemExit("scope path cannot be empty")
+    path = Path(normalized)
+    if path.is_absolute() or ".." in path.parts:
+        raise SystemExit(f"scope path must be repo-relative: {scope}")
+    return normalized
+
+
+def load_scope_specs(args: argparse.Namespace) -> list[str]:
+    scopes = [normalize_scope_spec(scope) for scope in args.path or []]
+    for scope_file in args.scope_file or []:
+        path = Path(scope_file)
+        for line in path.read_text().splitlines():
+            value = line.split("#", 1)[0].strip()
+            if value:
+                scopes.append(normalize_scope_spec(value))
+    seen: set[str] = set()
+    result: list[str] = []
+    for scope in scopes:
+        if scope in seen:
+            continue
+        seen.add(scope)
+        result.append(scope)
+    return result
+
+
+def path_matches_scope(rel: str, scopes: list[str]) -> bool:
+    if not scopes:
+        return True
+    return any(
+        rel == scope
+        or rel.startswith(f"{scope}/")
+        or fnmatch.fnmatchcase(rel, scope)
+        or fnmatch.fnmatchcase(rel, f"{scope}/**")
+        for scope in scopes
+    )
+
+
 def codebase_paths(repo: Path, scopes: list[str] | None = None) -> set[str]:
-    prefixes = [scope.strip().rstrip("/") for scope in scopes or [] if scope.strip()]
+    scope_specs = scopes or []
     paths: set[str] = set()
     for rel in git(repo, "ls-files").splitlines():
         if not rel or any(rel.startswith(prefix) for prefix in CODEBASE_EXCLUDED_PREFIXES):
             continue
-        if prefixes and not any(rel == prefix or rel.startswith(f"{prefix}/") for prefix in prefixes):
+        if not path_matches_scope(rel, scope_specs):
             continue
         paths.add(rel)
     return paths
@@ -480,10 +521,12 @@ def commit_bundle(repo: Path, commit_ref: str) -> str:
     )
 
 
-def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: str) -> set[str]:
+def review_paths(
+    repo: Path, target: str, target_ref: str | None, commit_ref: str, scopes: list[str] | None = None
+) -> set[str]:
     names: set[str] = set()
     if target == "codebase":
-        return codebase_paths(repo)
+        return codebase_paths(repo, scopes)
     if target == "local":
         sources = [
             git(repo, "diff", "--name-only", "--cached"),
@@ -498,7 +541,7 @@ def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: st
     for source in sources:
         for line in source.splitlines():
             path = line.strip()
-            if path:
+            if path and path_matches_scope(path, scopes or []):
                 names.add(path)
     return names
 
@@ -1193,6 +1236,16 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument("--prompt", action="append", help="Additional review instruction text.")
     parser.add_argument("--prompt-file", action="append", help="Additional review instruction file.")
     parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.")
+    parser.add_argument(
+        "--path",
+        action="append",
+        help="Repo-relative file, directory, or glob to include in the review scope. Repeat for multiple scopes.",
+    )
+    parser.add_argument(
+        "--scope-file",
+        action="append",
+        help="File containing repo-relative --path scopes, one per line. Blank lines and # comments are ignored.",
+    )
     parser.add_argument("--output", help="Write human output to a file as well as stdout.")
     parser.add_argument("--json-output", help="Write validated structured review JSON.")
     parser.add_argument("--heartbeat-seconds", type=int, default=60)
@@ -1389,6 +1442,7 @@ def run_panel(
 
 def main() -> int:
     args = parse_args()
+    scope_specs = load_scope_specs(args)
     reviewers = reviewer_args(args)
     repo = repo_root()
     target, target_ref = choose_target(repo, args.mode, args.base)
@@ -1407,6 +1461,8 @@ def main() -> int:
     display_ref = args.commit if target == "commit" else target_ref
     if display_ref:
         print(f"ref: {display_ref}")
+    if scope_specs:
+        print(f"path filters: {', '.join(scope_specs)}")
 
     if target == "local":
         bundle = local_bundle(repo)
@@ -1414,14 +1470,16 @@ def main() -> int:
         assert target_ref
         bundle = branch_bundle(repo, target_ref, skip_fetch=args.skip_fetch)
     elif target == "codebase":
-        changed_paths = codebase_paths(repo)
+        changed_paths = codebase_paths(repo, scope_specs)
         bundle = codebase_bundle(repo, changed_paths)
     else:
         bundle = commit_bundle(repo, args.commit)
         target_ref = args.commit
-        changed_paths = review_paths(repo, target, target_ref, args.commit)
+        changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs)
     if target != "codebase":
-        changed_paths = review_paths(repo, target, target_ref, args.commit)
+        changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs)
+    if scope_specs and not changed_paths:
+        raise SystemExit("no files matched --path/--scope-file for this review target")
     instructions = instruction_bundle(repo, changed_paths)
     prompt = build_prompt(
         repo,

From dac62e07295f817395a1cc2eca7dedd4f0f92c50 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Tue, 9 Jun 2026 04:14:10 -0700
Subject: [PATCH 04/16] Add parallel autoreview campaign scaffolding

---
 .agents/skills/oc-autoreview-adapted/SKILL.md |  22 ++
 .../oc-autoreview-adapted/agents/openai.yaml  |   4 +-
 .../scripts/new-review-campaign.py            | 279 ++++++++++++++++++
 .gitignore                                    |   1 +
 4 files changed, 304 insertions(+), 2 deletions(-)
 create mode 100755 .agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py

diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md
index ff382e81..37e0ea72 100644
--- a/.agents/skills/oc-autoreview-adapted/SKILL.md
+++ b/.agents/skills/oc-autoreview-adapted/SKILL.md
@@ -26,6 +26,7 @@ Set paths once:
 ```bash
 export AUTOREVIEW=".agents/skills/oc-autoreview-adapted/scripts/autoreview"
 export AUTOREVIEW_HARNESS=".agents/skills/oc-autoreview-adapted/scripts/test-review-harness"
+export AUTOREVIEW_CAMPAIGN=".agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py"
 ```
 
 Dirty local work:
@@ -65,6 +66,7 @@ Scoped codebase audit:
 ```
 
 Use `--scope-file scopes.txt` when a slice has many paths.
+Quote any `--path` value that contains shell globs, e.g. `--path 'tests/test_pop_*.py'`.
 
 ## Default Whole-Codebase Campaign
 
@@ -77,6 +79,14 @@ When asked to review the whole codebase hands-off, do not make one giant PR. Spl
 - `io-bids-study`: file I/O, BIDS, STUDY, dataset/session persistence.
 - `cli-docs-tools`: CLI, docs, skills, tools, workflows.
 
+Start by scaffolding an orchestration artifact:
+
+```bash
+uv run python "$AUTOREVIEW_CAMPAIGN" "EEGPrep whole-codebase autoreview"
+```
+
+This creates `.workflow/<slug>/` with `plan.md`, `state.json`, `orchestration.md`, `packets/`, `results/`, and `final-report.md`. Keep `plan.md` human-readable, update `state.json` as packet status changes, and write integration evidence in `final-report.md`.
+
 For each area:
 
 1. Create a fresh worktree/branch from the requested base, e.g. `autoreview/popfunc`.
@@ -93,6 +103,18 @@ PR body must list every finding reviewed:
 
 Do not auto-merge. The human reviews each PR normally.
 
+## Parallel Subagents
+
+Use parallel agents by default for whole-codebase campaigns when the environment exposes subagent/thread/worktree tools and the user has asked for hands-off or parallel work.
+
+- Launch at most 3 packet agents at once unless the user approves more.
+- Give each packet agent its `packets/<id>.md`, base branch, branch name, path scope, test commands, AGENTS.md constraints, and PR-body requirements.
+- Packet agents may edit related files outside their path scope only when required by the verified root cause; they must explain that in the PR.
+- Do not duplicate work across agents. If a packet blocks on another packet's result, keep it pending.
+- Parent agent owns integration: track packet PR URLs, inspect conflicts, synthesize accepted/rejected findings, and run broader checks after packet PRs merge.
+- If no subagent runner is available, simulate packets sequentially and write packet notes under `results/`.
+- Do not claim that a script launched subagents. The campaign script only scaffolds orchestration; actual subagents require exposed agent/thread tools.
+
 ## Useful Options
 
 - `--engine codex|claude|droid|copilot`; default is Codex.
diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
index 55464109..4453480c 100644
--- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
+++ b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
@@ -1,4 +1,4 @@
 interface:
   display_name: "OC Autoreview Adapted"
-  short_description: "EEGPrep autoreview loops and PR campaigns"
-  default_prompt: "Use $oc-autoreview-adapted to run scoped autoreview loops or a whole-codebase PR campaign, then verify and fix real bugs, parity breaks, and structural issues."
+  short_description: "Parallel EEGPrep autoreview PR campaigns"
+  default_prompt: "Use $oc-autoreview-adapted to run scoped or parallel whole-codebase autoreview loops, then verify and fix real bugs, parity breaks, and structural issues with one PR per codebase area."
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py b/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py
new file mode 100755
index 00000000..5ea77a07
--- /dev/null
+++ b/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3
+"""Create an EEGPrep autoreview campaign workflow directory."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import shlex
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import TypedDict
+
+
+class Packet(TypedDict):
+    id: str
+    branch: str
+    paths: list[str]
+    tests: list[str]
+
+
+DEFAULT_PACKETS: list[Packet] = [
+    {
+        "id": "01-popfunc",
+        "branch": "autoreview/popfunc",
+        "paths": [
+            "src/eegprep/functions/popfunc",
+            "src/eegprep/resources/help/pop_*.md",
+            "tests/test_pop_*.py",
+        ],
+        "tests": [
+            "uv run --no-sync pytest tests/test_pop_utils.py tests/test_file_menu_pop_functions.py",
+        ],
+    },
+    {
+        "id": "02-sigproc",
+        "branch": "autoreview/sigproc",
+        "paths": [
+            "src/eegprep/functions/sigprocfunc",
+            "tests/test_*runica*.py",
+            "tests/test_*resample*.py",
+        ],
+        "tests": [
+            "uv run --no-sync pytest tests/test_pop_resample_python.py tests/test_eeg_runica.py tests/test_runica.py tests/test_gui_pop_runica.py",
+        ],
+    },
+    {
+        "id": "03-gui-session",
+        "branch": "autoreview/gui-session",
+        "paths": [
+            "src/eegprep/functions/guifunc",
+            "src/eegprep/functions/adminfunc",
+            "tests/test_console_workspace.py",
+            "tests/test_gui_*.py",
+        ],
+        "tests": [
+            "uv run --no-sync pytest tests/test_console_workspace.py tests/test_gui_main_window.py",
+        ],
+    },
+    {
+        "id": "04-plugins",
+        "branch": "autoreview/plugins",
+        "paths": [
+            "src/eegprep/plugins",
+            "tests/test_*clean*.py",
+            "tests/test_*iclabel*.py",
+            "tests/test_*bids*.py",
+        ],
+        "tests": [
+            "uv run --no-sync pytest tests/test_gui_pop_clean_rawdata.py tests/test_iclabel.py",
+        ],
+    },
+    {
+        "id": "05-io-bids-study",
+        "branch": "autoreview/io-bids-study",
+        "paths": [
+            "src/eegprep/functions/popfunc/pop_fileio.py",
+            "src/eegprep/functions/popfunc/pop_loadset.py",
+            "src/eegprep/functions/popfunc/pop_saveset.py",
+            "src/eegprep/plugins/EEG_BIDS",
+            "src/eegprep/functions/studyfunc",
+            "tests/test_*study*.py",
+            "tests/test_*bids*.py",
+            "tests/test_file_menu_pop_functions.py",
+        ],
+        "tests": [
+            "uv run --no-sync pytest tests/test_file_menu_pop_functions.py tests/test_study_metadata.py tests/test_study_measures.py tests/test_study_clustering.py tests/test_study_end_to_end.py",
+        ],
+    },
+    {
+        "id": "06-cli-docs-tools",
+        "branch": "autoreview/cli-docs-tools",
+        "paths": [
+            "src/eegprep/cli",
+            "docs/source",
+            ".agents/skills",
+            "tools",
+            "scripts",
+            "tests/test_cli*.py",
+        ],
+        "tests": [
+            "uv run --no-sync pytest tests/test_cli_main.py tests/test_cli_transforms.py tests/test_cli_pipeline_qc_report.py tests/test_cli_bids_eeglab_commands.py",
+            "./pre-commit.py --changed-from origin/develop",
+        ],
+    },
+]
+
+
+def slugify(value: str) -> str:
+    slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
+    return slug[:64].strip("-") or "autoreview-campaign"
+
+
+def write_new(path: Path, content: str) -> None:
+    if not path.exists():
+        path.write_text(content, encoding="utf-8")
+
+
+def packet_prompt(packet: Packet, base: str) -> str:
+    paths = "\n".join(f"  --path {shlex.quote(path)} \\" for path in packet["paths"])
+    tests = "\n".join(f"- `{test}`" for test in packet["tests"])
+    return f"""# Packet {packet["id"]}: {packet["branch"]}
+
+## Objective
+Run a scoped EEGPrep autoreview loop for this codebase area, fix real findings from first principles, and open a PR to `{base}`.
+
+## Scope
+{chr(10).join(f"- `{path}`" for path in packet["paths"])}
+
+Fixes may touch related helpers outside this scope when required by the root cause, but keep the PR conceptually tied to this packet.
+
+## Command
+
+```bash
+.agents/skills/oc-autoreview-adapted/scripts/autoreview \\
+  --mode codebase \\
+{paths}
+  --thinking codex=xhigh
+```
+
+## Verification
+Run focused checks first:
+
+{tests}
+
+Then run broader checks if the fix affects shared behavior.
+
+## PR Requirements
+- Branch: `{packet["branch"]}`
+- Target: `{base}`
+- PR body must list every finding reviewed:
+  - Fixed: finding, root cause, files changed, tests run.
+  - Rejected: finding and why it is not real or not worth changing.
+  - Follow-up: only when real but intentionally outside this PR.
+
+## Do Not
+- Do not auto-merge.
+- Do not revert unrelated concurrent work.
+- Do not report vague architecture preferences without concrete failure modes.
+"""
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("title", nargs="?", default="EEGPrep autoreview campaign")
+    parser.add_argument("--root", default=".workflow")
+    parser.add_argument("--slug")
+    parser.add_argument("--base", default="origin/develop")
+    parser.add_argument("--max-concurrent", type=int, default=3)
+    args = parser.parse_args()
+
+    slug = slugify(args.slug or args.title)
+    run_dir = Path(args.root) / slug
+    packets_dir = run_dir / "packets"
+    results_dir = run_dir / "results"
+    packets_dir.mkdir(parents=True, exist_ok=True)
+    results_dir.mkdir(parents=True, exist_ok=True)
+
+    now = datetime.now(UTC).replace(microsecond=0).isoformat()
+    packets = [
+        {
+            "id": packet["id"],
+            "branch": packet["branch"],
+            "paths": packet["paths"],
+            "tests": packet["tests"],
+            "status": "pending",
+            "pr": None,
+        }
+        for packet in DEFAULT_PACKETS
+    ]
+    state = {
+        "title": args.title,
+        "slug": slug,
+        "created_at": now,
+        "status": "planned",
+        "base": args.base,
+        "max_concurrent_agents": args.max_concurrent,
+        "packets": packets,
+        "integration": {"status": "not_started", "notes": ""},
+    }
+    write_new(run_dir / "state.json", json.dumps(state, indent=2) + "\n")
+    write_new(
+        run_dir / "plan.md",
+        f"""# {args.title}
+
+## Goal
+Run parallel scoped autoreview loops across EEGPrep, fix real bugs/parity/architecture issues, and open PRs for human review.
+
+## Success Criteria
+- Every packet has a PR or a recorded no-change result.
+- Each PR body lists fixed, rejected, and follow-up findings.
+- Each packet reruns autoreview after fixes.
+- Integration checks pass after packet PRs merge.
+
+## Constraints
+- Keep AGENTS.md and EEGPrep's EEGLAB parity goal in force.
+- Runtime code must remain standalone and not depend on vendored EEGLAB.
+- Do not auto-merge packet PRs.
+- Max concurrent agents: {args.max_concurrent}.
+
+## Risks
+- Concurrent work conflicts: keep packet ownership mostly disjoint and resolve against authoritative code.
+- Noisy architecture findings: accept only findings with concrete failure modes.
+
+## Work Packets
+{chr(10).join(f"- `{packet['id']}` -> `{packet['branch']}`" for packet in packets)}
+
+## Integration Policy
+Parent agent tracks PRs, resolves conflicts after merges, runs broader checks, and updates final-report.md.
+""",
+    )
+    write_new(
+        run_dir / "orchestration.md",
+        f"""# Orchestration: {args.title}
+
+## Execution Rules
+- Use available subagent/thread/worktree tools when exposed by the environment.
+- Spawn at most {args.max_concurrent} packet agents at once.
+- Each packet owns its branch and opens one PR before the parent starts further work in that area.
+- If no subagent runner is available, execute packets sequentially and write notes in `results/`.
+- Parent integrates packet results; do not paste raw worker dumps as final status.
+
+## Packet Launch
+Give each worker only its packet file plus AGENTS.md context. Workers must not revert unrelated edits and must adapt to concurrent changes.
+
+## Completion Audit
+- All packet PRs created or no-change results recorded.
+- PR bodies include every finding reviewed.
+- Final integration checks recorded in `final-report.md`.
+""",
+    )
+    write_new(
+        run_dir / "final-report.md",
+        f"""# Final Report: {args.title}
+
+## Outcome
+
+## Packet PRs
+
+## Findings Fixed
+
+## Findings Rejected
+
+## Follow-ups
+
+## Integration Verification
+
+## Remaining Risks
+""",
+    )
+    for packet in DEFAULT_PACKETS:
+        write_new(packets_dir / f"{packet['id']}.md", packet_prompt(packet, args.base))
+
+    print(run_dir)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/.gitignore b/.gitignore
index dbe68211..a9967c33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 .claude/settings.local.json
 .context/
+.workflow/
 .coveragerc
 .notes/*
 !.notes/.gitkeep

From 57448a5ead105f33cd58b92125f689713eb2bed7 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Tue, 9 Jun 2026 18:25:50 -0700
Subject: [PATCH 05/16] Remove broken autoreview skill

---
 .agents/skills/oc-autoreview-adapted/SKILL.md |  182 --
 .../oc-autoreview-adapted/agents/openai.yaml  |    4 -
 .../oc-autoreview-adapted/scripts/autoreview  | 1546 -----------------
 .../scripts/new-review-campaign.py            |  279 ---
 .../scripts/test-review-harness               |   16 -
 .../scripts/test-review-harness.ps1           |   51 -
 .../scripts/test-review-harness.py            |  152 --
 .gitignore                                    |    1 -
 8 files changed, 2231 deletions(-)
 delete mode 100644 .agents/skills/oc-autoreview-adapted/SKILL.md
 delete mode 100644 .agents/skills/oc-autoreview-adapted/agents/openai.yaml
 delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/autoreview
 delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py
 delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness
 delete mode 100644 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1
 delete mode 100755 .agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py

diff --git a/.agents/skills/oc-autoreview-adapted/SKILL.md b/.agents/skills/oc-autoreview-adapted/SKILL.md
deleted file mode 100644
index 37e0ea72..00000000
--- a/.agents/skills/oc-autoreview-adapted/SKILL.md
+++ /dev/null
@@ -1,182 +0,0 @@
----
-name: oc-autoreview-adapted
-description: Run autonomous EEGPrep-focused structured autoreview on dirty changes, branches, commits, PR stacks, or the whole EEGPrep-owned codebase; verify and fix real findings from first principles using AGENTS.md, EEGLAB parity, GUI/console, tests, docs, and security constraints.
----
-
-# OC Autoreview Adapted
-
-Use the bundled helper for high-signal closeout review or whole-codebase bug hunts. It builds one bounded review bundle, runs one or more read-only reviewer engines, validates structured JSON, prints heartbeats for long runs, and exits nonzero when actionable findings remain.
-
-## Contract
-
-- Run it for real unless the user asked only for a plan.
-- Treat output as advisory. Verify every accepted finding in the real code path before fixing or reporting it.
-- Accept concrete bugs, regressions, EEGLAB parity breaks, unsafe I/O/security risks, missing tests tied to behavior, and maintainability issues that cause real future defects.
-- Accept structural findings when the code becomes harder to ship: spaghetti branching, wrong ownership layer, duplicate canonical helpers, non-atomic state updates, file sprawl, weak data boundaries, or indirection that hides EEG invariants.
-- Reject speculative edge cases, broad rewrites, stale vendored/reference code, generic lint, subjective MATLAB/Python style comments, and "cleaner someday" feedback without a concrete failure mode.
-- If a fix changes code, run focused tests and rerun autoreview on the same target. Stop when the final helper run exits 0 or when a remaining finding is consciously rejected with a concrete reason.
-- Do not invoke nested review tools from inside review. The helper already runs one structured review path.
-- Do not push/stage/commit/open PR unless the user requested that separately.
-- If the user asks to review the whole codebase and wants fixes, default to the campaign workflow below: branch/worktree per codebase area, review/fix/test/rerun, then open a PR before moving to the next area.
-
-## Commands
-
-Set paths once:
-
-```bash
-export AUTOREVIEW=".agents/skills/oc-autoreview-adapted/scripts/autoreview"
-export AUTOREVIEW_HARNESS=".agents/skills/oc-autoreview-adapted/scripts/test-review-harness"
-export AUTOREVIEW_CAMPAIGN=".agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py"
-```
-
-Dirty local work:
-
-```bash
-"$AUTOREVIEW" --mode local
-```
-
-Branch or stacked PR work:
-
-```bash
-base=$(gh pr view --json baseRefName --jq .baseRefName 2>/dev/null || echo develop)
-"$AUTOREVIEW" --mode branch --base "origin/$base"
-```
-
-Single committed change:
-
-```bash
-"$AUTOREVIEW" --mode commit --commit HEAD
-```
-
-Whole EEGPrep-owned codebase audit:
-
-```bash
-"$AUTOREVIEW" --mode codebase --thinking codex=xhigh
-```
-
-The codebase mode is not diff-limited. It lists tracked EEGPrep-owned files and excludes vendored EEGLAB/reference sample data by default; the reviewer may inspect files read-only and report real bugs anywhere in scope.
-
-Scoped codebase audit:
-
-```bash
-"$AUTOREVIEW" --mode codebase \
-  --path src/eegprep/functions/popfunc \
-  --path tests/test_pop_utils.py \
-  --thinking codex=xhigh
-```
-
-Use `--scope-file scopes.txt` when a slice has many paths.
-Quote any `--path` value that contains shell globs, e.g. `--path 'tests/test_pop_*.py'`.
-
-## Default Whole-Codebase Campaign
-
-When asked to review the whole codebase hands-off, do not make one giant PR. Split work into PR-sized areas, usually:
-
-- `popfunc`: `src/eegprep/functions/popfunc`, matching pop tests/help.
-- `sigproc`: `src/eegprep/functions/sigprocfunc`, numerical/parity tests.
-- `gui-session`: `src/eegprep/functions/guifunc`, `adminfunc`, console/session tests.
-- `plugins`: `src/eegprep/plugins`, bundled plugin tests/resources.
-- `io-bids-study`: file I/O, BIDS, STUDY, dataset/session persistence.
-- `cli-docs-tools`: CLI, docs, skills, tools, workflows.
-
-Start by scaffolding an orchestration artifact:
-
-```bash
-uv run python "$AUTOREVIEW_CAMPAIGN" "EEGPrep whole-codebase autoreview"
-```
-
-This creates `.workflow/<slug>/` with `plan.md`, `state.json`, `orchestration.md`, `packets/`, `results/`, and `final-report.md`. Keep `plan.md` human-readable, update `state.json` as packet status changes, and write integration evidence in `final-report.md`.
-
-For each area:
-
-1. Create a fresh worktree/branch from the requested base, e.g. `autoreview/popfunc`.
-2. Run `autoreview --mode codebase --path ... --thinking codex=xhigh`.
-3. Verify each finding from first principles. Fix real issues even when the fix touches a related helper outside the initial path scope; keep the PR conceptually tied to that area.
-4. Run focused tests, lint/type checks when relevant, then rerun the same scoped autoreview until clean or until remaining findings are rejected with reasons.
-5. Commit, push, and open a PR before starting the next area.
-
-PR body must list every finding reviewed:
-
-- **Fixed:** finding, root cause, files changed, tests run.
-- **Rejected:** finding, why it is not real or not worth changing.
-- **Follow-up:** only when real but intentionally outside this PR's area.
-
-Do not auto-merge. The human reviews each PR normally.
-
-## Parallel Subagents
-
-Use parallel agents by default for whole-codebase campaigns when the environment exposes subagent/thread/worktree tools and the user has asked for hands-off or parallel work.
-
-- Launch at most 3 packet agents at once unless the user approves more.
-- Give each packet agent its `packets/<id>.md`, base branch, branch name, path scope, test commands, AGENTS.md constraints, and PR-body requirements.
-- Packet agents may edit related files outside their path scope only when required by the verified root cause; they must explain that in the PR.
-- Do not duplicate work across agents. If a packet blocks on another packet's result, keep it pending.
-- Parent agent owns integration: track packet PR URLs, inspect conflicts, synthesize accepted/rejected findings, and run broader checks after packet PRs merge.
-- If no subagent runner is available, simulate packets sequentially and write packet notes under `results/`.
-- Do not claim that a script launched subagents. The campaign script only scaffolds orchestration; actual subagents require exposed agent/thread tools.
-
-## Useful Options
-
-- `--engine codex|claude|droid|copilot`; default is Codex.
-- `--reviewers codex,claude` or `--panel` for a multi-reviewer pass.
-- `--model codex=gpt-5.1 --thinking codex=xhigh`; Claude also accepts `max`.
-- `--stream-engine-output` to see compact live engine activity.
-- `--parallel-tests "uv run pytest tests/test_file.py"` to run tests while review runs.
-- `--prompt` / `--prompt-file` / `--dataset` to add evidence.
-- `--path` / `--scope-file` to constrain a codebase, branch, local, or commit review to a PR-sized area.
-- `--json-output /tmp/review.json` and `--output /tmp/review.txt` for artifacts.
-- `--mode uncommitted` is an alias for `local`; use branch/commit modes after committing.
-- `--skip-fetch` avoids fetching before branch diffs.
-- `--heartbeat-seconds 60` controls long-run heartbeat cadence.
-
-Smoke check:
-
-```bash
-"$AUTOREVIEW_HARNESS" --dry-run
-"$AUTOREVIEW_HARNESS" --fixture buggy --engine codex
-```
-
-On Windows, use:
-
-```powershell
-python .agents\skills\oc-autoreview-adapted\scripts\autoreview --help
-.agents\skills\oc-autoreview-adapted\scripts\test-review-harness.ps1 -Fixture buggy -Engine codex
-```
-
-## EEGPrep Review Surface
-
-Prioritize:
-
-- correctness, runtime/import failures, bad numerical results, broken common workflows;
-- EEGLAB parity in APIs, `pop_*` wrappers, history commands, GUI layout/behavior, events, and data structures;
-- EEG dict invariants: `data`, `nbchan`, `pnts`, `trials`, `srate`, `xmin`, `xmax`, `times`, `chanlocs`, `event`, `urevent`, `epoch`, `history`, ICA fields;
-- 1-based EEGLAB user indices/latencies versus 0-based Python array indices;
-- channel-major continuous `(nbchan, pnts)` and epoched `(nbchan, pnts, trials)` data;
-- GUI plus `eegprep-console` synchronization through `EEGPrepSession`;
-- `return_com=True`, `(EEG, com)` returns, history replay, and session update paths;
-- runtime independence from `src/eegprep/eeglab`;
-- packaged Markdown help for GUI Help / `pophelp`;
-- realistic EEG-size performance and concrete security/path/I/O risks.
-
-## Architecture Bar
-
-Use this only for code that will make EEGPrep less reliable or maintainable, not for taste.
-
-- Look for a simpler "code judo" move that preserves behavior while deleting branches, modes, helper layers, or special cases.
-- Flag spaghetti growth: ad-hoc conditionals in busy flows, scattered feature checks, one-off booleans, nullable modes, and partial updates.
-- Keep logic in the canonical layer: signal processing in `sigprocfunc`, user wrappers in `popfunc`, GUI/session coordination in `guifunc`/`adminfunc`, plugin code in its plugin package, CLI orchestration outside core math.
-- Prefer existing helpers/contracts over near-duplicates; remove thin wrappers or generic magic that hide simple EEG data shapes.
-- Treat file-size growth past roughly 1000 lines as a warning in diff review; in whole-codebase audits, flag large modules only with a concrete bug-prone coupling or focused split.
-- Prefer fixes that remove concepts, collapse duplicate branches, clarify data boundaries, or make state/session/history updates atomic.
-
-## Loop
-
-1. Format first if formatting can change line locations.
-2. Run autoreview on the smallest sufficient target.
-3. Verify each finding against code and AGENTS.md.
-4. Fix accepted findings at the right ownership boundary.
-5. Run focused tests, then broader tests if risk warrants.
-6. Rerun the same autoreview target.
-7. Final response: command used, tests run, findings fixed/rejected, and final clean result or remaining risk.
-
-If the helper prints `autoreview clean: no accepted/actionable findings reported` and exits 0, report that as clean and stop.
diff --git a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml b/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
deleted file mode 100644
index 4453480c..00000000
--- a/.agents/skills/oc-autoreview-adapted/agents/openai.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-interface:
-  display_name: "OC Autoreview Adapted"
-  short_description: "Parallel EEGPrep autoreview PR campaigns"
-  default_prompt: "Use $oc-autoreview-adapted to run scoped or parallel whole-codebase autoreview loops, then verify and fix real bugs, parity breaks, and structural issues with one PR per codebase area."
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/autoreview b/.agents/skills/oc-autoreview-adapted/scripts/autoreview
deleted file mode 100755
index 2c0c8e9b..00000000
--- a/.agents/skills/oc-autoreview-adapted/scripts/autoreview
+++ /dev/null
@@ -1,1546 +0,0 @@
-#!/usr/bin/env python3
-# Adapted for EEGPrep from OpenClaw's MIT-licensed autoreview helper:
-# https://github.com/openclaw/agent-skills/tree/main/skills/autoreview
-#
-# Original copyright (c) 2026 openclaw.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-from __future__ import annotations
-
-import argparse
-import concurrent.futures
-import copy
-import fnmatch
-import json
-import os
-import queue
-import subprocess
-import sys
-import tempfile
-import textwrap
-import threading
-import time
-from pathlib import Path
-from typing import Any, Callable
-
-
-ENGINES = ("codex", "claude", "droid", "copilot")
-DEFAULT_BASE = "origin/develop"
-TRUNK_BRANCHES = {"develop", "main", "master"}
-CODEBASE_EXCLUDED_PREFIXES = (
-    "src/eegprep/eeglab/",
-    "sample_data/",
-)
-CATEGORIES = {
-    "bug",
-    "security",
-    "regression",
-    "test_gap",
-    "maintainability",
-    "architecture",
-    "eeglab_parity",
-    "data_structure",
-    "gui_session",
-    "docs_help",
-    "performance",
-}
-THINKING_LEVELS_BY_ENGINE = {
-    "codex": {"low", "medium", "high", "xhigh"},
-    "claude": {"low", "medium", "high", "xhigh", "max"},
-    "droid": set(),
-    "copilot": set(),
-}
-
-
-SCHEMA: dict[str, Any] = {
-    "type": "object",
-    "additionalProperties": False,
-    "required": [
-        "findings",
-        "overall_correctness",
-        "overall_explanation",
-        "overall_confidence",
-    ],
-    "properties": {
-        "findings": {
-            "type": "array",
-            "items": {
-                "type": "object",
-                "additionalProperties": False,
-                "required": [
-                    "title",
-                    "body",
-                    "priority",
-                    "confidence",
-                    "category",
-                    "code_location",
-                ],
-                "properties": {
-                    "title": {"type": "string", "minLength": 1, "maxLength": 140},
-                    "body": {"type": "string", "minLength": 1, "maxLength": 2400},
-                    "priority": {"type": "string", "enum": ["P0", "P1", "P2", "P3"]},
-                    "confidence": {"type": "number", "minimum": 0, "maximum": 1},
-                    "category": {
-                        "type": "string",
-                        "enum": sorted(CATEGORIES),
-                    },
-                    "code_location": {
-                        "type": "object",
-                        "additionalProperties": False,
-                        "required": ["file_path", "line"],
-                        "properties": {
-                            "file_path": {"type": "string", "minLength": 1},
-                            "line": {"type": "integer", "minimum": 1},
-                        },
-                    },
-                },
-            },
-        },
-        "overall_correctness": {
-            "type": "string",
-            "enum": ["patch is correct", "patch is incorrect"],
-        },
-        "overall_explanation": {"type": "string", "minLength": 1, "maxLength": 3000},
-        "overall_confidence": {"type": "number", "minimum": 0, "maximum": 1},
-    },
-}
-
-
-def run(
-    args: list[str], cwd: Path, *, input_text: str | None = None, check: bool = True
-) -> subprocess.CompletedProcess[str]:
-    result = subprocess.run(
-        args,
-        cwd=cwd,
-        input=input_text,
-        text=True,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-    )
-    if check and result.returncode != 0:
-        cmd = " ".join(args)
-        raise SystemExit(f"command failed ({result.returncode}): {cmd}\n{result.stderr or result.stdout}")
-    return result
-
-
-def run_with_heartbeat(
-    args: list[str],
-    cwd: Path,
-    *,
-    input_text: str | None = None,
-    label: str,
-    heartbeat_seconds: int = 60,
-    stream_output: bool = False,
-    stream_display: Callable[[str, str], str | None] | None = None,
-) -> subprocess.CompletedProcess[str]:
-    if stream_output:
-        return run_with_stream(
-            args,
-            cwd,
-            input_text=input_text,
-            label=label,
-            heartbeat_seconds=heartbeat_seconds,
-            stream_display=stream_display,
-        )
-    started = time.monotonic()
-    proc = subprocess.Popen(
-        args,
-        cwd=cwd,
-        stdin=subprocess.PIPE if input_text is not None else None,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-    )
-    first_communicate = True
-    while True:
-        try:
-            stdout, stderr = proc.communicate(
-                input=input_text if first_communicate else None,
-                timeout=heartbeat_seconds,
-            )
-            return subprocess.CompletedProcess(args, int(proc.returncode or 0), stdout, stderr)
-        except subprocess.TimeoutExpired:
-            first_communicate = False
-            elapsed = int(time.monotonic() - started)
-            print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True)
-
-
-def run_with_stream(
-    args: list[str],
-    cwd: Path,
-    *,
-    input_text: str | None,
-    label: str,
-    heartbeat_seconds: int,
-    stream_display: Callable[[str, str], str | None] | None,
-) -> subprocess.CompletedProcess[str]:
-    started = time.monotonic()
-    proc = subprocess.Popen(
-        args,
-        cwd=cwd,
-        stdin=subprocess.PIPE if input_text is not None else None,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True,
-        bufsize=1,
-    )
-    events: queue.Queue[tuple[str, str | None]] = queue.Queue()
-    stdout_parts: list[str] = []
-    stderr_parts: list[str] = []
-
-    def read_stream(name: str, stream: Any) -> None:
-        try:
-            for line in iter(stream.readline, ""):
-                events.put((name, line))
-        finally:
-            events.put((name, None))
-
-    def write_stdin() -> None:
-        if proc.stdin is None or input_text is None:
-            return
-        try:
-            proc.stdin.write(input_text)
-            proc.stdin.close()
-        except BrokenPipeError:
-            return
-
-    threads = [
-        threading.Thread(target=read_stream, args=("stdout", proc.stdout), daemon=True),
-        threading.Thread(target=read_stream, args=("stderr", proc.stderr), daemon=True),
-    ]
-    for thread in threads:
-        thread.start()
-    stdin_thread = threading.Thread(target=write_stdin, daemon=True)
-    stdin_thread.start()
-
-    open_streams = 2
-    while open_streams:
-        try:
-            name, line = events.get(timeout=heartbeat_seconds)
-        except queue.Empty:
-            elapsed = int(time.monotonic() - started)
-            print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True)
-            continue
-        if line is None:
-            open_streams -= 1
-            continue
-        if name == "stdout":
-            stdout_parts.append(line)
-        else:
-            stderr_parts.append(line)
-        display = stream_display(name, line) if stream_display else line
-        if display:
-            target = sys.stdout if name == "stdout" else sys.stderr
-            target.write(display)
-            target.flush()
-
-    for thread in threads:
-        thread.join()
-    stdin_thread.join(timeout=1)
-    returncode = proc.wait()
-    return subprocess.CompletedProcess(args, returncode, "".join(stdout_parts), "".join(stderr_parts))
-
-
-def git(repo: Path, *args: str, check: bool = True) -> str:
-    return run([resolve_command("git", repo), *args], repo, check=check).stdout
-
-
-def repo_root() -> Path:
-    start = Path.cwd().resolve()
-    unsafe_root = discover_repo_root(start) or start
-    git_bin = find_command("git", unsafe_root)
-    if not git_bin:
-        raise SystemExit("git executable not found. Install Git or add it to PATH.")
-    result = subprocess.run(
-        [git_bin, "rev-parse", "--show-toplevel"],
-        text=True,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-    )
-    if result.returncode != 0:
-        raise SystemExit("autoreview must run inside a git repository")
-    return Path(result.stdout.strip()).resolve()
-
-
-def discover_repo_root(start: Path) -> Path | None:
-    current = start
-    while True:
-        if (current / ".git").exists():
-            return current
-        if current.parent == current:
-            return None
-        current = current.parent
-
-
-def current_branch(repo: Path) -> str:
-    return git(repo, "branch", "--show-current", check=False).strip() or "detached"
-
-
-def is_dirty(repo: Path) -> bool:
-    return bool(git(repo, "status", "--porcelain").strip())
-
-
-def choose_target(repo: Path, mode: str, base_ref: str | None) -> tuple[str, str | None]:
-    mode = "local" if mode == "uncommitted" else mode
-    branch = current_branch(repo)
-    if mode == "local" or (mode == "auto" and is_dirty(repo)):
-        return "local", None
-    if mode == "codebase":
-        return "codebase", None
-    if mode == "commit":
-        return "commit", None
-    if mode == "branch" or (mode == "auto" and branch not in TRUNK_BRANCHES):
-        return "branch", base_ref or detect_pr_base(repo) or DEFAULT_BASE
-    raise SystemExit(
-        "no review target: clean trunk checkout and no forced mode. "
-        "Pass --mode codebase, --mode branch --base <ref>, or --mode commit --commit <ref>."
-    )
-
-
-def detect_pr_base(repo: Path) -> str | None:
-    gh_bin = find_command("gh", repo)
-    if not gh_bin:
-        return None
-    result = run([gh_bin, "pr", "view", "--json", "baseRefName", "--jq", ".baseRefName"], repo, check=False)
-    base = result.stdout.strip()
-    return f"origin/{base}" if result.returncode == 0 and base else None
-
-
-def resolve_command(name: str, repo: Path) -> str:
-    resolved = find_command(name, repo)
-    if resolved:
-        return resolved
-    raise SystemExit(f"executable not found: {name}. Install it or pass an explicit trusted path when supported.")
-
-
-def find_command(name: str, repo: Path) -> str | None:
-    command = Path(name)
-    if has_directory_component(name, command):
-        base = command if command.is_absolute() else repo / command
-        return first_executable_candidate(base)
-    for part in os.environ.get("PATH", "").split(os.pathsep):
-        if not part or part == ".":
-            continue
-        path_part = Path(part)
-        if not path_part.is_absolute():
-            continue
-        try:
-            resolved_part = path_part.resolve()
-            resolved_repo = repo.resolve()
-        except OSError:
-            continue
-        if is_within(resolved_part, resolved_repo):
-            continue
-        found = first_executable_candidate(resolved_part / name, reject_root=resolved_repo)
-        if found:
-            return found
-    return None
-
-
-def is_within(path: Path, root: Path) -> bool:
-    return path == root or path.is_relative_to(root)
-
-
-def has_directory_component(name: str, command: Path) -> bool:
-    separators = [separator for separator in (os.sep, os.altsep) if separator]
-    return command.is_absolute() or bool(command.drive) or any(separator in name for separator in separators)
-
-
-def first_executable_candidate(path: Path, *, reject_root: Path | None = None) -> str | None:
-    if os.name == "nt" and not path.suffix:
-        extensions = [ext for ext in os.environ.get("PATHEXT", ".COM;.EXE;.BAT;.CMD").split(";") if ext]
-        candidates = [path.with_suffix(ext.lower()) for ext in extensions]
-        candidates.extend(path.with_suffix(ext.upper()) for ext in extensions)
-        candidates.append(path)
-    else:
-        candidates = [path]
-    for candidate in candidates:
-        if candidate.is_file() and os.access(candidate, os.X_OK):
-            if reject_root is not None:
-                try:
-                    if is_within(candidate.resolve(), reject_root):
-                        continue
-                except OSError:
-                    continue
-            return str(candidate)
-    return None
-
-
-def bounded(text: str, limit: int = 180_000) -> str:
-    if len(text) <= limit:
-        return text
-    return text[:limit] + f"\n\n[truncated at {limit} characters]\n"
-
-
-def bounded_field(text: str, limit: int) -> str:
-    if len(text) <= limit:
-        return text
-    suffix = "\n\n[truncated]"
-    return text[: max(0, limit - len(suffix))] + suffix
-
-
-def read_text(path: Path, limit: int = 40_000) -> str:
-    try:
-        data = path.read_bytes()
-    except OSError as exc:
-        return f"[unreadable: {exc}]"
-    if b"\0" in data:
-        return "[binary file omitted]"
-    text = data.decode("utf-8", errors="replace")
-    return bounded(text, limit)
-
-
-def local_bundle(repo: Path) -> str:
-    parts = [
-        "# Git Status",
-        git(repo, "status", "--short"),
-        "# Staged Diff",
-        git(repo, "diff", "--cached", "--stat"),
-        bounded(git(repo, "diff", "--cached", "--patch", "--find-renames")),
-        "# Unstaged Diff",
-        git(repo, "diff", "--stat"),
-        bounded(git(repo, "diff", "--patch", "--find-renames")),
-    ]
-    untracked = [line for line in git(repo, "ls-files", "--others", "--exclude-standard").splitlines() if line]
-    if untracked:
-        parts.append("# Untracked Files")
-        for rel in untracked:
-            path = repo / rel
-            parts.append(f"## {rel}\n{read_text(path)}")
-    return "\n\n".join(parts)
-
-
-def branch_bundle(repo: Path, base_ref: str, *, skip_fetch: bool) -> str:
-    if not skip_fetch:
-        git(repo, "fetch", "origin", "--quiet", check=False)
-    return "\n\n".join(
-        [
-            "# Branch Diff",
-            f"base: {base_ref}",
-            git(repo, "diff", "--stat", f"{base_ref}...HEAD"),
-            bounded(git(repo, "diff", "--patch", "--find-renames", f"{base_ref}...HEAD")),
-        ]
-    )
-
-
-def normalize_scope_spec(scope: str) -> str:
-    normalized = scope.strip().replace("\\", "/").removeprefix("./").rstrip("/")
-    if not normalized:
-        raise SystemExit("scope path cannot be empty")
-    path = Path(normalized)
-    if path.is_absolute() or ".." in path.parts:
-        raise SystemExit(f"scope path must be repo-relative: {scope}")
-    return normalized
-
-
-def load_scope_specs(args: argparse.Namespace) -> list[str]:
-    scopes = [normalize_scope_spec(scope) for scope in args.path or []]
-    for scope_file in args.scope_file or []:
-        path = Path(scope_file)
-        for line in path.read_text().splitlines():
-            value = line.split("#", 1)[0].strip()
-            if value:
-                scopes.append(normalize_scope_spec(value))
-    seen: set[str] = set()
-    result: list[str] = []
-    for scope in scopes:
-        if scope in seen:
-            continue
-        seen.add(scope)
-        result.append(scope)
-    return result
-
-
-def path_matches_scope(rel: str, scopes: list[str]) -> bool:
-    if not scopes:
-        return True
-    return any(
-        rel == scope
-        or rel.startswith(f"{scope}/")
-        or fnmatch.fnmatchcase(rel, scope)
-        or fnmatch.fnmatchcase(rel, f"{scope}/**")
-        for scope in scopes
-    )
-
-
-def codebase_paths(repo: Path, scopes: list[str] | None = None) -> set[str]:
-    scope_specs = scopes or []
-    paths: set[str] = set()
-    for rel in git(repo, "ls-files").splitlines():
-        if not rel or any(rel.startswith(prefix) for prefix in CODEBASE_EXCLUDED_PREFIXES):
-            continue
-        if not path_matches_scope(rel, scope_specs):
-            continue
-        paths.add(rel)
-    return paths
-
-
-def codebase_bundle(repo: Path, paths: set[str]) -> str:
-    grouped: dict[str, int] = {}
-    for rel in paths:
-        top = rel.split("/", 1)[0]
-        grouped[top] = grouped.get(top, 0) + 1
-    inventory = "\n".join(f"- {path}" for path in sorted(paths))
-    summary = "\n".join(f"- {name}: {count} tracked files" for name, count in sorted(grouped.items()))
-    return "\n\n".join(
-        [
-            "# Codebase Audit",
-            "This is a repository-wide audit, not a diff review. The reviewer may inspect in-scope files with read-only tools.",
-            "# Scope Summary",
-            summary or "[no tracked files]",
-            "# In-Scope Tracked Files",
-            bounded(inventory, 120_000),
-        ]
-    )
-
-
-def commit_bundle(repo: Path, commit_ref: str) -> str:
-    return "\n\n".join(
-        [
-            "# Commit Diff",
-            f"commit: {commit_ref}",
-            git(repo, "show", "--stat", "--format=fuller", commit_ref),
-            bounded(git(repo, "show", "--patch", "--find-renames", "--format=fuller", commit_ref)),
-        ]
-    )
-
-
-def review_paths(
-    repo: Path, target: str, target_ref: str | None, commit_ref: str, scopes: list[str] | None = None
-) -> set[str]:
-    names: set[str] = set()
-    if target == "codebase":
-        return codebase_paths(repo, scopes)
-    if target == "local":
-        sources = [
-            git(repo, "diff", "--name-only", "--cached"),
-            git(repo, "diff", "--name-only"),
-            git(repo, "ls-files", "--others", "--exclude-standard"),
-        ]
-    elif target == "branch":
-        assert target_ref
-        sources = [git(repo, "diff", "--name-only", f"{target_ref}...HEAD")]
-    else:
-        sources = [git(repo, "show", "--name-only", "--format=", commit_ref)]
-    for source in sources:
-        for line in source.splitlines():
-            path = line.strip()
-            if path and path_matches_scope(path, scopes or []):
-                names.add(path)
-    return names
-
-
-def load_extra_prompt(args: argparse.Namespace) -> str:
-    chunks: list[str] = []
-    for value in args.prompt or []:
-        chunks.append(value)
-    for path in args.prompt_file or []:
-        chunks.append(Path(path).read_text())
-    return "\n\n".join(chunks)
-
-
-def load_datasets(args: argparse.Namespace) -> str:
-    chunks: list[str] = []
-    for spec in args.dataset or []:
-        path = Path(spec)
-        if path.is_dir():
-            raise SystemExit(f"--dataset must be a file, got directory: {path}")
-        chunks.append(f"# Dataset: {path}\n{read_text(path)}")
-    return "\n\n".join(chunks)
-
-
-def instruction_paths(repo: Path, scope_paths: set[str]) -> list[Path]:
-    paths = {repo / "AGENTS.md"}
-    for rel in scope_paths:
-        rel_path = Path(rel)
-        if rel_path.is_absolute() or ".." in rel_path.parts:
-            continue
-        current = (repo / rel_path).parent
-        while True:
-            candidate = current / "AGENTS.md"
-            if candidate.exists():
-                paths.add(candidate)
-            if current == repo or current.parent == current:
-                break
-            current = current.parent
-    return sorted(path for path in paths if path.exists())
-
-
-def instruction_bundle(repo: Path, scope_paths: set[str]) -> str:
-    paths = instruction_paths(repo, scope_paths)
-    if not paths:
-        return "# Repository Instructions\n[no AGENTS.md files found]"
-    parts = ["# Repository Instructions"]
-    for path in paths:
-        rel = path.relative_to(repo)
-        parts.append(f"## {rel}\n{read_text(path, limit=80_000)}")
-    return "\n\n".join(parts)
-
-
-def build_prompt(
-    repo: Path,
-    target: str,
-    target_ref: str | None,
-    scope_paths: set[str],
-    instructions: str,
-    bundle: str,
-    extra_prompt: str,
-    datasets: str,
-) -> str:
-    target_line = f"{target} {target_ref}" if target_ref else target
-    scope_label = "Changed Paths" if target != "codebase" else "Review Scope"
-    scope_list = "\n".join(f"- {path}" for path in sorted(scope_paths)) or "[no paths]"
-    scope_rule = (
-        "Report only actionable defects introduced or exposed by this change."
-        if target != "codebase"
-        else (
-            "Report concrete actionable defects in the in-scope EEGPrep-owned codebase. "
-            "Do not require diff provenance, but do avoid stale vendored/reference code and vague wishlist items."
-        )
-    )
-    return textwrap.dedent(
-        f"""
-        You are a senior EEGPrep code reviewer. Review the provided bundle and inspect files as needed.
-
-        Hard rules:
-        - Return exactly one JSON object and nothing else. Do not wrap it in Markdown.
-        - The JSON object must match this schema exactly:
-        {json.dumps(SCHEMA, indent=2)}
-        - Do not modify files.
-        - Do not invoke nested reviewers or review tools.
-        - Forbidden nested review commands include: codex review, autoreview, claude review, oracle review.
-        - You may use read-only tools and web search to inspect files, dependency contracts, upstream docs, current behavior, and security implications.
-        - Shell commands, if available, must be read-only inspection commands. Do not run tests, formatters, package installs, generators, network mutation commands, git mutation commands, or commands that write files.
-        - {scope_rule}
-        - Prefer high-signal findings over style feedback.
-        - Include security findings only for concrete risks: injection, secret leaks, authz/authn bypass, path traversal, unsafe deserialization, unsafe filesystem/shell use, privacy leaks, and credential handling.
-        - Do not reject legitimate functionality merely because it touches shell, filesystem, network, auth, or sensitive data. Report a security finding only when the patch creates a concrete exploitable risk, removes an important safety check, or lacks validation at a trust boundary.
-        - For each finding, use the smallest file/line location that demonstrates the issue.
-        - If there are no actionable findings, return an empty findings array and mark the patch correct.
-
-        Structural quality bar:
-        - Do not approve code merely because it works. Flag architecture that makes EEGPrep harder to ship reliably.
-        - Look for a simpler "code judo" move: preserving behavior while deleting branches, modes, helper layers, or special cases.
-        - Flag spaghetti growth: ad-hoc conditionals inserted into busy flows, one-off booleans, nullable modes, scattered feature checks, and partial state updates.
-        - Flag logic in the wrong layer: GUI/session code doing signal processing, pop_* wrappers owning low-level math, CLI/console code bypassing session/history contracts, or runtime paths depending on vendored EEGLAB.
-        - Prefer canonical helpers and existing contracts over bespoke near-duplicates. Report duplicate helpers when a clear canonical home already exists.
-        - Be skeptical of thin wrappers, identity abstractions, cast-heavy/loosely-shaped boundaries, or generic magic that hides simple EEG data invariants.
-        - Treat file sprawl as a smell. In diff review, flag changes that push a file past roughly 1000 lines without a strong reason. In codebase audit, flag oversized modules only when there is a concrete bug-prone coupling or a focused decomposition path.
-        - Prefer remedies that remove concepts, collapse duplicate branches, make state updates atomic, clarify typed/data boundaries, or move logic to the owning module.
-
-        EEGPrep review priorities:
-        - Correctness bugs, import/runtime failures, wrong numerical results, and broken common workflows.
-        - EEGLAB parity in APIs, pop_* wrappers, history commands, GUI behavior, event semantics, and expected data structures.
-        - EEG dict fields: data, nbchan, pnts, trials, srate, xmin, xmax, times, chanlocs, event, urevent, epoch, history, icaact, icawinv, icasphere, icaweights, icachansind.
-        - MATLAB/Python indexing boundaries: EEGLAB event latencies and user-facing indices are usually 1-based; Python arrays are 0-based.
-        - Channel-major data shape: continuous (nbchan, pnts), epoched (nbchan, pnts, trials).
-        - GUI plus eegprep-console session sync through EEGPrepSession.
-        - User-facing pop_* contracts: return_com=True, (EEG, com) returns, history strings, and GUI/session update paths.
-        - Runtime code must not depend on src/eegprep/eeglab existing; use it only as a development reference.
-        - GUI Help/pophelp needs EEGPrep-owned packaged Markdown resources.
-        - Missing tests only when tied to a concrete changed or audited behavior.
-        - Realistic EEG-size performance regressions.
-
-        Review target: {target_line}
-        Repository: {repo}
-
-        # {scope_label}
-        {scope_list}
-
-        {extra_prompt}
-
-        {datasets}
-
-        {instructions}
-
-        # Change Bundle
-        {bundle}
-        """
-    ).strip()
-
-
-def write_json_temp(data: dict[str, Any]) -> Path:
-    handle = tempfile.NamedTemporaryFile("w", suffix=".json", delete=False)
-    with handle:
-        json.dump(data, handle)
-    return Path(handle.name)
-
-
-def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str:
-    if not args.tools:
-        raise SystemExit(
-            "--no-tools is not supported by the Codex engine; use --engine claude --no-tools for a no-tools run"
-        )
-    schema_path = write_json_temp(SCHEMA)
-    output_path = Path(tempfile.NamedTemporaryFile("w", suffix=".json", delete=False).name)
-    cmd = [resolve_command(args.codex_bin, repo), "--ask-for-approval", "never"]
-    if args.web_search:
-        cmd.append("--search")
-    if args.model:
-        cmd.extend(["--model", args.model])
-    if args.thinking:
-        cmd.extend(["-c", f'model_reasoning_effort="{args.thinking}"'])
-    cmd.append("exec")
-    if args.stream_engine_output:
-        cmd.append("--json")
-    cmd.extend(
-        [
-            "--ephemeral",
-            "-C",
-            str(repo),
-            "-s",
-            "read-only",
-            "--output-schema",
-            str(schema_path),
-            "--output-last-message",
-            str(output_path),
-            "-",
-        ]
-    )
-    result = run_with_heartbeat(
-        cmd,
-        repo,
-        input_text=prompt,
-        label="codex",
-        heartbeat_seconds=args.heartbeat_seconds,
-        stream_output=args.stream_engine_output,
-        stream_display=CodexStreamDisplay() if args.stream_engine_output else None,
-    )
-    try:
-        output = output_path.read_text()
-    finally:
-        schema_path.unlink(missing_ok=True)
-        output_path.unlink(missing_ok=True)
-    if result.returncode != 0:
-        raise SystemExit(f"codex engine failed ({result.returncode})\n{result.stderr or result.stdout}")
-    return output or result.stdout
-
-
-def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str:
-    cmd = [
-        resolve_command(args.claude_bin, repo),
-        "--print",
-        "--no-session-persistence",
-        "--output-format",
-        "stream-json" if args.stream_engine_output else "json",
-        "--json-schema",
-        json.dumps(SCHEMA),
-    ]
-    if args.tools:
-        cmd.extend(["--allowedTools", claude_allowed_tools(args)])
-    else:
-        cmd.extend(["--tools", ""])
-    if args.stream_engine_output:
-        cmd.append("--verbose")
-    if args.model:
-        cmd.extend(["--model", args.model])
-    if args.thinking:
-        cmd.extend(["--effort", args.thinking])
-    result = run_with_heartbeat(
-        cmd,
-        repo,
-        input_text=prompt,
-        label="claude",
-        heartbeat_seconds=args.heartbeat_seconds,
-        stream_output=args.stream_engine_output,
-        stream_display=ClaudeStreamDisplay() if args.stream_engine_output else None,
-    )
-    if result.returncode != 0:
-        raise SystemExit(f"claude engine failed ({result.returncode})\n{result.stderr or result.stdout}")
-    return result.stdout
-
-
-def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str:
-    if args.thinking:
-        raise SystemExit("--thinking is not supported by the droid engine")
-    prompt_path = Path(tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False).name)
-    prompt_path.write_text(prompt)
-    cmd = [
-        resolve_command(args.droid_bin, repo),
-        "exec",
-        "--cwd",
-        str(repo),
-        "--output-format",
-        "json",
-        "-f",
-        str(prompt_path),
-    ]
-    if args.model:
-        cmd.extend(["--model", args.model])
-    if not args.tools:
-        cmd.extend(["--disabled-tools", "*"])
-    result = run_with_heartbeat(
-        cmd,
-        repo,
-        label="droid",
-        heartbeat_seconds=args.heartbeat_seconds,
-        stream_output=args.stream_engine_output,
-    )
-    prompt_path.unlink(missing_ok=True)
-    if result.returncode != 0:
-        raise SystemExit(f"droid engine failed ({result.returncode})\n{result.stderr or result.stdout}")
-    return result.stdout
-
-
-def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str:
-    if args.thinking:
-        raise SystemExit("--thinking is not supported by the copilot engine")
-    if not args.tools:
-        raise SystemExit(
-            "--no-tools is not supported by the copilot engine; copilot requires a read-only file view tool to load the review bundle without exposing it in argv"
-        )
-    with tempfile.TemporaryDirectory(prefix="autoreview-copilot.") as tempdir:
-        prompt_path = Path(tempdir) / "prompt.txt"
-        prompt_path.write_text(prompt)
-        os.chmod(prompt_path, 0o600)
-        cmd = [
-            resolve_command(args.copilot_bin, repo),
-            "-C",
-            tempdir,
-            "-p",
-            "Read ./prompt.txt and follow it exactly. Return only the requested JSON object.",
-            "--output-format",
-            "json",
-            "--stream",
-            "on" if args.stream_engine_output else "off",
-            "--no-ask-user",
-            "--disable-builtin-mcps",
-        ]
-        if args.model:
-            cmd.extend(["--model", args.model])
-        cmd.extend(
-            [
-                "--available-tools=read_agent,rg,view,web_fetch",
-                "--allow-tool=read_agent",
-                "--allow-tool=rg",
-                "--allow-tool=view",
-                "--allow-tool=web_fetch",
-            ]
-        )
-        if args.web_search:
-            cmd.append("--allow-all-urls")
-        result = run_with_heartbeat(
-            cmd,
-            Path(tempdir),
-            label="copilot",
-            heartbeat_seconds=args.heartbeat_seconds,
-            stream_output=args.stream_engine_output,
-        )
-    if result.returncode != 0:
-        raise SystemExit(f"copilot engine failed ({result.returncode})\n{result.stderr or result.stdout}")
-    return result.stdout
-
-
-class CodexStreamDisplay:
-    def __init__(self, *, activity_seconds: int = 20) -> None:
-        self.activity_seconds = activity_seconds
-        self.hidden_events = 0
-        self.last_visible = time.monotonic()
-
-    def __call__(self, name: str, line: str) -> str | None:
-        if name != "stdout":
-            return line
-        try:
-            event = json.loads(line)
-        except json.JSONDecodeError:
-            return self.visible(line)
-        event_type = event.get("type")
-        if event_type == "thread.started":
-            return self.visible(f"codex thread: {event.get('thread_id', '<unknown>')}\n")
-        if event_type == "turn.started":
-            return self.visible("codex turn started\n")
-        if event_type == "turn.completed":
-            usage = event.get("usage")
-            message = format_codex_usage(usage) + "\n" if isinstance(usage, dict) else "codex turn completed\n"
-            return self.visible(self.flush_hidden() + message)
-        item = event.get("item")
-        if isinstance(item, dict) and item.get("type") == "agent_message" and isinstance(item.get("text"), str):
-            return self.visible(self.flush_hidden() + item["text"].rstrip() + "\n")
-        return self.hidden_activity()
-
-    def hidden_activity(self) -> str | None:
-        self.hidden_events += 1
-        if time.monotonic() - self.last_visible < self.activity_seconds:
-            return None
-        return self.visible(self.flush_hidden())
-
-    def flush_hidden(self) -> str:
-        if not self.hidden_events:
-            return ""
-        count = self.hidden_events
-        self.hidden_events = 0
-        return f"codex activity: {count} hidden tool/status events\n"
-
-    def visible(self, text: str) -> str:
-        self.last_visible = time.monotonic()
-        return text
-
-
-class ClaudeStreamDisplay:
-    def __init__(self, *, activity_seconds: int = 20) -> None:
-        self.activity_seconds = activity_seconds
-        self.hidden_events = 0
-        self.last_visible = time.monotonic()
-        self.started = False
-
-    def __call__(self, name: str, line: str) -> str | None:
-        if name != "stdout":
-            return line
-        try:
-            event = json.loads(line)
-        except json.JSONDecodeError:
-            return self.visible(line)
-        event_type = event.get("type")
-        if event_type == "system" and not self.started:
-            self.started = True
-            return self.visible("claude turn started\n")
-        if event_type == "assistant":
-            return self.assistant_message(event)
-        if event_type == "result":
-            return self.visible(self.flush_hidden() + self.result_summary(event))
-        return self.hidden_activity()
-
-    def assistant_message(self, event: dict[str, Any]) -> str | None:
-        message = event.get("message")
-        if not isinstance(message, dict):
-            return self.hidden_activity()
-        chunks: list[str] = []
-        for item in message.get("content", []):
-            if not isinstance(item, dict):
-                continue
-            if item.get("type") == "text" and isinstance(item.get("text"), str):
-                chunks.append(item["text"].rstrip())
-        if chunks:
-            return self.visible(self.flush_hidden() + "\n".join(chunks) + "\n")
-        return self.hidden_activity()
-
-    def result_summary(self, event: dict[str, Any]) -> str:
-        usage = event.get("usage")
-        fields: list[str] = []
-        if isinstance(usage, dict):
-            for key in (
-                "input_tokens",
-                "cache_read_input_tokens",
-                "cache_creation_input_tokens",
-                "output_tokens",
-            ):
-                value = usage.get(key)
-                if isinstance(value, int):
-                    fields.append(f"{key}={value}")
-        cost = event.get("total_cost_usd")
-        if isinstance(cost, (int, float)) and not isinstance(cost, bool):
-            fields.append(f"cost_usd={cost:.6f}")
-        return "claude usage: " + " ".join(fields) + "\n" if fields else "claude turn completed\n"
-
-    def hidden_activity(self) -> str | None:
-        self.hidden_events += 1
-        if time.monotonic() - self.last_visible < self.activity_seconds:
-            return None
-        return self.visible(self.flush_hidden())
-
-    def flush_hidden(self) -> str:
-        if not self.hidden_events:
-            return ""
-        count = self.hidden_events
-        self.hidden_events = 0
-        return f"claude activity: {count} hidden tool/status events\n"
-
-    def visible(self, text: str) -> str:
-        self.last_visible = time.monotonic()
-        return text
-
-
-def format_codex_usage(usage: dict[str, Any]) -> str:
-    fields = [
-        "input_tokens",
-        "cached_input_tokens",
-        "output_tokens",
-        "reasoning_output_tokens",
-    ]
-    parts = [f"{field}={usage[field]}" for field in fields if isinstance(usage.get(field), int)]
-    return "codex usage: " + " ".join(parts) if parts else "codex usage: unavailable"
-
-
-def claude_allowed_tools(args: argparse.Namespace) -> str:
-    tools = [tool.strip() for tool in args.claude_allowed_tools.split(",") if tool.strip()]
-    if not args.web_search:
-        tools = [tool for tool in tools if tool not in {"WebSearch", "WebFetch"}]
-    return ",".join(tools)
-
-
-def extract_json(text: str) -> dict[str, Any]:
-    stripped = text.strip()
-    if not stripped:
-        raise SystemExit("review engine returned empty output")
-    try:
-        parsed = json.loads(stripped)
-    except json.JSONDecodeError as exc:
-        fenced_report = parse_json_candidate(stripped)
-        if isinstance(fenced_report, dict) and "findings" in fenced_report:
-            return fenced_report
-        jsonl_report = extract_json_from_jsonl(stripped)
-        if jsonl_report:
-            return jsonl_report
-        raise SystemExit(f"review engine returned non-JSON output: {exc}\n{stripped[:2000]}")
-    if isinstance(parsed, dict) and "findings" in parsed:
-        return parsed
-    if isinstance(parsed, dict) and isinstance(parsed.get("structured_output"), dict):
-        return parsed["structured_output"]
-    if isinstance(parsed, dict) and isinstance(parsed.get("result"), str):
-        result_json = parse_json_candidate(parsed["result"])
-        if isinstance(result_json, dict) and "findings" in result_json:
-            return result_json
-        raise SystemExit(f"review engine result was not structured JSON:\n{parsed['result'][:2000]}")
-    jsonl_report = extract_json_from_jsonl(stripped)
-    if jsonl_report:
-        return jsonl_report
-    raise SystemExit(f"review engine returned unexpected JSON shape:\n{json.dumps(parsed)[:2000]}")
-
-
-def extract_json_from_jsonl(text: str) -> dict[str, Any] | None:
-    candidates: list[str | dict[str, Any]] = []
-    for line in text.splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            event = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        if not isinstance(event, dict):
-            continue
-        part = event.get("part")
-        if isinstance(part, dict) and isinstance(part.get("text"), str):
-            candidates.append(part["text"])
-        data = event.get("data")
-        if isinstance(data, dict) and isinstance(data.get("content"), str):
-            candidates.append(data["content"])
-        if isinstance(event.get("result"), str):
-            candidates.append(event["result"])
-        if isinstance(event.get("structured_output"), dict):
-            candidates.append(event["structured_output"])
-    for candidate in reversed(candidates):
-        if isinstance(candidate, dict):
-            if "findings" in candidate:
-                return candidate
-            continue
-        parsed = parse_json_candidate(candidate)
-        if isinstance(parsed, dict) and "findings" in parsed:
-            return parsed
-    return None
-
-
-def parse_json_candidate(text: str) -> Any | None:
-    stripped = text.strip()
-    if stripped.startswith("```"):
-        lines = stripped.splitlines()
-        if lines and lines[0].startswith("```") and lines[-1].strip() == "```":
-            stripped = "\n".join(lines[1:-1]).strip()
-    try:
-        parsed = json.loads(stripped)
-    except json.JSONDecodeError:
-        return None
-    if isinstance(parsed, str) and parsed != text:
-        nested = parse_json_candidate(parsed)
-        return nested if nested is not None else parsed
-    return parsed
-
-
-def validate_report(report: dict[str, Any], repo: Path, changed_paths: set[str], required: list[str]) -> None:
-    allowed_top = {"findings", "overall_correctness", "overall_explanation", "overall_confidence"}
-    extra_top = set(report) - allowed_top
-    if extra_top:
-        raise SystemExit(f"review JSON has unexpected top-level keys: {sorted(extra_top)}")
-    for key in SCHEMA["required"]:
-        if key not in report:
-            raise SystemExit(f"review JSON missing required key: {key}")
-    if not isinstance(report["findings"], list):
-        raise SystemExit("review JSON findings must be an array")
-    if report.get("overall_correctness") not in {"patch is correct", "patch is incorrect"}:
-        raise SystemExit(f"review JSON has invalid overall_correctness: {report.get('overall_correctness')}")
-    if not isinstance(report.get("overall_explanation"), str) or not report["overall_explanation"]:
-        raise SystemExit("review JSON overall_explanation must be a non-empty string")
-    if len(report["overall_explanation"]) > 3000:
-        raise SystemExit("review JSON overall_explanation is too long")
-    if not number_in_range(report.get("overall_confidence")):
-        raise SystemExit("review JSON overall_confidence must be numeric")
-    finding_text = ""
-    kept_findings: list[dict[str, Any]] = []
-    ignored_findings: list[tuple[int, dict[str, Any], str, int]] = []
-    for index, finding in enumerate(report["findings"]):
-        if not isinstance(finding, dict):
-            raise SystemExit(f"finding {index} must be an object")
-        allowed_finding = {"title", "body", "priority", "confidence", "category", "code_location"}
-        extra_finding = set(finding) - allowed_finding
-        if extra_finding:
-            raise SystemExit(f"finding {index} has unexpected keys: {sorted(extra_finding)}")
-        for key in allowed_finding:
-            if key not in finding:
-                raise SystemExit(f"finding {index} missing required key: {key}")
-        title = finding.get("title")
-        if not isinstance(title, str) or not title or len(title) > 140:
-            raise SystemExit(f"finding {index} has invalid title")
-        body = finding.get("body")
-        if not isinstance(body, str) or not body or len(body) > 2400:
-            raise SystemExit(f"finding {index} has invalid body")
-        priority = finding.get("priority")
-        if priority not in {"P0", "P1", "P2", "P3"}:
-            raise SystemExit(f"finding {index} has invalid priority: {priority}")
-        if not number_in_range(finding.get("confidence")):
-            raise SystemExit(f"finding {index} has invalid confidence")
-        category = finding.get("category")
-        if category not in CATEGORIES:
-            raise SystemExit(f"finding {index} has invalid category: {category}")
-        location = finding.get("code_location")
-        if not isinstance(location, dict):
-            raise SystemExit(f"finding {index} missing code_location")
-        rel = str(location.get("file_path", "")).strip()
-        line = location.get("line")
-        if not rel or not isinstance(line, int) or line < 1:
-            raise SystemExit(f"finding {index} has invalid location: {location}")
-        if Path(rel).is_absolute() or ".." in Path(rel).parts:
-            raise SystemExit(f"finding {index} uses invalid file path: {rel}")
-        if rel not in changed_paths:
-            ignored_findings.append((index, finding, rel, line))
-            continue
-        kept_findings.append(finding)
-        finding_text += "\n" + json.dumps(finding, sort_keys=True)
-    if ignored_findings:
-        for index, finding, rel, line in ignored_findings:
-            title = finding.get("title", "<untitled>")
-            print(
-                f"autoreview ignored out-of-scope finding {index}: {title} ({rel}:{line})",
-                file=sys.stderr,
-            )
-            print(bounded_field(str(finding.get("body", "")), 500), file=sys.stderr)
-        report["findings"] = kept_findings
-        if not kept_findings and report["overall_correctness"] == "patch is incorrect":
-            note = f"Ignored {len(ignored_findings)} out-of-scope finding(s) outside the reviewed change."
-            explanation = report["overall_explanation"].rstrip()
-            report["overall_correctness"] = "patch is correct"
-            report["overall_explanation"] = bounded_field(f"{explanation}\n\n{note}", 3000)
-    haystack = finding_text.lower()
-    for needle in required:
-        if needle.lower() not in haystack:
-            raise SystemExit(f"required finding text not found: {needle}")
-
-
-def number_in_range(value: Any) -> bool:
-    return isinstance(value, (int, float)) and not isinstance(value, bool) and 0 <= value <= 1
-
-
-def print_report(report: dict[str, Any], *, label: str = "autoreview") -> None:
-    findings = report["findings"]
-    if findings:
-        print(f"{label} findings: {len(findings)}")
-    elif report["overall_correctness"] == "patch is incorrect":
-        print(f"{label} verdict: patch is incorrect without discrete findings")
-    else:
-        print(f"{label} clean: no accepted/actionable findings reported")
-    for finding in findings:
-        loc = finding["code_location"]
-        print(f"[{finding['priority']}] {finding['title']}")
-        print(f"{loc['file_path']}:{loc['line']}")
-        print(f"{finding['body']}")
-        print()
-    print(f"overall: {report['overall_correctness']} ({report['overall_confidence']})")
-    print(report["overall_explanation"])
-
-
-def start_parallel_tests(command: str, repo: Path, shell_kind: str) -> tuple[subprocess.Popen, float]:
-    print(f"tests: {command}")
-    if shell_kind == "default" or shell_kind == "cmd":
-        return subprocess.Popen(command, cwd=repo, shell=True), time.time()
-    if shell_kind == "powershell":
-        powershell = resolve_command("powershell", repo)
-        return subprocess.Popen(
-            [powershell, "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command],
-            cwd=repo,
-        ), time.time()
-    if shell_kind == "pwsh":
-        pwsh = resolve_command("pwsh", repo)
-        return subprocess.Popen(
-            [pwsh, "-NoProfile", "-Command", command],
-            cwd=repo,
-        ), time.time()
-    raise SystemExit(f"invalid --parallel-tests-shell/AUTOREVIEW_PARALLEL_TESTS_SHELL: {shell_kind}")
-
-
-def finish_parallel_tests(proc: subprocess.Popen, started: float) -> int:
-    proc.wait()
-    print(f"tests exit: {proc.returncode} after {int(time.time() - started)}s")
-    return int(proc.returncode or 0)
-
-
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Bundle-driven AI code review.")
-    parser.add_argument(
-        "--mode",
-        choices=["auto", "local", "uncommitted", "branch", "commit", "codebase"],
-        default="auto",
-    )
-    parser.add_argument("--base")
-    parser.add_argument("--commit", default="HEAD")
-    parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch origin before branch diffs.")
-    parser.add_argument("--engine", choices=ENGINES, default=os.environ.get("AUTOREVIEW_ENGINE", "codex"))
-    parser.add_argument("--reviewers", help="Comma-separated review panel, e.g. codex,claude or codex:gpt-5:high.")
-    parser.add_argument(
-        "--panel",
-        action="store_true",
-        help="Run a Codex/Claude review panel unless --engine changes the first reviewer.",
-    )
-    parser.add_argument("--model", action="append", help="Model for all reviewers or engine=model. Repeatable.")
-    parser.add_argument(
-        "--thinking",
-        action="append",
-        help="Thinking/effort for all reviewers or engine=level. Repeatable. Codex: low, medium, high, xhigh. Claude: low, medium, high, xhigh, max.",
-    )
-    parser.add_argument(
-        "--allow-partial-panel", action="store_true", help="Continue panel output when one reviewer fails."
-    )
-    parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex"))
-    parser.add_argument("--claude-bin", default=os.environ.get("CLAUDE_BIN", "claude"))
-    parser.add_argument("--droid-bin", default=os.environ.get("DROID_BIN", "droid"))
-    parser.add_argument("--copilot-bin", default=os.environ.get("COPILOT_BIN", "copilot"))
-    parser.add_argument(
-        "--no-tools",
-        dest="tools",
-        action="store_false",
-        default=True,
-        help="Disable tools for engines that support it. Codex and copilot reject no-tools review.",
-    )
-    parser.add_argument("--no-web-search", dest="web_search", action="store_false", default=True)
-    parser.add_argument(
-        "--claude-allowed-tools",
-        default=os.environ.get(
-            "AUTOREVIEW_CLAUDE_TOOLS",
-            "Read,Grep,Glob,WebSearch,WebFetch",
-        ),
-    )
-    parser.add_argument("--prompt", action="append", help="Additional review instruction text.")
-    parser.add_argument("--prompt-file", action="append", help="Additional review instruction file.")
-    parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.")
-    parser.add_argument(
-        "--path",
-        action="append",
-        help="Repo-relative file, directory, or glob to include in the review scope. Repeat for multiple scopes.",
-    )
-    parser.add_argument(
-        "--scope-file",
-        action="append",
-        help="File containing repo-relative --path scopes, one per line. Blank lines and # comments are ignored.",
-    )
-    parser.add_argument("--output", help="Write human output to a file as well as stdout.")
-    parser.add_argument("--json-output", help="Write validated structured review JSON.")
-    parser.add_argument("--heartbeat-seconds", type=int, default=60)
-    parser.add_argument(
-        "--stream-engine-output",
-        action="store_true",
-        default=os.environ.get("AUTOREVIEW_STREAM_ENGINE_OUTPUT") == "1",
-        help="Stream review engine output while preserving buffered output for validation. Codex output is filtered to hide tool/file chatter.",
-    )
-    parser.add_argument(
-        "--parallel-tests", help="Run a test command concurrently with review; failure fails the helper."
-    )
-    parser.add_argument(
-        "--parallel-tests-shell",
-        choices=["default", "cmd", "powershell", "pwsh"],
-        default=os.environ.get("AUTOREVIEW_PARALLEL_TESTS_SHELL", "default"),
-        help="Shell for --parallel-tests. Default preserves Python shell=True platform behavior; use powershell or pwsh for PowerShell-specific commands.",
-    )
-    parser.add_argument(
-        "--require-finding", action="append", default=[], help="Require finding text to contain this substring."
-    )
-    parser.add_argument(
-        "--expect-findings", action="store_true", help="Treat findings as success; for harness acceptance tests."
-    )
-    parser.add_argument("--dry-run", action="store_true")
-    args = parser.parse_args()
-    if args.engine not in ENGINES:
-        raise SystemExit(f"invalid --engine/AUTOREVIEW_ENGINE: {args.engine}")
-    return args
-
-
-def run_engine(args: argparse.Namespace, repo: Path, prompt: str) -> str:
-    if args.engine == "codex":
-        return run_codex(args, repo, prompt)
-    if args.engine == "claude":
-        return run_claude(args, repo, prompt)
-    if args.engine == "droid":
-        return run_droid(args, repo, prompt)
-    if args.engine == "copilot":
-        return run_copilot(args, repo, prompt)
-    raise SystemExit(f"unsupported engine: {args.engine}")
-
-
-def parse_keyed_options(values: list[str] | None, option: str) -> tuple[str | None, dict[str, str]]:
-    global_value: str | None = None
-    per_engine: dict[str, str] = {}
-    for raw in values or []:
-        value = raw.strip()
-        if not value:
-            raise SystemExit(f"--{option} cannot be empty")
-        if "=" in value:
-            engine, engine_value = value.split("=", 1)
-            engine = engine.strip()
-            engine_value = engine_value.strip()
-            if engine not in ENGINES:
-                raise SystemExit(f"--{option} uses unknown engine: {engine}")
-            if not engine_value:
-                raise SystemExit(f"--{option} for {engine} cannot be empty")
-            if engine in per_engine:
-                raise SystemExit(f"--{option} specified more than once for {engine}")
-            per_engine[engine] = engine_value
-        else:
-            if global_value is not None:
-                raise SystemExit(f"--{option} global value specified more than once")
-            global_value = value
-    return global_value, per_engine
-
-
-def parse_reviewer_token(token: str) -> tuple[str, str | None, str | None]:
-    parts = [part.strip() for part in token.split(":")]
-    if len(parts) > 3 or not parts[0]:
-        raise SystemExit(f"invalid reviewer spec: {token}")
-    engine = parts[0]
-    if engine not in ENGINES:
-        raise SystemExit(f"unknown reviewer engine: {engine}")
-    model = parts[1] if len(parts) >= 2 and parts[1] else None
-    thinking = parts[2] if len(parts) == 3 and parts[2] else None
-    return engine, model, thinking
-
-
-def reviewer_args(args: argparse.Namespace) -> list[argparse.Namespace]:
-    global_model, model_by_engine = parse_keyed_options(args.model, "model")
-    global_thinking, thinking_by_engine = parse_keyed_options(args.thinking, "thinking")
-    reviewers: list[tuple[str, str | None, str | None]] = []
-    if args.reviewers:
-        tokens = [token.strip() for token in args.reviewers.split(",") if token.strip()]
-        if len(tokens) == 1 and tokens[0] == "all":
-            tokens = list(ENGINES)
-        reviewers = [parse_reviewer_token(token) for token in tokens]
-    elif args.panel:
-        engines = [args.engine]
-        for engine in ("codex", "claude"):
-            if engine not in engines:
-                engines.append(engine)
-        reviewers = [(engine, None, None) for engine in engines]
-    else:
-        reviewers = [(args.engine, None, None)]
-
-    seen: set[str] = set()
-    result: list[argparse.Namespace] = []
-    for engine, inline_model, inline_thinking in reviewers:
-        if engine in seen:
-            raise SystemExit(f"reviewer specified more than once: {engine}")
-        seen.add(engine)
-        model = inline_model or model_by_engine.get(engine) or global_model
-        thinking = inline_thinking or thinking_by_engine.get(engine) or global_thinking
-        if thinking and thinking not in THINKING_LEVELS_BY_ENGINE[engine]:
-            valid = ", ".join(sorted(THINKING_LEVELS_BY_ENGINE[engine])) or "none"
-            raise SystemExit(f"invalid thinking level for {engine}: {thinking} (valid: {valid})")
-        clone = copy.copy(args)
-        clone.engine = engine
-        clone.model = model
-        clone.thinking = thinking
-        result.append(clone)
-    return result
-
-
-def reviewer_label(args: argparse.Namespace) -> str:
-    parts = [args.engine]
-    if args.model:
-        parts.append(f"model={args.model}")
-    if args.thinking:
-        parts.append(f"thinking={args.thinking}")
-    return " ".join(parts)
-
-
-def run_reviewer(
-    args: argparse.Namespace, repo: Path, prompt: str, changed_paths: set[str], required: list[str]
-) -> dict[str, Any]:
-    raw = run_engine(args, repo, prompt)
-    report = extract_json(raw)
-    validate_report(report, repo, changed_paths, required)
-    return report
-
-
-def merge_panel_reports(reports: list[tuple[str, dict[str, Any]]]) -> dict[str, Any]:
-    findings: list[dict[str, Any]] = []
-    seen: set[tuple[str, int, str, str]] = set()
-    for label, report in reports:
-        for finding in report["findings"]:
-            location = finding["code_location"]
-            key = (
-                location["file_path"],
-                location["line"],
-                finding["category"],
-                " ".join(finding["title"].lower().split()),
-            )
-            if key in seen:
-                continue
-            seen.add(key)
-            merged = copy.deepcopy(finding)
-            merged["body"] = bounded_field(f"Reviewer: {label}\n\n{merged['body']}", 2400)
-            findings.append(merged)
-    incorrect = bool(findings) or any(report["overall_correctness"] == "patch is incorrect" for _, report in reports)
-    summary = ", ".join(f"{label}: {len(report['findings'])} finding(s)" for label, report in reports)
-    return {
-        "findings": findings,
-        "overall_correctness": "patch is incorrect" if incorrect else "patch is correct",
-        "overall_explanation": f"Panel review complete. {summary}.",
-        "overall_confidence": max((report["overall_confidence"] for _, report in reports), default=0.5),
-    }
-
-
-def run_panel(
-    args: argparse.Namespace, reviewers: list[argparse.Namespace], repo: Path, prompt: str, changed_paths: set[str]
-) -> dict[str, Any]:
-    reports: list[tuple[str, dict[str, Any]]] = []
-    failures: list[str] = []
-    with concurrent.futures.ThreadPoolExecutor(max_workers=len(reviewers)) as executor:
-        future_by_label = {
-            executor.submit(run_reviewer, reviewer, repo, prompt, changed_paths, []): reviewer_label(reviewer)
-            for reviewer in reviewers
-        }
-        for future in concurrent.futures.as_completed(future_by_label):
-            label = future_by_label[future]
-            try:
-                reports.append((label, future.result()))
-            except SystemExit as exc:
-                failures.append(f"{label}: {exc}")
-            except Exception as exc:
-                failures.append(f"{label}: {exc}")
-    if failures and not args.allow_partial_panel:
-        raise SystemExit("autoreview panel failed\n" + "\n".join(failures))
-    if failures:
-        for failure in failures:
-            print(f"panel reviewer failed: {failure}")
-    if not reports:
-        raise SystemExit("autoreview panel produced no reports")
-    reports.sort(key=lambda item: item[0])
-    report = merge_panel_reports(reports)
-    validate_report(report, repo, changed_paths, args.require_finding)
-    return report
-
-
-def main() -> int:
-    args = parse_args()
-    scope_specs = load_scope_specs(args)
-    reviewers = reviewer_args(args)
-    repo = repo_root()
-    target, target_ref = choose_target(repo, args.mode, args.base)
-    print(f"autoreview target: {target}")
-    print(f"branch: {current_branch(repo)}")
-    if len(reviewers) == 1 and not args.reviewers and not args.panel:
-        print(f"engine: {reviewers[0].engine}")
-        if reviewers[0].model:
-            print(f"model: {reviewers[0].model}")
-        if reviewers[0].thinking:
-            print(f"thinking: {reviewers[0].thinking}")
-    else:
-        print(f"reviewers: {', '.join(reviewer_label(reviewer) for reviewer in reviewers)}")
-    print(f"tools: {'on' if args.tools else 'off'}")
-    print(f"web_search: {'on' if args.web_search else 'off'}")
-    display_ref = args.commit if target == "commit" else target_ref
-    if display_ref:
-        print(f"ref: {display_ref}")
-    if scope_specs:
-        print(f"path filters: {', '.join(scope_specs)}")
-
-    if target == "local":
-        bundle = local_bundle(repo)
-    elif target == "branch":
-        assert target_ref
-        bundle = branch_bundle(repo, target_ref, skip_fetch=args.skip_fetch)
-    elif target == "codebase":
-        changed_paths = codebase_paths(repo, scope_specs)
-        bundle = codebase_bundle(repo, changed_paths)
-    else:
-        bundle = commit_bundle(repo, args.commit)
-        target_ref = args.commit
-        changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs)
-    if target != "codebase":
-        changed_paths = review_paths(repo, target, target_ref, args.commit, scope_specs)
-    if scope_specs and not changed_paths:
-        raise SystemExit("no files matched --path/--scope-file for this review target")
-    instructions = instruction_bundle(repo, changed_paths)
-    prompt = build_prompt(
-        repo,
-        target,
-        target_ref,
-        changed_paths,
-        instructions,
-        bundle,
-        load_extra_prompt(args),
-        load_datasets(args),
-    )
-    print(f"scope paths: {len(changed_paths)}")
-    print(f"bundle: {len(prompt)} chars")
-    if args.dry_run:
-        return 0
-
-    tests_proc: tuple[subprocess.Popen, float] | None = None
-    if args.parallel_tests:
-        tests_proc = start_parallel_tests(args.parallel_tests, repo, args.parallel_tests_shell)
-    try:
-        if len(reviewers) == 1:
-            report = run_reviewer(reviewers[0], repo, prompt, changed_paths, args.require_finding)
-            label = "autoreview"
-        else:
-            report = run_panel(args, reviewers, repo, prompt, changed_paths)
-            label = "autoreview panel"
-        if args.json_output:
-            Path(args.json_output).write_text(json.dumps(report, indent=2) + "\n")
-
-        if args.output:
-            original_stdout = sys.stdout
-            with Path(args.output).open("w") as handle:
-                sys.stdout = Tee(original_stdout, handle)
-                print_report(report, label=label)
-                sys.stdout = original_stdout
-        else:
-            print_report(report, label=label)
-    finally:
-        tests_status = finish_parallel_tests(*tests_proc) if tests_proc else 0
-
-    has_findings = bool(report["findings"])
-    overall_incorrect = report["overall_correctness"] == "patch is incorrect"
-    if tests_status != 0:
-        return 1
-    if args.expect_findings:
-        return 0 if has_findings else 1
-    return 1 if has_findings or overall_incorrect else 0
-
-
-class Tee:
-    def __init__(self, *streams: Any) -> None:
-        self.streams = streams
-
-    def write(self, data: str) -> None:
-        for stream in self.streams:
-            stream.write(data)
-
-    def flush(self) -> None:
-        for stream in self.streams:
-            stream.flush()
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py b/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py
deleted file mode 100755
index 5ea77a07..00000000
--- a/.agents/skills/oc-autoreview-adapted/scripts/new-review-campaign.py
+++ /dev/null
@@ -1,279 +0,0 @@
-#!/usr/bin/env python3
-"""Create an EEGPrep autoreview campaign workflow directory."""
-
-from __future__ import annotations
-
-import argparse
-import json
-import re
-import shlex
-from datetime import UTC, datetime
-from pathlib import Path
-from typing import TypedDict
-
-
-class Packet(TypedDict):
-    id: str
-    branch: str
-    paths: list[str]
-    tests: list[str]
-
-
-DEFAULT_PACKETS: list[Packet] = [
-    {
-        "id": "01-popfunc",
-        "branch": "autoreview/popfunc",
-        "paths": [
-            "src/eegprep/functions/popfunc",
-            "src/eegprep/resources/help/pop_*.md",
-            "tests/test_pop_*.py",
-        ],
-        "tests": [
-            "uv run --no-sync pytest tests/test_pop_utils.py tests/test_file_menu_pop_functions.py",
-        ],
-    },
-    {
-        "id": "02-sigproc",
-        "branch": "autoreview/sigproc",
-        "paths": [
-            "src/eegprep/functions/sigprocfunc",
-            "tests/test_*runica*.py",
-            "tests/test_*resample*.py",
-        ],
-        "tests": [
-            "uv run --no-sync pytest tests/test_pop_resample_python.py tests/test_eeg_runica.py tests/test_runica.py tests/test_gui_pop_runica.py",
-        ],
-    },
-    {
-        "id": "03-gui-session",
-        "branch": "autoreview/gui-session",
-        "paths": [
-            "src/eegprep/functions/guifunc",
-            "src/eegprep/functions/adminfunc",
-            "tests/test_console_workspace.py",
-            "tests/test_gui_*.py",
-        ],
-        "tests": [
-            "uv run --no-sync pytest tests/test_console_workspace.py tests/test_gui_main_window.py",
-        ],
-    },
-    {
-        "id": "04-plugins",
-        "branch": "autoreview/plugins",
-        "paths": [
-            "src/eegprep/plugins",
-            "tests/test_*clean*.py",
-            "tests/test_*iclabel*.py",
-            "tests/test_*bids*.py",
-        ],
-        "tests": [
-            "uv run --no-sync pytest tests/test_gui_pop_clean_rawdata.py tests/test_iclabel.py",
-        ],
-    },
-    {
-        "id": "05-io-bids-study",
-        "branch": "autoreview/io-bids-study",
-        "paths": [
-            "src/eegprep/functions/popfunc/pop_fileio.py",
-            "src/eegprep/functions/popfunc/pop_loadset.py",
-            "src/eegprep/functions/popfunc/pop_saveset.py",
-            "src/eegprep/plugins/EEG_BIDS",
-            "src/eegprep/functions/studyfunc",
-            "tests/test_*study*.py",
-            "tests/test_*bids*.py",
-            "tests/test_file_menu_pop_functions.py",
-        ],
-        "tests": [
-            "uv run --no-sync pytest tests/test_file_menu_pop_functions.py tests/test_study_metadata.py tests/test_study_measures.py tests/test_study_clustering.py tests/test_study_end_to_end.py",
-        ],
-    },
-    {
-        "id": "06-cli-docs-tools",
-        "branch": "autoreview/cli-docs-tools",
-        "paths": [
-            "src/eegprep/cli",
-            "docs/source",
-            ".agents/skills",
-            "tools",
-            "scripts",
-            "tests/test_cli*.py",
-        ],
-        "tests": [
-            "uv run --no-sync pytest tests/test_cli_main.py tests/test_cli_transforms.py tests/test_cli_pipeline_qc_report.py tests/test_cli_bids_eeglab_commands.py",
-            "./pre-commit.py --changed-from origin/develop",
-        ],
-    },
-]
-
-
-def slugify(value: str) -> str:
-    slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
-    return slug[:64].strip("-") or "autoreview-campaign"
-
-
-def write_new(path: Path, content: str) -> None:
-    if not path.exists():
-        path.write_text(content, encoding="utf-8")
-
-
-def packet_prompt(packet: Packet, base: str) -> str:
-    paths = "\n".join(f"  --path {shlex.quote(path)} \\" for path in packet["paths"])
-    tests = "\n".join(f"- `{test}`" for test in packet["tests"])
-    return f"""# Packet {packet["id"]}: {packet["branch"]}
-
-## Objective
-Run a scoped EEGPrep autoreview loop for this codebase area, fix real findings from first principles, and open a PR to `{base}`.
-
-## Scope
-{chr(10).join(f"- `{path}`" for path in packet["paths"])}
-
-Fixes may touch related helpers outside this scope when required by the root cause, but keep the PR conceptually tied to this packet.
-
-## Command
-
-```bash
-.agents/skills/oc-autoreview-adapted/scripts/autoreview \\
-  --mode codebase \\
-{paths}
-  --thinking codex=xhigh
-```
-
-## Verification
-Run focused checks first:
-
-{tests}
-
-Then run broader checks if the fix affects shared behavior.
-
-## PR Requirements
-- Branch: `{packet["branch"]}`
-- Target: `{base}`
-- PR body must list every finding reviewed:
-  - Fixed: finding, root cause, files changed, tests run.
-  - Rejected: finding and why it is not real or not worth changing.
-  - Follow-up: only when real but intentionally outside this PR.
-
-## Do Not
-- Do not auto-merge.
-- Do not revert unrelated concurrent work.
-- Do not report vague architecture preferences without concrete failure modes.
-"""
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("title", nargs="?", default="EEGPrep autoreview campaign")
-    parser.add_argument("--root", default=".workflow")
-    parser.add_argument("--slug")
-    parser.add_argument("--base", default="origin/develop")
-    parser.add_argument("--max-concurrent", type=int, default=3)
-    args = parser.parse_args()
-
-    slug = slugify(args.slug or args.title)
-    run_dir = Path(args.root) / slug
-    packets_dir = run_dir / "packets"
-    results_dir = run_dir / "results"
-    packets_dir.mkdir(parents=True, exist_ok=True)
-    results_dir.mkdir(parents=True, exist_ok=True)
-
-    now = datetime.now(UTC).replace(microsecond=0).isoformat()
-    packets = [
-        {
-            "id": packet["id"],
-            "branch": packet["branch"],
-            "paths": packet["paths"],
-            "tests": packet["tests"],
-            "status": "pending",
-            "pr": None,
-        }
-        for packet in DEFAULT_PACKETS
-    ]
-    state = {
-        "title": args.title,
-        "slug": slug,
-        "created_at": now,
-        "status": "planned",
-        "base": args.base,
-        "max_concurrent_agents": args.max_concurrent,
-        "packets": packets,
-        "integration": {"status": "not_started", "notes": ""},
-    }
-    write_new(run_dir / "state.json", json.dumps(state, indent=2) + "\n")
-    write_new(
-        run_dir / "plan.md",
-        f"""# {args.title}
-
-## Goal
-Run parallel scoped autoreview loops across EEGPrep, fix real bugs/parity/architecture issues, and open PRs for human review.
-
-## Success Criteria
-- Every packet has a PR or a recorded no-change result.
-- Each PR body lists fixed, rejected, and follow-up findings.
-- Each packet reruns autoreview after fixes.
-- Integration checks pass after packet PRs merge.
-
-## Constraints
-- Keep AGENTS.md and EEGPrep's EEGLAB parity goal in force.
-- Runtime code must remain standalone and not depend on vendored EEGLAB.
-- Do not auto-merge packet PRs.
-- Max concurrent agents: {args.max_concurrent}.
-
-## Risks
-- Concurrent work conflicts: keep packet ownership mostly disjoint and resolve against authoritative code.
-- Noisy architecture findings: accept only findings with concrete failure modes.
-
-## Work Packets
-{chr(10).join(f"- `{packet['id']}` -> `{packet['branch']}`" for packet in packets)}
-
-## Integration Policy
-Parent agent tracks PRs, resolves conflicts after merges, runs broader checks, and updates final-report.md.
-""",
-    )
-    write_new(
-        run_dir / "orchestration.md",
-        f"""# Orchestration: {args.title}
-
-## Execution Rules
-- Use available subagent/thread/worktree tools when exposed by the environment.
-- Spawn at most {args.max_concurrent} packet agents at once.
-- Each packet owns its branch and opens one PR before the parent starts further work in that area.
-- If no subagent runner is available, execute packets sequentially and write notes in `results/`.
-- Parent integrates packet results; do not paste raw worker dumps as final status.
-
-## Packet Launch
-Give each worker only its packet file plus AGENTS.md context. Workers must not revert unrelated edits and must adapt to concurrent changes.
-
-## Completion Audit
-- All packet PRs created or no-change results recorded.
-- PR bodies include every finding reviewed.
-- Final integration checks recorded in `final-report.md`.
-""",
-    )
-    write_new(
-        run_dir / "final-report.md",
-        f"""# Final Report: {args.title}
-
-## Outcome
-
-## Packet PRs
-
-## Findings Fixed
-
-## Findings Rejected
-
-## Follow-ups
-
-## Integration Verification
-
-## Remaining Risks
-""",
-    )
-    for packet in DEFAULT_PACKETS:
-        write_new(packets_dir / f"{packet['id']}.md", packet_prompt(packet, args.base))
-
-    print(run_dir)
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness
deleted file mode 100755
index ab98338a..00000000
--- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-harness="$script_dir/test-review-harness.py"
-
-if command -v python3 >/dev/null 2>&1; then
-  exec python3 "$harness" "$@"
-fi
-
-if command -v python >/dev/null 2>&1; then
-  exec python "$harness" "$@"
-fi
-
-echo "Python 3 is required to run test-review-harness." >&2
-exit 127
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1 b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1
deleted file mode 100644
index 51007a64..00000000
--- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.ps1
+++ /dev/null
@@ -1,51 +0,0 @@
-[CmdletBinding()]
-param(
-    [ValidateSet('buggy', 'benign')]
-    [string] $Fixture,
-
-    [ValidateSet('codex', 'claude', 'droid', 'copilot')]
-    [string[]] $Engine,
-
-    [switch] $DryRun,
-
-    [Alias('h')]
-    [switch] $Help
-)
-
-$ErrorActionPreference = 'Stop'
-
-$Harness = Join-Path $PSScriptRoot 'test-review-harness.py'
-$ForwardedArgs = @()
-
-if ($Help) {
-    $ForwardedArgs += '--help'
-}
-
-if ($PSBoundParameters.ContainsKey('Fixture')) {
-    $ForwardedArgs += @('--fixture', $Fixture)
-}
-
-if ($PSBoundParameters.ContainsKey('Engine')) {
-    foreach ($SelectedEngine in $Engine) {
-        $ForwardedArgs += @('--engine', $SelectedEngine)
-    }
-}
-
-if ($DryRun) {
-    $ForwardedArgs += '--dry-run'
-}
-
-$PyLauncher = Get-Command py -ErrorAction SilentlyContinue
-if ($null -ne $PyLauncher) {
-    & $PyLauncher.Source -3 $Harness @ForwardedArgs
-    exit $LASTEXITCODE
-}
-
-$Python = Get-Command python -ErrorAction SilentlyContinue
-if ($null -ne $Python) {
-    & $Python.Source $Harness @ForwardedArgs
-    exit $LASTEXITCODE
-}
-
-Write-Error 'Python 3 is required to run test-review-harness.'
-exit 127
diff --git a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py b/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py
deleted file mode 100755
index 8035c42f..00000000
--- a/.agents/skills/oc-autoreview-adapted/scripts/test-review-harness.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import argparse
-import os
-import shutil
-import stat
-import subprocess
-import sys
-import tempfile
-from collections.abc import Callable
-from pathlib import Path
-
-
-ENGINES = ("codex", "claude", "droid", "copilot")
-
-SAFE_INITIAL = """import numpy as np
-
-
-def trim_eeg(eeg, start_sample, stop_sample):
-    data = np.asarray(eeg["data"])
-    start = int(start_sample) - 1
-    stop = int(stop_sample)
-    trimmed = data[:, start:stop]
-    out = dict(eeg)
-    out["data"] = trimmed
-    out["pnts"] = trimmed.shape[1]
-    out["xmin"] = start / float(eeg["srate"])
-    out["xmax"] = (stop - 1) / float(eeg["srate"])
-    return out
-"""
-
-BUGGY_CHANGED = """import numpy as np
-
-
-def trim_eeg(eeg, start_sample, stop_sample):
-    data = np.asarray(eeg["data"])
-    trimmed = data[start_sample:stop_sample, :]
-    out = dict(eeg)
-    out["data"] = trimmed
-    out["pnts"] = stop_sample - start_sample
-    return out
-"""
-
-BENIGN_CHANGED = """import numpy as np
-
-
-def trim_eeg(eeg, start_sample, stop_sample):
-    data = np.asarray(eeg["data"])
-    start = int(start_sample) - 1
-    stop = int(stop_sample)
-    if start < 0 or stop <= start or stop > data.shape[1]:
-        raise ValueError("sample range is outside EEG data")
-    trimmed = data[:, start:stop]
-    out = dict(eeg)
-    out["data"] = trimmed
-    out["pnts"] = trimmed.shape[1]
-    out["xmin"] = start / float(eeg["srate"])
-    out["xmax"] = (stop - 1) / float(eeg["srate"])
-    return out
-"""
-
-BUGGY_PROMPT = (
-    "Acceptance fixture: this EEG change contains a real EEGPrep-style bug. "
-    "Review normally and report only concrete defects introduced by the patch."
-)
-BENIGN_PROMPT = (
-    "Calibration fixture: this EEG change validates 1-based sample bounds and "
-    "preserves channel-major data. Do not flag it unless there is a concrete bug."
-)
-
-
-def parse_args(argv: list[str]) -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        prog="test-review-harness",
-        description="Create a temporary EEG-style repo and run the adapted autoreview helper.",
-    )
-    parser.add_argument("--fixture", choices=("buggy", "benign"), default="buggy")
-    parser.add_argument("--engine", action="append", choices=ENGINES, dest="engines")
-    parser.add_argument(
-        "--dry-run", action="store_true", help="Verify target/bundle setup without spending a model call."
-    )
-    return parser.parse_args(argv)
-
-
-def run(command: list[str], cwd: Path) -> None:
-    subprocess.run(command, cwd=cwd, check=True)
-
-
-def write_fixture_file(repo: Path, content: str) -> None:
-    (repo / "eeg_ops.py").write_text(content, encoding="utf-8", newline="\n")
-
-
-def create_fixture_repo(repo: Path, fixture: str) -> None:
-    run(["git", "init", "--quiet"], repo)
-    run(["git", "config", "user.name", "Review Fixture"], repo)
-    run(["git", "config", "user.email", "review-fixture@example.com"], repo)
-    write_fixture_file(repo, SAFE_INITIAL)
-    run(["git", "add", "eeg_ops.py"], repo)
-    run(["git", "commit", "--quiet", "-m", "initial safe EEG trim"], repo)
-    write_fixture_file(repo, BUGGY_CHANGED if fixture == "buggy" else BENIGN_CHANGED)
-
-
-def run_reviews(repo: Path, script_dir: Path, fixture: str, engines: list[str], *, dry_run: bool) -> None:
-    autoreview = script_dir / "autoreview"
-    for engine in engines:
-        print(f"== {engine} ==", flush=True)
-        command = [
-            sys.executable,
-            str(autoreview),
-            "--mode",
-            "local",
-            "--engine",
-            engine,
-            "--prompt",
-            BUGGY_PROMPT if fixture == "buggy" else BENIGN_PROMPT,
-        ]
-        if fixture == "buggy":
-            command.extend(["--require-finding", "channel", "--expect-findings"])
-        if dry_run:
-            command.append("--dry-run")
-        run(command, repo)
-
-
-def cleanup_repo(repo: Path) -> None:
-    def make_writable_and_retry(function: Callable[[str], object], path: str, _exc_info: object) -> None:
-        try:
-            os.chmod(path, stat.S_IREAD | stat.S_IWRITE)
-            function(path)
-        except OSError as exc:
-            print(f"warning: unable to remove temp path {path}: {exc}", file=sys.stderr)
-
-    if repo.exists():
-        shutil.rmtree(repo, onerror=make_writable_and_retry)
-
-
-def main(argv: list[str]) -> int:
-    args = parse_args(argv)
-    script_dir = Path(__file__).resolve().parent
-    repo = Path(tempfile.mkdtemp(prefix="eegprep-autoreview-fixture."))
-    try:
-        create_fixture_repo(repo, args.fixture)
-        run_reviews(repo, script_dir, args.fixture, args.engines or ["codex"], dry_run=args.dry_run)
-    except subprocess.CalledProcessError as exc:
-        return int(exc.returncode or 1)
-    finally:
-        cleanup_repo(repo)
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main(sys.argv[1:]))
diff --git a/.gitignore b/.gitignore
index a9967c33..dbe68211 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
 .claude/settings.local.json
 .context/
-.workflow/
 .coveragerc
 .notes/*
 !.notes/.gitkeep

From 3ab3f8a31b61bc3324ca249dc2a610606fbcee61 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Tue, 9 Jun 2026 18:31:22 -0700
Subject: [PATCH 06/16] Add strict code quality review skill

---
 .../SKILL.md                                  | 100 ++++++++++++++++++
 .../agents/openai.yaml                        |   4 +
 2 files changed, 104 insertions(+)
 create mode 100644 .agents/skills/thermo-nuclear-code-quality-review/SKILL.md
 create mode 100644 .agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml

diff --git a/.agents/skills/thermo-nuclear-code-quality-review/SKILL.md b/.agents/skills/thermo-nuclear-code-quality-review/SKILL.md
new file mode 100644
index 00000000..a5d463fe
--- /dev/null
+++ b/.agents/skills/thermo-nuclear-code-quality-review/SKILL.md
@@ -0,0 +1,100 @@
+---
+name: thermo-nuclear-code-quality-review
+description: Run an unusually strict EEGPrep code-quality review for architecture, maintainability, abstraction quality, file sprawl, spaghetti branching, and missed simplification. Use for thermo-nuclear review, thermonuclear review, deep maintainability audit, strict architecture review, or when code technically works but may make EEGPrep harder to ship.
+---
+
+# Thermo-Nuclear Code Quality Review
+
+Use this for a demanding maintainability review, not a routine bug pass. The goal is to make EEGPrep shippable: standalone, EEGLAB-familiar, easy for EEG researchers, and structurally simple enough for future agents and humans to extend safely.
+
+Inspired by Cursor's MIT-licensed `thermo-nuclear-code-quality-review` skill: https://github.com/cursor/plugins/blob/main/cursor-team-kit/skills/thermo-nuclear-code-quality-review/SKILL.md
+
+## Contract
+
+- Read `AGENTS.md` first and keep its EEGLAB parity, GUI/console, testing, docs, and style rules in force.
+- Review the current diff, PR, branch, or named scope. Do not turn this into a whole-codebase rewrite unless asked.
+- Be ambitious about simplification, but only flag structural issues with a concrete failure mode or clear maintainability cost.
+- Prefer fewer high-conviction findings over a long list of taste comments.
+- If asked to fix findings, verify each one from first principles, make the smallest durable change, run focused tests, and commit only when requested.
+- Do not rubber-stamp code because tests pass. Passing behavior can still be architecturally wrong.
+
+## Review Bar
+
+Ask these questions for every meaningful change:
+
+- Is there a simpler framing that deletes branches, modes, wrappers, flags, or helper layers?
+- Did this add special cases to an already busy flow instead of moving logic to the owning module?
+- Is the logic in the canonical EEGPrep layer, meaning the module that owns the concept per the architecture boundaries below?
+- Does the code preserve EEG dict invariants and EEGLAB-facing semantics without hidden global state?
+- Does GUI/menu/console code update `EEGPrepSession`, history, and visible state atomically?
+- Is this abstraction earning its keep, or is it a pass-through wrapper?
+- Did the change create duplicate helpers instead of reusing an existing contract?
+- Did it make data boundaries weaker through unnecessary optionality, casts, duck typing, or silent fallbacks?
+- Did a file cross or approach roughly 1000 lines because new concepts were not decomposed?
+- Does the code remain understandable to an EEG researcher migrating from EEGLAB?
+
+## EEGPrep Architecture Boundaries
+
+Keep ownership clear:
+
+- `popfunc`: user-facing `pop_*` wrappers, history strings, dialogs, `return_com=True`, and EEGLAB-compatible command surfaces.
+- `sigprocfunc`: low-level signal processing and numerical transforms. No GUI/session orchestration here.
+- `guifunc`: Qt/inputgui/menu rendering and GUI coordination. No low-level numerical algorithm ownership here.
+- `adminfunc`: session, options, console, history, storage, and administrative runtime behavior.
+- `plugins/*`: bundled plugin ports and plugin-owned helpers.
+- `resources/help`: EEGPrep-owned Help Markdown for user-facing dialogs/functions.
+- `eeglab/`: development reference only. Runtime code must not depend on it.
+
+Flag layer leaks aggressively when they make future behavior harder to reason about.
+
+## What To Flag
+
+Flag issues such as:
+
+- A "works but messy" implementation where a clear code-judo move would delete complexity.
+- One-off booleans, nullable modes, or scattered feature checks.
+- Repeated conditionals that indicate a missing helper, model, or dispatcher.
+- Partial session/history/dataset updates that can leave GUI and console out of sync.
+- EEGLAB user-facing indices mixed with Python 0-based indices without an explicit boundary.
+- Channel-major EEG data assumptions hidden behind generic array handling.
+- New runtime dependency on vendored EEGLAB, local paths, optional files, or unstated environment state.
+- Thin wrappers, identity helpers, or generic magic that obscure simple EEG invariants.
+- Copy-pasted parsing/history/dialog helpers when a canonical helper exists.
+- Large-file growth that should be split into focused modules.
+- Tests that only assert implementation details while missing user-observable behavior.
+
+## Preferred Remedies
+
+Prefer remedies that:
+
+- Delete concepts rather than rename them.
+- Collapse duplicate branches into one explicit flow.
+- Move logic to the module that owns the concept.
+- Extract small pure helpers for repeated parsing, shape, or indexing contracts.
+- Make state transitions atomic through `EEGPrepSession` helpers.
+- Make data boundaries explicit before converting between EEGLAB-facing and Python-facing indices.
+- Split large modules by stable ownership, not by arbitrary line count.
+- Replace loose optional/cast-heavy code with a concrete contract.
+- Add focused tests for externally observable EEG dict, GUI/session, history, or file behavior.
+
+## Tone And Output
+
+Lead with findings. For each finding include:
+
+- file and line;
+- why it matters for correctness, parity, or maintainability;
+- the concrete scenario or future failure mode;
+- a preferred fix direction.
+
+Order findings by severity:
+
+1. Structural regressions that can create bugs or block maintainability.
+2. Missed simplification that would remove significant complexity.
+3. Wrong ownership/layering or canonical-helper duplication.
+4. State/session/history atomicity risks.
+5. File sprawl and decomposition concerns.
+6. Lower-level legibility issues with real cost.
+
+If there are no actionable issues, say so directly and mention any remaining gaps in test coverage or limits of the review.
+
+Do not soften major structural problems into vague nits. Also do not invent architecture work without a real failure mode.
diff --git a/.agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml b/.agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml
new file mode 100644
index 00000000..9de350b0
--- /dev/null
+++ b/.agents/skills/thermo-nuclear-code-quality-review/agents/openai.yaml
@@ -0,0 +1,4 @@
+interface:
+  display_name: "Thermo-Nuclear Code Quality Review"
+  short_description: "Strict EEGPrep architecture and maintainability review"
+  default_prompt: "Use $thermo-nuclear-code-quality-review to review the current EEGPrep branch for structural regressions, spaghetti branching, wrong ownership layers, and missed simplifications."

From 4e462152b9c1708221dfa936dd42a954e6de5d4e Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Wed, 10 Jun 2026 17:02:35 -0700
Subject: [PATCH 07/16] Fix session sync and MNE conversion invariants

---
 src/eegprep/functions/adminfunc/console.py    |  44 +++---
 src/eegprep/functions/guifunc/menu_actions.py |  49 ++++---
 src/eegprep/functions/guifunc/session.py      |  82 +++++++++++
 src/eegprep/functions/miscfunc/eeg_eeg2mne.py |  44 ++----
 src/eegprep/functions/miscfunc/eeg_mne2eeg.py |  57 ++------
 .../functions/miscfunc/eeg_mne2eeg_epochs.py  | 136 ++++++++----------
 src/eegprep/functions/sigprocfunc/epoch.py    |  11 +-
 tests/test_console_workspace.py               |  20 +++
 tests/test_eeg_eeg2mne.py                     |  20 ++-
 tests/test_eeg_mne2eeg.py                     |  16 +++
 tests/test_eeg_mne2eeg_epochs.py              |  80 +++++++----
 tests/test_gui_main_window.py                 |  15 +-
 tests/test_pop_epoch.py                       |  23 +++
 13 files changed, 360 insertions(+), 237 deletions(-)

diff --git a/src/eegprep/functions/adminfunc/console.py b/src/eegprep/functions/adminfunc/console.py
index 9f8ecfb4..1793f743 100644
--- a/src/eegprep/functions/adminfunc/console.py
+++ b/src/eegprep/functions/adminfunc/console.py
@@ -352,19 +352,18 @@ def after_execute(self, source: str, *, success: bool = True) -> None:
         history_command = self._history_command_for_source(source, targets)
         changed = False
 
-        if "ALLEEG" in targets:
+        if "ALLEEG" in targets or "CURRENTSET" in targets:
             alleeg = self.namespace.get("ALLEEG", [])
             if not isinstance(alleeg, list):
                 raise ValueError("ALLEEG must be a list of EEG datasets")
-            self.session.ALLEEG = alleeg
-            changed = True
-
-        if "CURRENTSET" in targets:
-            current = _normalize_currentset(self.namespace.get("CURRENTSET"))
-            if current:
-                self.session.retrieve(current if len(current) > 1 else current[0])
-            else:
-                self.session.CURRENTSET = []
+            current = (
+                _normalize_currentset(self.namespace.get("CURRENTSET"))
+                if "CURRENTSET" in targets
+                else self.session.CURRENTSET
+            )
+            self.session.apply_workspace_state(
+                alleeg=alleeg, currentset=current, command="", append_dataset_history=False
+            )
             changed = True
 
         if self._namespace_eeg_changed(targets):
@@ -380,16 +379,16 @@ def after_execute(self, source: str, *, success: bool = True) -> None:
                 changed = True
 
         if "STUDY" in targets:
-            self.session.STUDY = self.namespace.get("STUDY")
-            if "CURRENTSTUDY" not in targets:
-                self.session.CURRENTSTUDY = 1 if self.session.STUDY else 0
+            study_kwargs: dict[str, Any] = {"study": self.namespace.get("STUDY"), "command": ""}
+            if "CURRENTSTUDY" in targets:
+                study_kwargs["currentstudy"] = self.namespace.get("CURRENTSTUDY")
+            self.session.apply_workspace_state(**study_kwargs)
             changed = True
-        if "CURRENTSTUDY" in targets:
-            self.session.CURRENTSTUDY = int(self.namespace.get("CURRENTSTUDY") or 0)
+        elif "CURRENTSTUDY" in targets:
+            self.session.apply_workspace_state(currentstudy=self.namespace.get("CURRENTSTUDY"), command="")
             changed = True
 
         if changed:
-            self.session.notify_changed()
             if history_command and history_command != self.session.LASTCOM:
                 self.session.add_history(history_command)
         self.pull_from_session()
@@ -401,12 +400,13 @@ def accept_pop_result(self, result: Any, args: tuple[Any, ...], kwargs: Mapping[
         dataset_state = _extract_pop_dataset_state(result)
         if dataset_state is not None:
             alleeg, eeg, currentset, command = dataset_state
-            self.session.ALLEEG = alleeg
-            self.session.EEG = eeg
-            self.session.CURRENTSET = _normalize_currentset(currentset)
-            if command:
-                self.session.add_history(command, notify=False)
-            self.session.notify_changed()
+            self.session.apply_workspace_state(
+                alleeg=alleeg,
+                eeg=eeg,
+                currentset=currentset,
+                command=command,
+                append_dataset_history=False,
+            )
             self._pop_updated_session = True
             self.pull_from_session()
             self._refresh()
diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py
index a5254755..b0c80e7a 100644
--- a/src/eegprep/functions/guifunc/menu_actions.py
+++ b/src/eegprep/functions/guifunc/menu_actions.py
@@ -903,14 +903,23 @@ def _run_script(self, parent: Any | None) -> None:
             "EEG": self.session.EEG,
             "ALLEEG": self.session.ALLEEG,
             "CURRENTSET": self.session.current_set_value(),
+            "ALLCOM": list(self.session.ALLCOM),
+            "LASTCOM": self.session.LASTCOM,
             "STUDY": self.session.STUDY,
+            "CURRENTSTUDY": self.session.CURRENTSTUDY,
         }
         command = pop_runscript(filename, namespace)
-        self.session.EEG = namespace.get("EEG", self.session.EEG)
-        self.session.ALLEEG = namespace.get("ALLEEG", self.session.ALLEEG)
-        self.session.CURRENTSET = _currentset_list(namespace.get("CURRENTSET", self.session.current_set_value()))
-        self.session.STUDY = namespace.get("STUDY", self.session.STUDY)
-        self._add_history_from_gui(command)
+        self.session.echo_command(command)
+        self.session.apply_workspace_state(
+            eeg=namespace.get("EEG", self.session.EEG),
+            alleeg=namespace.get("ALLEEG", self.session.ALLEEG),
+            currentset=namespace.get("CURRENTSET", self.session.current_set_value()),
+            allcom=namespace.get("ALLCOM", self.session.ALLCOM),
+            lastcom=namespace.get("LASTCOM", self.session.LASTCOM),
+            study=namespace.get("STUDY", self.session.STUDY),
+            currentstudy=namespace.get("CURRENTSTUDY", self.session.CURRENTSTUDY),
+            command=command,
+        )
         self._refresh()
 
     def _bids_tool_action(self, action: str, parent: Any | None) -> None:
@@ -942,8 +951,8 @@ def _bids_tool_action(self, action: str, parent: Any | None) -> None:
 
         updated, command = getattr(bids_tools, action)(target, **metadata)
         if self.session.CURRENTSTUDY == 1 and self.session.STUDY:
-            self.session.STUDY = updated
-            self._add_history_from_gui(command)
+            self.session.echo_command(command)
+            self.session.set_study(updated, command=command)
         else:
             self._store_current_from_gui(updated, command=command)
         self._refresh()
@@ -1045,11 +1054,8 @@ def _apply_extension_result(self, result: Any) -> None:
         dataset_state = _extension_dataset_state(result)
         if dataset_state is not None:
             alleeg, eeg_out, currentset, command = dataset_state
-            self.session.ALLEEG = alleeg
-            self.session.EEG = eeg_out
-            self.session.CURRENTSET = _currentset_list(currentset)
-            self._add_history_from_gui(command)
-            self.session.notify_changed()
+            self.session.echo_command(command)
+            self.session.apply_workspace_state(alleeg=alleeg, eeg=eeg_out, currentset=currentset, command=command)
             self._refresh()
             return
         eeg_out, command = _extension_eeg_and_command(result)
@@ -1360,11 +1366,8 @@ def _copy_current_dataset(self, parent: Any | None) -> None:
         alleeg, eeg_out, current_set, command = pop_copyset(self.session.ALLEEG, set_in, gui=True, return_com=True)
         if not command:
             return
-        self.session.ALLEEG = alleeg
-        self.session.EEG = eeg_out
-        self.session.CURRENTSET = _currentset_list(current_set)
-        self._add_history_from_gui(command)
-        self.session.notify_changed()
+        self.session.echo_command(command)
+        self.session.apply_workspace_state(alleeg=alleeg, eeg=eeg_out, currentset=current_set, command=command)
         self._refresh()
 
     def _merge_datasets(self, parent: Any | None) -> None:
@@ -1540,8 +1543,8 @@ def _run_chanplot(self, parent: Any | None) -> None:
         study, command, _figure = pop_chanplot(self.session.STUDY, self.session.ALLEEG, gui=True, return_com=True)
         if not command:
             return
-        self.session.STUDY = study
-        self._add_history_from_gui(command)
+        self.session.echo_command(command)
+        self.session.set_study(study, command=command)
         self._refresh()
 
     def _store_current_from_gui(self, eeg: Any, **kwargs: Any) -> Any:
@@ -1577,11 +1580,7 @@ def _commit_processed_dataset_from_gui(self, eeg: Any, *, command: str, parent:
         self.session.echo_command(command)
         self.session.add_history(command, notify=False)
         self.session.echo_command(newset_command)
-        self.session.ALLEEG = alleeg
-        self.session.EEG = current
-        self.session.CURRENTSET = _currentset_list(current_set)
-        self.session.add_history(newset_command, notify=False)
-        self.session.notify_changed()
+        self.session.apply_workspace_state(alleeg=alleeg, eeg=current, currentset=current_set, command=newset_command)
 
     def _add_history_from_gui(self, command: str | None) -> None:
         self.session.echo_command(command)
@@ -1592,7 +1591,7 @@ def _retrieve_dataset(self, index: int) -> None:
         self.session.retrieve(index)
         command = f"[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, 'retrieve', {index});"
         if was_study:
-            self.session.CURRENTSTUDY = 0
+            self.session.apply_workspace_state(currentstudy=0)
             command = f"CURRENTSTUDY = 0;{command}"
         self._add_history_from_gui(command)
         self._refresh()
diff --git a/src/eegprep/functions/guifunc/session.py b/src/eegprep/functions/guifunc/session.py
index f2028ceb..6c8df35f 100644
--- a/src/eegprep/functions/guifunc/session.py
+++ b/src/eegprep/functions/guifunc/session.py
@@ -18,6 +18,9 @@
 from eegprep.functions.popfunc.eeg_emptyset import eeg_emptyset
 
 
+_UNSET = object()
+
+
 def has_eeg_data(eeg: Any) -> bool:
     """Return whether an EEG-like object contains non-empty data."""
     if not isinstance(eeg, dict):
@@ -223,6 +226,62 @@ def retrieve(self, indices: int | list[int]) -> dict[str, Any] | list[dict[str,
         self.notify_changed()
         return eeg
 
+    def apply_workspace_state(
+        self,
+        *,
+        eeg: Any = _UNSET,  # _UNSET (not None) marks "argument not supplied"
+        alleeg: Any = _UNSET,
+        currentset: Any = _UNSET,
+        allcom: Any = _UNSET,
+        lastcom: Any = _UNSET,
+        study: Any = _UNSET,
+        currentstudy: Any = _UNSET,
+        command: str = "",  # forwarded to add_history() at the end of the update
+        append_dataset_history: bool = False,  # also pass command to _append_current_dataset_history()
+    ) -> None:
+        """Apply a GUI/console workspace update as one session transaction."""
+        dataset_changed = eeg is not _UNSET or alleeg is not _UNSET or currentset is not _UNSET
+        if dataset_changed:
+            resolved_alleeg = self.ALLEEG if alleeg is _UNSET else alleeg  # default: current session list
+            if not isinstance(resolved_alleeg, list):
+                raise ValueError("ALLEEG must be a list of EEG datasets")
+            resolved_currentset = (
+                list(self.CURRENTSET)
+                if currentset is _UNSET
+                else normalize_dataset_indices(currentset, allow_empty=True)
+            )
+            if resolved_currentset and max(resolved_currentset) > len(resolved_alleeg):  # upper bound only
+                raise ValueError("CURRENTSET contains indices outside ALLEEG")
+            resolved_eeg = self._resolve_workspace_eeg(eeg, resolved_alleeg, resolved_currentset)
+            self.ALLEEG = resolved_alleeg
+            self.EEG = resolved_eeg
+            self.CURRENTSET = resolved_currentset
+            self._mirror_current_eeg_into_alleeg()  # NOTE(review): can raise after the three assignments above
+            if append_dataset_history:
+                self._append_current_dataset_history(command)
+            offload_storedisk_datasets(self.ALLEEG, set(self.CURRENTSET))
+
+        if allcom is not _UNSET:
+            if not isinstance(allcom, list):
+                raise ValueError("ALLCOM must be a list of command strings")
+            self.ALLCOM = [str(item) for item in allcom if str(item).strip()]  # drop blank entries
+            self.LASTCOM = self.ALLCOM[-1] if self.ALLCOM else ""  # keep LASTCOM consistent with the new list
+        if lastcom is not _UNSET:
+            last_command = str(lastcom or "").strip()
+            if last_command and (not self.ALLCOM or self.ALLCOM[-1] != last_command):
+                self.ALLCOM.append(last_command)  # appended only when it is not already the final entry
+            self.LASTCOM = last_command
+
+        if study is not _UNSET:
+            self.STUDY = study
+            if currentstudy is _UNSET:
+                self.CURRENTSTUDY = 1 if study else 0  # inferred from the truthiness of study
+        if currentstudy is not _UNSET:
+            self.CURRENTSTUDY = int(currentstudy or 0)  # None and other falsy values become 0
+
+        self.add_history(command, notify=False)  # called unconditionally, including for the default ""
+        self.notify_changed()  # one notification after all fields are set
+
     def delete_current(self) -> None:
         """Delete the current dataset selection from memory."""
         if not self.CURRENTSET:
@@ -271,6 +330,29 @@ def set_study(
         self.add_history(command, notify=False)
         self.notify_changed()
 
+    def _resolve_workspace_eeg(  # choose the EEG value that goes with alleeg/currentset
+        self,
+        eeg: Any,
+        alleeg: list[dict[str, Any]],
+        currentset: list[int],
+    ) -> dict[str, Any] | list[dict[str, Any]]:
+        if eeg is not _UNSET:
+            return eeg  # an explicitly supplied EEG is returned unchanged
+        if not currentset:
+            return eeg_emptyset()  # empty selection -> fresh empty dataset
+        selected = [alleeg[index - 1] for index in currentset]  # indices are shifted down by one for lookup
+        return selected if len(selected) > 1 else selected[0]  # a single selection is returned unwrapped
+
+    def _mirror_current_eeg_into_alleeg(self) -> None:  # write self.EEG back into the selected ALLEEG slots
+        if not self.CURRENTSET:
+            return  # nothing selected, nothing to write back
+        current = self.EEG if isinstance(self.EEG, list) else [self.EEG]  # treat a single EEG as a 1-item list
+        if len(current) != len(self.CURRENTSET):
+            raise ValueError("EEG selection length must match CURRENTSET")
+        for index, eeg in zip(self.CURRENTSET, current):
+            if 1 <= index <= len(self.ALLEEG):  # indices outside this range are skipped, not reported
+                self.ALLEEG[index - 1] = eeg
+
     def select_study(self, *, command: str = "CURRENTSTUDY = 1") -> None:
         """Select the current STUDY set in the shared workspace."""
         if not self.STUDY:
diff --git a/src/eegprep/functions/miscfunc/eeg_eeg2mne.py b/src/eegprep/functions/miscfunc/eeg_eeg2mne.py
index 574353c2..0d026766 100644
--- a/src/eegprep/functions/miscfunc/eeg_eeg2mne.py
+++ b/src/eegprep/functions/miscfunc/eeg_eeg2mne.py
@@ -1,13 +1,13 @@
 """EEG to MNE conversion functions."""
 
-from ..popfunc.pop_loadset import pop_loadset
-import mne
+from pathlib import Path
 import tempfile
-import os
+
+import mne
+
 from ..popfunc.pop_saveset import pop_saveset  # in development
 
 
-# write a funtion that converts a MNE raw object to an EEGLAB set file
 def eeg_eeg2mne(EEG):
     """Convert EEG data structure to MNE Raw object.
 
@@ -21,34 +21,10 @@ def eeg_eeg2mne(EEG):
     raw : mne.io.Raw
         MNE Raw object
     """
-    # Generate a temporary file name
-    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-        temp_file_path = temp_file.name
-
-    base, _ = os.path.splitext(temp_file_path)
-    new_temp_file_path = base + ".set"
-
-    # save the raw file as a new EEGLAB .set file using MNE EEGLAB writer
-    pop_saveset(EEG, new_temp_file_path)
-
-    # load the EEGLAB set file
-    if EEG['trials'] > 1:
-        raw = mne.io.read_epochs_eeglab(new_temp_file_path)
-    else:
-        raw = mne.io.read_raw_eeglab(new_temp_file_path, preload=True)
-
-    return raw
-
-
-def test_eeg_eeg2mne():
-    """Test the eeg_eeg2mne function."""
-    eeglab_file_path = './eeglab_data_with_ica_tmp.set'
-    eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set'
-    EEG = pop_loadset(eeglab_file_path)
-    raw = eeg_eeg2mne(EEG)
-
-    # print the keys of the EEG dictionary
-    print(raw.info)
-
+    with tempfile.TemporaryDirectory(prefix="eegprep-eeg2mne-") as temp_dir:
+        set_path = Path(temp_dir) / "bridge.set"
+        pop_saveset(EEG, str(set_path))
 
-# test_eeg_eeg2mne()
+        if EEG['trials'] > 1:
+            return mne.io.read_epochs_eeglab(str(set_path))
+        return mne.io.read_raw_eeglab(str(set_path), preload=True)
diff --git a/src/eegprep/functions/miscfunc/eeg_mne2eeg.py b/src/eegprep/functions/miscfunc/eeg_mne2eeg.py
index 0e14c253..6f0eb1a2 100644
--- a/src/eegprep/functions/miscfunc/eeg_mne2eeg.py
+++ b/src/eegprep/functions/miscfunc/eeg_mne2eeg.py
@@ -1,11 +1,12 @@
 """MNE to EEG conversion functions."""
 
-from ..popfunc.pop_loadset import pop_loadset
-import mne
+from pathlib import Path
 import tempfile
-import os
+
+import mne
 from mne.export import export_raw, export_epochs
-import numpy as np
+
+from ..popfunc.pop_loadset import pop_loadset
 
 
 def _mne_events_to_eeglab_events(raw_or_epochs):
@@ -38,7 +39,6 @@ def _mne_events_to_eeglab_events(raw_or_epochs):
     return events
 
 
-# write a funtion that converts a MNE raw object to an EEGLAB set file
 def eeg_mne2eeg(raw):
     """Convert MNE Raw object to EEG data structure.
 
@@ -54,49 +54,16 @@ def eeg_mne2eeg(raw):
     """
     raw_or_epochs = raw
 
-    # Generate a temporary file name
-    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-        temp_file_path = temp_file.name
-
-    base, _ = os.path.splitext(temp_file_path)
-    new_temp_file_path = base + ".set"
-
-    # save the raw/epochs file as a new EEGLAB .set file using MNE EEGLAB writer
-    if isinstance(raw_or_epochs, mne.BaseEpochs):
-        export_epochs(new_temp_file_path, raw_or_epochs, fmt='eeglab')
-    else:
-        export_raw(new_temp_file_path, raw_or_epochs, fmt='eeglab')
-
-    # load the EEGLAB set file
-    EEG = pop_loadset(new_temp_file_path)
+    with tempfile.TemporaryDirectory(prefix="eegprep-mne2eeg-") as temp_dir:
+        set_path = Path(temp_dir) / "bridge.set"
+        if isinstance(raw_or_epochs, mne.BaseEpochs):
+            export_epochs(str(set_path), raw_or_epochs, fmt='eeglab')
+        else:
+            export_raw(str(set_path), raw_or_epochs, fmt='eeglab')
+        EEG = pop_loadset(str(set_path))
 
     # Inject events/annotations from MNE object into EEGLAB structure
     eeglab_events = _mne_events_to_eeglab_events(raw_or_epochs)
     if eeglab_events:
         EEG['event'] = eeglab_events
-
     return EEG
-
-
-def test_eeg_mne2eeg():
-    """Test the eeg_mne2eeg function."""
-    eeglab_file_path = './eeglab_data_with_ica_tmp.set'
-    eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set'
-    EEG = pop_loadset(eeglab_file_path)
-
-    # create MNE info structure
-    info = mne.create_info(ch_names=[x['labels'] for x in EEG['chanlocs']], sfreq=EEG['srate'], ch_types='eeg')
-    if EEG['trials'] > 1:
-        events = np.array([[i, 0, 1] for i in range(EEG['trials'])])  # NOT CORRECT CONVERTION JUST FOR TESTING
-        event_id = dict(dummy=1)
-        raw = mne.EpochsArray(EEG['data'].transpose(2, 0, 1), info, events, tmin=0, event_id=event_id)
-    else:
-        raw = mne.io.RawArray(EEG['data'], info)
-
-    EEG2 = eeg_mne2eeg(raw)
-
-    # print the keys of the EEG dictionary
-    print(EEG2.keys())
-
-
-# test_eeg_mne2eeg()
diff --git a/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py b/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py
index 876dda17..808f8481 100644
--- a/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py
+++ b/src/eegprep/functions/miscfunc/eeg_mne2eeg_epochs.py
@@ -1,17 +1,15 @@
 """MNE epochs to EEGLAB dataset conversion utilities."""
 
-# Example to export MNE epochs to EEGLAB dataset
-# Events are not handled correctly in this example but it works
-
-import mne
-from mne.preprocessing import ICA
+import logging
 import math
 
 import numpy as np
-from scipy.io import savemat
+
+from eegprep.functions.miscfunc.misc import finite_matmul, finite_pinv
+
+logger = logging.getLogger(__name__)
 
 
-# Load example data
 def eeg_mne2eeg_epochs(epochs, ica):
     """Convert MNE epochs with ICA to EEGLAB dataset format.
 
@@ -27,29 +25,27 @@ def eeg_mne2eeg_epochs(epochs, ica):
     dict
         EEGLAB-compatible dataset dictionary.
     """
-    # export to EEGLAB dataset
-    data = epochs.get_data()  # Get the data from the epochs
-    n_epochs, n_channels, n_times = data.shape
-    ica_weights = ica.get_components()  # ICA weights (n_components x n_channels)
-
-    # create identity matrix of size n_channels x n_channels
-    ica_sphere = np.eye(n_channels)  # ICA sphere (n_channels x n_channels)
-
-    # Compute the mixing matrix (inverse weights)
-    ica_inverse_weights = np.linalg.pinv(ica_weights)  # Shape: (n_channels, n_components)
+    mne_data = epochs.get_data(copy=True)
+    n_epochs, n_channels, n_times = mne_data.shape
+    data = np.transpose(mne_data, (1, 2, 0))
 
     ica_channels = ica.info['ch_names']
     raw_channels = epochs.info['ch_names']  # Assuming you have the raw object
     ica_channel_indices = [raw_channels.index(ch) for ch in ica_channels]
     ica_channel_indices = np.array(ica_channel_indices)
 
-    ica_act = ica.get_sources(epochs).get_data(copy=True).transpose(1, 2, 0)  # Get the ICA activations
+    ica_weights, ica_sphere, ica_inverse_weights, ica_act = _mne_ica_to_eeglab_fields(
+        ica,
+        data[ica_channel_indices],
+        n_times,
+        n_epochs,
+    )
 
-    print('Reference conversion may not be accurate...')
     if 'custom_ref_applied' in epochs.info and epochs.info['custom_ref_applied']:
         ref = 'common'  # Custom reference was applied
     else:
         ref = 'average'  # Default to average reference
+    logger.info("MNE reference metadata converted to EEGPrep ref=%s.", ref)
 
     eeglab_dict = {
         'setname': '',
@@ -70,8 +66,8 @@ def eeg_mne2eeg_epochs(epochs, ica):
         'data': data,
         'icaact': ica_act,
         'icawinv': ica_inverse_weights,
-        'icasphere': ica_weights,
-        'icaweights': ica_sphere,
+        'icasphere': ica_sphere,
+        'icaweights': ica_weights,
         'icachansind': ica_channel_indices,
         'chanlocs': np.array([]),
         'urchanlocs': np.array([]),
@@ -111,21 +107,27 @@ def eeg_mne2eeg_epochs(epochs, ica):
     Y_all = []
     Z_all = []
     for ch in ch_locs:
-        if 'loc' in ch and ch['loc'] is not None:
-            X_all.append(ch['loc'][1] * 1000)
-            Y_all.append(-ch['loc'][0] * 1000)
-            Z_all.append(ch['loc'][2] * 1000)
-            hypotxy = math.hypot(X_all[-1], Y_all[-1])
-            sph_radius_all.append(math.hypot(hypotxy, Z_all[-1]))
-
-            az = math.atan2(Y_all[-1], X_all[-1]) / math.pi * 180
-            horiz = math.atan2(Z_all[-1], hypotxy) / math.pi * 180
-
-            sph_theta_all.append(az)
-            sph_phi_all.append(horiz)
-
-            theta_all.append(-az)  # warning inverse notation compared to MATLAB to match
-            radius_all.append(0.5 - horiz / 180)  # warning inverse notation compared to MATLAB to match
+        loc = ch.get('loc') if isinstance(ch, dict) else None
+        if loc is None or len(loc) < 3:
+            x = y = z = 0.0
+        else:
+            x = float(loc[1]) * 1000
+            y = -float(loc[0]) * 1000
+            z = float(loc[2]) * 1000
+        X_all.append(x)
+        Y_all.append(y)
+        Z_all.append(z)
+        hypotxy = math.hypot(x, y)
+        sph_radius_all.append(math.hypot(hypotxy, z))
+
+        az = math.atan2(y, x) / math.pi * 180
+        horiz = math.atan2(z, hypotxy) / math.pi * 180
+
+        sph_theta_all.append(az)
+        sph_phi_all.append(horiz)
+
+        theta_all.append(-az)  # warning inverse notation compared to MATLAB to match
+        radius_all.append(0.5 - horiz / 180)  # warning inverse notation compared to MATLAB to match
 
     d_list = [
         {
@@ -166,43 +168,31 @@ def eeg_mne2eeg_epochs(epochs, ica):
     d_list = np.array(d_list)
     eeglab_dict['chanlocs'] = d_list
 
-    # # Step 4: Save the EEGLAB dataset as a .mat file
     return eeglab_dict
 
-    # print("EEGLAB dataset saved successfully!")
-
-
-def test_eeg_mne2eeg_epochs():
-    """Test the eeg_mne2eeg_epochs function with sample MNE data."""
-    sample_data_folder = mne.datasets.sample.data_path()
-    sample_data_raw_file = sample_data_folder / "MEG" / "sample" / "sample_audvis_filt-0-40_raw.fif"
-
-    raw = mne.io.read_raw_fif(sample_data_raw_file)
-
-    # extract data epochs
-    events = mne.find_events(raw, stim_channel="STI 014")
-    event_dict = {
-        "auditory/left": 1,
-        "auditory/right": 2,
-        "visual/left": 3,
-        "visual/right": 4,
-        "smiley": 5,
-        "buttonpress": 32,
-    }
-    epochs = mne.Epochs(
-        raw,
-        events,
-        event_id=event_dict,
-        tmin=-0.2,
-        tmax=0.5,
-        preload=True,
-    )
-
-    ica = ICA(n_components=15, random_state=97, max_iter=800)
-    ica.fit(raw)
-
-    EEG = eeg_mne2eeg_epochs(epochs, ica)
-    savemat('output_file.mat', EEG)  # use pop_saveset
-
 
-# test_eeg_mne2eeg_epochs()
+def _mne_ica_to_eeglab_fields(ica, data, n_times, n_epochs):  # -> (weights, sphere, winv, activations)
+    n_components = int(ica.n_components_)
+    n_ica_channels = data.shape[0]  # first axis of data is used as the channel axis
+    prewhitener = _prewhitener_matrix(ica, n_ica_channels)
+    pca_unmixing = finite_matmul(np.asarray(ica.unmixing_matrix_), np.asarray(ica.pca_components_)[:n_components])
+    unmixing = finite_matmul(pca_unmixing, prewhitener)  # pre-whitening is folded into the returned weights
+    sphere = np.eye(n_ica_channels)  # identity, so weights @ sphere equals unmixing
+    inverse_weights = finite_pinv(unmixing)
+    activations_2d = finite_matmul(unmixing, data.reshape(n_ica_channels, -1, order="F"))
+    activations = activations_2d.reshape(n_components, n_times, n_epochs, order="F")  # same order="F" as above
+    return unmixing, sphere, inverse_weights, activations  # NOTE(review): data is not mean-centred here - confirm
+
+
+def _prewhitener_matrix(ica, n_channels):  # express ica.pre_whitener_ as an (n_channels, n_channels) matrix
+    prewhitener = np.asarray(ica.pre_whitener_)
+    if ica.noise_cov is not None:  # with a noise covariance the stored value must already be square
+        if prewhitener.shape != (n_channels, n_channels):
+            raise ValueError("MNE ICA pre-whitener has incompatible shape")
+        return prewhitener
+    values = prewhitener.reshape(-1)  # otherwise the entries are used as divisors
+    if values.size == 1:
+        return np.eye(n_channels) / float(values[0])  # one divisor shared by every channel
+    if values.size != n_channels:
+        raise ValueError("MNE ICA pre-whitener has incompatible shape")
+    return np.diag(1.0 / values)  # one divisor per channel, as a diagonal matrix
diff --git a/src/eegprep/functions/sigprocfunc/epoch.py b/src/eegprep/functions/sigprocfunc/epoch.py
index 3afb2eff..c3ac596f 100644
--- a/src/eegprep/functions/sigprocfunc/epoch.py
+++ b/src/eegprep/functions/sigprocfunc/epoch.py
@@ -4,10 +4,14 @@
 locked to specified events.
 """
 
+import logging
+
 import numpy as np
 
 from ..miscfunc.misc import round_mat
 
+logger = logging.getLogger(__name__)
+
 
 def epoch(data, events, lim, **kwargs):
     """
@@ -51,7 +55,8 @@ def _as_1d(a):
     reallim[1] = int(round_mat(lim[1] * g['srate'] - 1))  # minus 1 sample
 
     # --- epoching ---
-    print('Epoching...')
+    if g['verbose'] == 'on':
+        logger.info('Epoching...')
 
     newdatalength = int(reallim[1] - reallim[0] + 1)
 
@@ -116,12 +121,12 @@ def _as_1d(a):
                     indexes[index] = 1
                 else:
                     if g['verbose'] == 'on':
-                        print(f'Warning: event {index + 1} out of value limits')
+                        logger.warning('event %s out of value limits', index + 1)
             else:
                 indexes[index] = 1
         else:
             if g['verbose'] == 'on':
-                print(f'Warning: event {index + 1} out of data boundary')
+                logger.warning('event %s out of data boundary', index + 1)
 
         # Re-reference events
         if g['allevents'] is not None and g['allevents'].size > 0:
diff --git a/tests/test_console_workspace.py b/tests/test_console_workspace.py
index d996217a..53780922 100644
--- a/tests/test_console_workspace.py
+++ b/tests/test_console_workspace.py
@@ -809,6 +809,26 @@ def test_bare_legacy_pop_averef_alias_updates_session_history():
     workspace.close()
 
 
+def test_console_dataset_state_result_updates_session_once_without_duplicate_history():
+    session = EEGPrepSession()
+    session.store_current(_demo_eeg("one"), new=True)
+    session.store_current(_demo_eeg("two"), new=True)
+    refresh = mock.Mock()
+    workspace = EEGPrepConsoleWorkspace(session, refresh=refresh, exports={})
+    first = dict(session.ALLEEG[0], setname="one edited")  # shallow copy with a new setname
+    second = dict(session.ALLEEG[1], setname="two edited")
+    command = "[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, retrieve=[2, 1]);"
+
+    result = workspace.accept_pop_result(([first, second], [second, first], [2, 1], command), (), {})
+
+    assert list(result)[3] == command
+    assert session.CURRENTSET == [2, 1]
+    assert [item["setname"] for item in session.EEG] == ["two edited", "one edited"]  # EEG follows CURRENTSET order
+    assert [item["setname"] for item in session.ALLEEG] == ["one edited", "two edited"]  # ALLEEG keeps slot order
+    assert session.ALLCOM == [command]  # the command appears exactly once
+    refresh.assert_called_once()  # a single refresh for the whole update
+
+
 def test_pop_call_without_history_command_records_raw_console_source():
     session = EEGPrepSession()
     session.store_current(_demo_eeg(), new=True)
diff --git a/tests/test_eeg_eeg2mne.py b/tests/test_eeg_eeg2mne.py
index c0a69a68..a182e270 100644
--- a/tests/test_eeg_eeg2mne.py
+++ b/tests/test_eeg_eeg2mne.py
@@ -9,6 +9,7 @@
 import numpy as np
 import tempfile
 import shutil
+from unittest import mock
 
 from eegprep.functions.miscfunc.eeg_eeg2mne import eeg_eeg2mne
 
@@ -26,9 +27,6 @@
 except (ImportError, ValueError):
     from fixtures import create_test_eeg
 
-if os.getenv('EEGPREP_SKIP_MATLAB') == '1':
-    raise unittest.SkipTest("MATLAB not available")
-
 
 class TestEEGEEG2MNE(unittest.TestCase):
     """Test cases for eeg_eeg2mne function."""
@@ -60,6 +58,22 @@ def test_eeg_eeg2mne_continuous_data(self):
         self.assertEqual(result.info['nchan'], continuous_eeg['nbchan'])
         self.assertEqual(result.n_times, continuous_eeg['pnts'])
 
+    @unittest.skipUnless(MNE_AVAILABLE, "MNE not available")
+    def test_eeg_eeg2mne_cleans_temporary_bridge_files(self):
+        continuous_eeg = self.test_eeg.copy()
+        continuous_eeg['data'] = np.random.randn(32, 1000)
+        continuous_eeg['trials'] = 1
+        real_tempdir = tempfile.TemporaryDirectory  # keep the real class before it is patched
+
+        def tempdir_factory(*args, **kwargs):
+            kwargs["dir"] = self.temp_dir  # force scratch directories into this test's own temp dir
+            return real_tempdir(*args, **kwargs)
+
+        with mock.patch("eegprep.functions.miscfunc.eeg_eeg2mne.tempfile.TemporaryDirectory", tempdir_factory):
+            eeg_eeg2mne(continuous_eeg)
+
+        self.assertEqual(os.listdir(self.temp_dir), [])  # the conversion must leave nothing behind
+
     @unittest.skipUnless(MNE_AVAILABLE, "MNE not available")
     def test_eeg_eeg2mne_epoched_data(self):
         """Test conversion of epoched EEG data."""
diff --git a/tests/test_eeg_mne2eeg.py b/tests/test_eeg_mne2eeg.py
index 8b9000a7..16f0bbfb 100644
--- a/tests/test_eeg_mne2eeg.py
+++ b/tests/test_eeg_mne2eeg.py
@@ -10,6 +10,7 @@
 import tempfile
 import os
 import shutil
+from unittest import mock
 
 # Add src to path for imports
 sys.path.insert(0, 'src')
@@ -92,6 +93,21 @@ def test_eeg_mne2eeg_raw_object(self):
         except Exception as e:
             self.skipTest(f"eeg_mne2eeg raw conversion not available: {e}")
 
+    @unittest.skipUnless(MNE_AVAILABLE, "MNE not available")
+    def test_eeg_mne2eeg_cleans_temporary_bridge_files(self):
+        info = mne.create_info(["EEG001", "EEG002"], 100.0, ch_types='eeg')
+        raw = mne.io.RawArray(np.random.randn(2, 100), info)
+        real_tempdir = tempfile.TemporaryDirectory  # keep the real class before it is patched
+
+        def tempdir_factory(*args, **kwargs):
+            kwargs["dir"] = self.temp_dir  # force scratch directories into this test's own temp dir
+            return real_tempdir(*args, **kwargs)
+
+        with mock.patch("eegprep.functions.miscfunc.eeg_mne2eeg.tempfile.TemporaryDirectory", tempdir_factory):
+            eeg_mne2eeg(raw)
+
+        self.assertEqual(os.listdir(self.temp_dir), [])  # the conversion must leave nothing behind
+
     @unittest.skipUnless(MNE_AVAILABLE, "MNE not available")
     def test_eeg_mne2eeg_epochs_object(self):
         """Test conversion of MNE Epochs object."""
diff --git a/tests/test_eeg_mne2eeg_epochs.py b/tests/test_eeg_mne2eeg_epochs.py
index 1f120272..3964e3cf 100644
--- a/tests/test_eeg_mne2eeg_epochs.py
+++ b/tests/test_eeg_mne2eeg_epochs.py
@@ -4,6 +4,8 @@
 This module tests the eeg_mne2eeg_epochs function that converts MNE Epochs with ICA to EEGLAB datasets.
 """
 
+import contextlib
+import io
 import unittest
 import os
 import numpy as np
@@ -11,6 +13,7 @@
 import shutil
 
 from eegprep.functions.miscfunc.eeg_mne2eeg_epochs import eeg_mne2eeg_epochs
+from eegprep.functions.miscfunc.misc import finite_matmul, finite_pinv
 
 try:
     import mne
@@ -25,9 +28,6 @@
 except (ImportError, ValueError):
     from fixtures import create_test_eeg
 
-if os.getenv('EEGPREP_SKIP_MATLAB') == '1':
-    raise unittest.SkipTest("MATLAB not available")
-
 
 class TestEEGMNE2EEGEpochs(unittest.TestCase):
     """Test cases for eeg_mne2eeg_epochs function."""
@@ -85,6 +85,28 @@ def test_eeg_mne2eeg_epochs_basic_functionality(self):
         except Exception as e:
             self.skipTest(f"eeg_mne2eeg_epochs basic functionality not available: {e}")
 
+    @unittest.skipUnless(MNE_AVAILABLE, "MNE not available")
+    def test_eeg_mne2eeg_epochs_uses_channel_major_data_without_stdout(self):
+        n_channels = 4
+        n_times = 20
+        n_epochs = 3
+        sfreq = 100.0
+        ch_names = [f'EEG{i:03d}' for i in range(n_channels)]
+        info = mne.create_info(ch_names, sfreq, ch_types='eeg')
+        data = np.arange(n_epochs * n_channels * n_times, dtype=float).reshape(n_epochs, n_channels, n_times)
+        events = np.array([[i, 0, 1] for i in range(n_epochs)])
+        epochs = mne.EpochsArray(data, info, events, tmin=0, event_id={'event': 1}, verbose=False)
+        ica = ICA(n_components=2, random_state=42, max_iter=20)
+        ica.fit(epochs, verbose=False)
+
+        stream = io.StringIO()
+        with contextlib.redirect_stdout(stream):  # capture anything the conversion prints
+            result = eeg_mne2eeg_epochs(epochs, ica)
+
+        self.assertEqual(stream.getvalue(), "")  # the conversion must not write to stdout
+        self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs))
+        np.testing.assert_allclose(result['data'], np.transpose(data, (1, 2, 0)))  # epochs axis moved to the end
+
     @unittest.skipUnless(MNE_AVAILABLE, "MNE not available")
     def test_eeg_mne2eeg_epochs_ica_fields(self):
         """Test ICA fields in the converted EEGLAB dataset."""
@@ -106,25 +128,23 @@ def test_eeg_mne2eeg_epochs_ica_fields(self):
         ica = ICA(n_components=8, random_state=42)
         ica.fit(epochs)
 
-        try:
-            result = eeg_mne2eeg_epochs(epochs, ica)
-
-            # Check ICA fields
-            self.assertIn('icaact', result)
-            self.assertIn('icawinv', result)
-            self.assertIn('icasphere', result)
-            self.assertIn('icaweights', result)
-            self.assertIn('icachansind', result)
-
-            # Check ICA field shapes
-            self.assertEqual(result['icaact'].shape, (8, n_times, n_epochs))  # n_components x n_times x n_epochs
-            self.assertEqual(result['icawinv'].shape, (8, n_channels))  # n_components x n_channels
-            self.assertEqual(result['icasphere'].shape, (n_channels, 8))  # n_channels x n_components
-            self.assertEqual(result['icaweights'].shape, (n_channels, n_channels))  # identity matrix
-            self.assertEqual(len(result['icachansind']), n_channels)  # channel indices
-
-        except Exception as e:
-            self.skipTest(f"eeg_mne2eeg_epochs ICA fields not available: {e}")
+        result = eeg_mne2eeg_epochs(epochs, ica)
+
+        self.assertIn('icaact', result)
+        self.assertIn('icawinv', result)
+        self.assertIn('icasphere', result)
+        self.assertIn('icaweights', result)
+        self.assertIn('icachansind', result)
+        self.assertEqual(result['icaact'].shape, (8, n_times, n_epochs))
+        self.assertEqual(result['icawinv'].shape, (n_channels, 8))
+        self.assertEqual(result['icasphere'].shape, (n_channels, n_channels))
+        self.assertEqual(result['icaweights'].shape, (8, n_channels))
+        self.assertEqual(len(result['icachansind']), n_channels)
+        unmixing = finite_matmul(result['icaweights'], result['icasphere'])
+        data_2d = result['data'][result['icachansind']].reshape(n_channels, -1, order="F")
+        icaact_2d = result['icaact'].reshape(8, -1, order="F")
+        np.testing.assert_allclose(finite_matmul(unmixing, data_2d), icaact_2d, rtol=1e-10, atol=1e-10)
+        np.testing.assert_allclose(finite_pinv(unmixing), result['icawinv'], rtol=1e-10, atol=1e-10)
 
     @unittest.skipUnless(MNE_AVAILABLE, "MNE not available")
     def test_eeg_mne2eeg_epochs_channel_locations(self):
@@ -252,9 +272,8 @@ def test_eeg_mne2eeg_epochs_single_epoch(self):
         try:
             result = eeg_mne2eeg_epochs(epochs, ica)
 
-            # Check data dimensions (data is in MNE format: n_epochs x n_channels x n_times)
             self.assertEqual(result['trials'], 1)
-            self.assertEqual(result['data'].shape, (n_epochs, n_channels, n_times))
+            self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs))
             self.assertEqual(result['icaact'].shape, (8, n_times, n_epochs))
 
         except Exception as e:
@@ -286,8 +305,8 @@ def test_eeg_mne2eeg_epochs_minimal_channels(self):
             result = eeg_mne2eeg_epochs(epochs, ica)
 
             # Check data dimensions
-            self.assertEqual(result['nbchan'], 1)
-            self.assertEqual(result['data'].shape, (1, n_times, n_epochs))
+            self.assertEqual(result['nbchan'], n_channels)
+            self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs))
             self.assertEqual(result['icaact'].shape, (2, n_times, n_epochs))
 
         except Exception as e:
@@ -317,10 +336,9 @@ def test_eeg_mne2eeg_epochs_short_data(self):
         try:
             result = eeg_mne2eeg_epochs(epochs, ica)
 
-            # Check data dimensions (data is in MNE format: n_epochs x n_channels x n_times)
             self.assertEqual(result['pnts'], 10)
             self.assertEqual(result['trials'], 3)
-            self.assertEqual(result['data'].shape, (n_epochs, n_channels, n_times))
+            self.assertEqual(result['data'].shape, (n_channels, n_times, n_epochs))
 
         except Exception as e:
             self.skipTest(f"eeg_mne2eeg_epochs short data not available: {e}")
@@ -484,9 +502,9 @@ def test_eeg_mne2eeg_epochs_integration_workflow(self):
 
             # Check ICA properties
             self.assertEqual(result['icaact'].shape, (15, 200, 20))
-            self.assertEqual(result['icawinv'].shape, (15, 32))
-            self.assertEqual(result['icasphere'].shape, (32, 15))
-            self.assertEqual(result['icaweights'].shape, (32, 32))
+            self.assertEqual(result['icawinv'].shape, (32, 15))
+            self.assertEqual(result['icasphere'].shape, (32, 32))
+            self.assertEqual(result['icaweights'].shape, (15, 32))
             self.assertEqual(len(result['icachansind']), 32)
 
             # Check channel locations
diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py
index a9d58b37..26b43888 100644
--- a/tests/test_gui_main_window.py
+++ b/tests/test_gui_main_window.py
@@ -1329,7 +1329,14 @@ def test_file_menu_runscript_updates_currentset_from_namespace(self):
         qt_widgets = _fake_qt_widgets(open_file="/tmp/script.py")
 
         def fake_runscript(_filename, namespace):
+            self.assertIn("ALLCOM", namespace)
+            self.assertIn("LASTCOM", namespace)
+            self.assertIn("CURRENTSTUDY", namespace)
             namespace["CURRENTSET"] = 2
+            namespace["ALLCOM"].append("EEG = script_command(EEG);")
+            namespace["LASTCOM"] = "EEG = script_command(EEG);"
+            namespace["STUDY"] = {"name": "script study"}
+            namespace["CURRENTSTUDY"] = 1
             return "LASTCOM = pop_runscript('/tmp/script.py');"
 
         with (
@@ -1339,7 +1346,13 @@ def fake_runscript(_filename, namespace):
             dispatcher.dispatch("pop_runscript")
 
         self.assertEqual(session.CURRENTSET, [2])
-        self.assertEqual(session.ALLCOM[-1], "LASTCOM = pop_runscript('/tmp/script.py');")
+        self.assertEqual(session.STUDY["name"], "script study")
+        self.assertEqual(session.CURRENTSTUDY, 1)
+        self.assertEqual(
+            session.ALLCOM,
+            ["EEG = script_command(EEG);", "LASTCOM = pop_runscript('/tmp/script.py');"],
+        )
+        self.assertEqual(session.LASTCOM, "LASTCOM = pop_runscript('/tmp/script.py');")
 
 
 class QtMainWindowTests(unittest.TestCase):
diff --git a/tests/test_pop_epoch.py b/tests/test_pop_epoch.py
index 886bedfe..7e241076 100644
--- a/tests/test_pop_epoch.py
+++ b/tests/test_pop_epoch.py
@@ -6,6 +6,8 @@
 MATLAB EEGLAB's pop_epoch function across all tested scenarios.
 """
 
+import contextlib
+import io
 import os
 import numpy as np
 import unittest
@@ -390,6 +392,27 @@ class TestPopEpochEdgeCases(unittest.TestCase):
     def setUp(self):
         np.random.seed(42)
 
+    def test_pop_epoch_does_not_print_to_stdout(self):
+        EEG = {
+            'data': np.random.randn(2, 500).astype(np.float32),
+            'srate': 100.0,
+            'nbchan': 2,
+            'pnts': 500,
+            'trials': 1,
+            'xmin': 0.0,
+            'xmax': 4.99,
+            'setname': 'stdout_test',
+            'event': [{'type': 'stim', 'latency': 250}],
+            'epoch': [],
+            'saved': 'no',
+        }
+        stream = io.StringIO()
+
+        with contextlib.redirect_stdout(stream):
+            pop_epoch(EEG, 'stim', [-0.1, 0.1])
+
+        self.assertEqual(stream.getvalue(), "")
+
     def test_boundary_events_near_edges(self):
         """Test epoching when events are near data boundaries"""
         # Create EEG with events near boundaries

From 2be2a7a88a5684bff05e1dfd92e53ff9e1dd1014 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Wed, 10 Jun 2026 17:24:27 -0700
Subject: [PATCH 08/16] Fold in useful Claude review fixes

---
 src/eegprep/functions/guifunc/menu_actions.py | 23 +++++++++++--------
 tests/conftest.py                             |  2 --
 tests/test_gui_main_window.py                 | 21 +++++++++++++++++
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py
index b0c80e7a..dfcf9098 100644
--- a/src/eegprep/functions/guifunc/menu_actions.py
+++ b/src/eegprep/functions/guifunc/menu_actions.py
@@ -910,16 +910,19 @@ def _run_script(self, parent: Any | None) -> None:
         }
         command = pop_runscript(filename, namespace)
         self.session.echo_command(command)
-        self.session.apply_workspace_state(
-            eeg=namespace.get("EEG", self.session.EEG),
-            alleeg=namespace.get("ALLEEG", self.session.ALLEEG),
-            currentset=namespace.get("CURRENTSET", self.session.current_set_value()),
-            allcom=namespace.get("ALLCOM", self.session.ALLCOM),
-            lastcom=namespace.get("LASTCOM", self.session.LASTCOM),
-            study=namespace.get("STUDY", self.session.STUDY),
-            currentstudy=namespace.get("CURRENTSTUDY", self.session.CURRENTSTUDY),
-            command=command,
-        )
+        state = {
+            "alleeg": namespace.get("ALLEEG", self.session.ALLEEG),
+            "currentset": namespace.get("CURRENTSET", self.session.current_set_value()),
+            "allcom": namespace.get("ALLCOM", self.session.ALLCOM),
+            "lastcom": namespace.get("LASTCOM", self.session.LASTCOM),
+            "study": namespace.get("STUDY", self.session.STUDY),
+            "currentstudy": namespace.get("CURRENTSTUDY", self.session.CURRENTSTUDY),
+            "command": command,
+        }
+        script_eeg = namespace.get("EEG", self.session.EEG)
+        if script_eeg is not self.session.EEG:
+            state["eeg"] = script_eeg
+        self.session.apply_workspace_state(**state)
         self._refresh()
 
     def _bids_tool_action(self, action: str, parent: Any | None) -> None:
diff --git a/tests/conftest.py b/tests/conftest.py
index 1735e1a8..b132c768 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,10 +60,8 @@ def _preload_matlab_libstdcxx() -> None:
     "tests/test_bids_preproc.py",
     "tests/test_clean_rawdata.py",
     "tests/test_eeg_compare.py",
-    "tests/test_eeg_eeg2mne.py",
     "tests/test_eeg_eegrej.py",
     "tests/test_eeg_lat2point.py",
-    "tests/test_eeg_mne2eeg_epochs.py",
     "tests/test_eeg_point2lat.py",
     "tests/test_eeg_rpsd_parity.py",
     "tests/test_eegfindboundaries.py",
diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py
index 26b43888..957d2cbb 100644
--- a/tests/test_gui_main_window.py
+++ b/tests/test_gui_main_window.py
@@ -1354,6 +1354,27 @@ def fake_runscript(_filename, namespace):
         )
         self.assertEqual(session.LASTCOM, "LASTCOM = pop_runscript('/tmp/script.py');")
 
+    def test_file_menu_runscript_clear_currentset_resets_eeg(self):
+        session = EEGPrepSession()
+        session.store_current(_demo_eeg(), new=True)
+        dispatcher = MenuActionDispatcher(session)
+        qt_widgets = _fake_qt_widgets(open_file="/tmp/script.py")
+
+        def fake_runscript(_filename, namespace):
+            namespace["CURRENTSET"] = 0
+            return "LASTCOM = pop_runscript('/tmp/script.py');"
+
+        with (
+            mock.patch("eegprep.functions.guifunc.menu_actions._require_qt_widgets", return_value=qt_widgets),
+            mock.patch("eegprep.functions.popfunc.pop_runscript.pop_runscript", side_effect=fake_runscript),
+        ):
+            dispatcher.dispatch("pop_runscript")
+
+        self.assertEqual(session.CURRENTSET, [])
+        self.assertIsInstance(session.EEG, dict)
+        self.assertEqual(session.EEG.get("setname"), "")
+        self.assertEqual(session.EEG["data"].size, 0)
+
 
 class QtMainWindowTests(unittest.TestCase):
     def test_gui_main_window_startup_branding_size_and_menu_states(self):

From 601b5adc3be2b5351348d9900c949dbe624c5d16 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Wed, 10 Jun 2026 18:22:03 -0700
Subject: [PATCH 09/16] Address code quality review findings

---
 .../functions/adminfunc/eeg_checkset.py       |  13 -
 .../functions/adminfunc/eeglabcompat.py       | 103 ++---
 src/eegprep/functions/adminfunc/pymat.py      | 130 ------
 src/eegprep/functions/eegobj/eegobj.py        |   5 -
 src/eegprep/functions/guifunc/menu_actions.py |  36 +-
 .../functions/guifunc/visual_capture.py       | 384 ++++++++----------
 .../functions/miscfunc/save_struct_as_hdf5.py |  18 -
 src/eegprep/functions/popfunc/eeg_compare.py  |  13 -
 src/eegprep/functions/popfunc/eeg_eegrej.py   |   6 +-
 src/eegprep/functions/popfunc/eeg_interp.py   | 159 --------
 .../functions/popfunc/eeg_lat2point.py        |   7 +-
 .../functions/popfunc/pop_load_frombids.py    |   2 +-
 src/eegprep/functions/popfunc/pop_loadset.py  |  13 -
 .../functions/popfunc/pop_loadset_h5.py       |  13 -
 src/eegprep/functions/popfunc/pop_saveset.py  |  17 -
 src/eegprep/functions/popfunc/pop_select.py   |  28 +-
 src/eegprep/plugins/ICLabel/eeg_autocorr.py   |  24 --
 .../plugins/ICLabel/eeg_autocorr_fftw.py      |  27 --
 .../plugins/ICLabel/eeg_autocorr_welch.py     |  16 -
 src/eegprep/plugins/ICLabel/eeg_rpsd.py       |  18 -
 .../ICLabel/iclabel_net_load_py_measures.py   |  23 +-
 tests/test_eeg_eegrej.py                      |   9 +-
 tests/test_eeglabcompat.py                    |  41 ++
 tests/test_gui_main_window.py                 |  20 +
 tests/test_processing_logging_contract.py     |  70 ++++
 tests/test_visual_parity.py                   |  17 +-
 26 files changed, 400 insertions(+), 812 deletions(-)
 create mode 100644 tests/test_processing_logging_contract.py

diff --git a/src/eegprep/functions/adminfunc/eeg_checkset.py b/src/eegprep/functions/adminfunc/eeg_checkset.py
index b94b5c58..3e04c78f 100644
--- a/src/eegprep/functions/adminfunc/eeg_checkset.py
+++ b/src/eegprep/functions/adminfunc/eeg_checkset.py
@@ -545,16 +545,3 @@ def eeg_checkset(EEG, *checks, load_data=True):
                 )
 
     return EEG
-
-
-def test_eeg_checkset():
-    from eegprep.functions.popfunc.pop_loadset import pop_loadset
-
-    eeglab_file_path = './sample_data/eeglab_data_with_ica_tmp_out2.set'
-    EEG = pop_loadset(eeglab_file_path)
-    EEG = eeg_checkset(EEG)
-    logger.info('Checkset done')
-
-
-if __name__ == '__main__':
-    test_eeg_checkset()
diff --git a/src/eegprep/functions/adminfunc/eeglabcompat.py b/src/eegprep/functions/adminfunc/eeglabcompat.py
index f7b9c532..063ae948 100644
--- a/src/eegprep/functions/adminfunc/eeglabcompat.py
+++ b/src/eegprep/functions/adminfunc/eeglabcompat.py
@@ -169,9 +169,9 @@ def wrapper(*args, **kwargs):
                     temp_filename2 = temp_file2.name
                 result_filename = temp_filename1 + '.result.set'
                 result_extra_filename = temp_filename1 + '.result.mat'
-                print(f"temp_filename1: {temp_filename1}")
-                print(f"temp_filename2: {temp_filename2}")
-                print(f"result_filename: {result_filename}")
+                logger.debug("MATLAB roundtrip input set path: %s", temp_filename1)
+                logger.debug("MATLAB roundtrip args path: %s", temp_filename2)
+                logger.debug("MATLAB roundtrip result set path: %s", result_filename)
 
                 # save all parameters in the temp_filename which is a .mat file
                 if len(new_args) > 0:
@@ -192,7 +192,7 @@ def wrapper(*args, **kwargs):
                     pop_saveset(args[0], temp_filename1)
                     self.engine.eval(f"EEG = pop_loadset('{temp_filename1}');", nargout=0)
 
-                print(f"Running in MATLAB/Octave: {eval_str}")
+                logger.debug("Running in MATLAB/Octave: %s", eval_str)
                 self.engine.eval(eval_str, nargout=0)
 
                 # output
@@ -280,12 +280,12 @@ def get_eeglab(runtime: str = default_runtime, *, auto_file_roundtrip: bool = Tr
     try:
         engine = _cache[rt]
     except KeyError:
-        print(f"Loading {runtime} runtime...", end='', flush=True)
+        logger.info("Loading %s runtime...", runtime)
         # On the command line, type "octave-8.4.0" OCTAVE_EXECUTABLE or OCTAVE var
         path2eeglab = str(_resolve_eeglab_root())
         matlab_test_dir = REPO_ROOT / 'tests' / 'matlab'
         scripts_dir = str(REPO_ROOT / 'scripts')
-        print("This is the path2eeglab: ", path2eeglab)
+        logger.debug("EEGLAB reference path: %s", path2eeglab)
 
         # not yet loaded, do so now
         if rt == 'oct':
@@ -344,7 +344,7 @@ def get_eeglab(runtime: str = default_runtime, *, auto_file_roundtrip: bool = Tr
             engine.logger.setLevel(logging.INFO)
 
         _cache[rt] = engine
-        print('done.')
+        logger.info("Loaded %s runtime.", runtime)
 
     # optionally wrap the engine in a file-roundtripping wrapper
     if auto_file_roundtrip:
@@ -496,66 +496,29 @@ def clean_artifacts(
     else:
         BurstRejection = 'on'
 
-    pop_saveset(EEG, './tmp.set')  # 0.8 seconds
-    EEG2 = eeglab.pop_loadset('./tmp.set')  # 2 seconds
-    EEG3 = eeglab.clean_artifacts(
-        EEG2,
-        'ChannelCriterion',
-        ChannelCriterion,
-        'LineNoiseCriterion',
-        LineNoiseCriterion,
-        'FlatlineCriterion',
-        FlatlineCriterion,
-        'BurstCriterion',
-        BurstCriterion,
-        'BurstRejection',
-        BurstRejection,
-        'WindowCriterion',
-        WindowCriterion,
-        'Highpass',
-        Highpass,
-        'WindowCriterionTolerances',
-        WindowCriterionTolerances,
-    )
-    eeglab.pop_saveset(EEG3, './tmp2.set')  # 2.4 seconds
-    EEG4 = pop_loadset('./tmp2.set')  # 0.2 seconds
-
-    # delete temporary files
-    os.remove('./tmp.set')
-    os.remove('./tmp2.set')
-    return EEG4
-
-
-# sys.exit()
-def test_eeglab_compat():
-    """Test EEGLAB compatibility."""
-    eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set'
-
-    EEG = pop_loadset(eeglab_file_path)
-    EEG = pop_eegfiltnew(EEG, locutoff=5, hicutoff=25, revfilt=True, plotfreqz=False)
-    EEG = clean_artifacts(
-        EEG,
-        FlatlineCriterion=5,
-        ChannelCriterion=0.87,
-        LineNoiseCriterion=4,
-        Highpass=False,
-        BurstCriterion=20,
-        WindowCriterion=0.25,
-        BurstRejection=False,
-        WindowCriterionTolerances=[float('-inf'), 7],
-    )
-
-    # EEG = eeglab.pop_loadset(eeglab_file_path)
-    # TMPEEG = eeglab.pop_eegfiltnew(EEG, 'locutoff',5,'hicutoff',25,'revfilt',1,'plotfreqz',0)
-    # CLEANEDEEG = eeglab.clean_artifacts(TMPEEG, 'ChannelCriterion', 'off',
-    #     'LineNoiseCriterion', 'off',
-    #     'FlatlineCriterion', 'off',
-    #     'BurstCriterion', 'off',
-    #     'WindowCriterion', 0,
-    #     'Highpass',[0.25, 0.75],
-    #     'WindowCriterionTolerances', [-10000000, 8])
-
-    # clean_artifacts( EEG, ChannelCriterion='on' )
-
-
-# test_eeglab_compat()
+    with tempfile.TemporaryDirectory(prefix="eegprep_clean_artifacts_") as workdir:
+        input_path = Path(workdir) / "input.set"
+        output_path = Path(workdir) / "output.set"
+        pop_saveset(EEG, input_path)
+        EEG2 = eeglab.pop_loadset(str(input_path))
+        EEG3 = eeglab.clean_artifacts(
+            EEG2,
+            'ChannelCriterion',
+            ChannelCriterion,
+            'LineNoiseCriterion',
+            LineNoiseCriterion,
+            'FlatlineCriterion',
+            FlatlineCriterion,
+            'BurstCriterion',
+            BurstCriterion,
+            'BurstRejection',
+            BurstRejection,
+            'WindowCriterion',
+            WindowCriterion,
+            'Highpass',
+            Highpass,
+            'WindowCriterionTolerances',
+            WindowCriterionTolerances,
+        )
+        eeglab.pop_saveset(EEG3, str(output_path))
+        return pop_loadset(output_path)
diff --git a/src/eegprep/functions/adminfunc/pymat.py b/src/eegprep/functions/adminfunc/pymat.py
index d1bf6152..48f334ae 100644
--- a/src/eegprep/functions/adminfunc/pymat.py
+++ b/src/eegprep/functions/adminfunc/pymat.py
@@ -266,133 +266,3 @@ def mat2py(obj):
     else:
         # Fallback: return the object as-is if no conversion rule applies
         return obj
-
-
-def test_py2mat():
-    """Test the py2mat and mat2py conversion functions with various data structures."""
-    import scipy.io
-
-    # Test basic functionality
-    print("=== Basic Test ===")
-    dicts = [{'a': 'adsaf1', 'b': 2.0}, {'a': 'adsaf', 'b': 4.0}, {'a': 'adsaf33', 'b': 7.0}]
-    struct_array = py2mat(dicts)
-    print("Original: ", dicts)
-
-    mat2py(struct_array)
-    scipy.io.savemat('test1.mat', {'struct_array': struct_array})
-    struct_array2 = scipy.io.loadmat('test1.mat')
-    struct_array2 = struct_array2['struct_array'][0]
-    dicts3 = mat2py(struct_array2)
-    print("Converted: ", dicts3)
-
-    # Test nested dictionaries
-    print("\n=== Nested Dictionary Test ===")
-    nested_dicts = [
-        {'name': 'item1', 'value': 10.5, 'config': {'enabled': True, 'threshold': 0.8}, 'tags': ['tag1', 'tag2']},
-        {'name': 'item2', 'value': 20.3, 'config': {'enabled': False, 'threshold': 0.9}, 'tags': ['tag3']},
-    ]
-    nested_struct = py2mat(nested_dicts)
-    print("Original: ", nested_dicts)
-
-    nested_dict2 = mat2py(nested_struct)
-    print("Converted back (not fully compatible): ", nested_dict2)
-
-    scipy.io.savemat('test2.mat', {'nested_struct': nested_struct})
-    nested_struct2 = scipy.io.loadmat('test2.mat')
-    nested_struct2 = nested_struct2['nested_struct'][0]
-    nested_dict3 = mat2py(nested_struct2)
-    print("Converted: ", nested_dict3)
-
-    # Test list of dictionaries as values
-    print("\n=== List of Dictionaries Test ===")
-    list_dict_data = [
-        {'id': 1, 'measurements': [{'sensor': 'A', 'reading': 1.2}, {'sensor': 'B', 'reading': 2.3}]},
-        {
-            'id': 2,
-            'measurements': [
-                {'sensor': 'A', 'reading': 3.4},
-                {'sensor': 'B', 'reading': 4.5},
-                {'sensor': 'C', 'reading': 5.6},
-            ],
-        },
-    ]
-    list_dict_struct = py2mat(list_dict_data)
-    scipy.io.savemat('test3.mat', {'list_dict_struct': list_dict_struct})
-    list_dict_struct2 = scipy.io.loadmat('test3.mat')
-    list_dict_struct2 = list_dict_struct2['list_dict_struct'][0]
-    list_dict_data3 = mat2py(list_dict_struct2)
-    print("Original: ", list_dict_data)
-    print("Converted: ", list_dict_data3)
-
-    # Test single dictionary input
-    print("\n=== Single Dictionary Test ===")
-    single_dict = {'x': 1, 'y': 2, 'nested': {'a': 'hello', 'b': 'world'}}
-    single_struct = py2mat(single_dict)
-    scipy.io.savemat('test4.mat', {'single_struct': single_struct})
-    single_struct2 = scipy.io.loadmat('test4.mat')
-    single_struct2 = single_struct2['single_struct'][0]
-    single_dict2 = mat2py(single_struct2)
-    print("Original: ", single_dict)
-    print("Converted: ", single_dict2)
-
-    # Test numpy array of dictionaries
-    print("\n=== NumPy Array of Dictionaries Test ===")
-    dict_array = np.array(
-        [{'name': 'sensor1', 'value': 1.1}, {'name': 'sensor2', 'value': 2.2}, {'name': 'sensor3', 'value': 3.3}],
-        dtype=object,
-    )
-
-    array_dict_data = [
-        {'id': 'device1', 'sensors': dict_array},
-        {
-            'id': 'device2',
-            'sensors': np.array([{'name': 'sensorA', 'value': 4.4}, {'name': 'sensorB', 'value': 5.5}], dtype=object),
-        },
-    ]
-
-    array_dict_struct = py2mat(array_dict_data)
-    scipy.io.savemat('test5.mat', {'array_dict_struct': array_dict_struct})
-    array_dict_struct2 = scipy.io.loadmat('test5.mat')
-    array_dict_struct2 = array_dict_struct2['array_dict_struct'][0]
-    array_dict_data2 = mat2py(array_dict_struct2)
-    print("Original: ", array_dict_data)
-    print("Converted: ", array_dict_data2)  # Numpy array gets converted to a list of dicts
-
-    params = [np.vstack([np.arange(1, 21), np.arange(101, 121)]), [[5, 8]], 10.0, [{'latency': 5.0}, {'latency': 10.0}]]
-    params_struct = py2mat(params)
-    scipy.io.savemat('test6.mat', {'params_struct': params_struct})
-    params_struct2 = scipy.io.loadmat('test6.mat')
-    params_struct2 = params_struct2['params_struct'][0]
-    params_data2 = mat2py(params_struct2)
-    print("Original: ", params)
-    print("Converted: ", params_data2)
-
-    # EEGLAB dataset
-    eeglab_file_path = '/System/Volumes/Data/data/matlab/eeglab/sample_data/eeglab_data_epochs_ica.set'
-    from eegprep.functions.popfunc.pop_loadset import pop_loadset
-
-    pop_loadset(eeglab_file_path)
-
-    # pop_loadset wihtout index adjustment
-    EEG_LOADMAT = scipy.io.loadmat(eeglab_file_path)
-    EEG_LOADMAT = mat2py(EEG_LOADMAT['EEG'][0])
-
-    # pop_saveset without index adjustment
-    EEG_TMP = EEG_LOADMAT.copy()
-    EEG_TMP = py2mat(EEG_TMP)
-    scipy.io.savemat('test7.set', {'EEG': EEG_TMP})
-
-    # load again
-    EEG_LOADMAT2 = scipy.io.loadmat('test7.set')
-    EEG_LOADMAT2 = mat2py(EEG_LOADMAT2['EEG'][0])
-
-    # Limitations
-    print("\n=== Limitations ===")
-    print(
-        "- Conversion back: py2mat then mat2py does not always work for nested structures (works when the file is saved as a .mat file)"
-    )
-    print("- Numpy arrays of dicts are converted to lists of dicts (an intented feature)")
-
-
-if __name__ == "__main__":
-    test_py2mat()
diff --git a/src/eegprep/functions/eegobj/eegobj.py b/src/eegprep/functions/eegobj/eegobj.py
index ad060af8..ca13515f 100644
--- a/src/eegprep/functions/eegobj/eegobj.py
+++ b/src/eegprep/functions/eegobj/eegobj.py
@@ -255,8 +255,3 @@ def _safe(val, default=''):
         return '\n'.join(lines)
 
     __str__ = __repr__
-
-
-if __name__ == '__main__':
-    obj = EEGobj('sample_data/eeglab_data.set')
-    print(obj)
diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py
index dfcf9098..4e74e90c 100644
--- a/src/eegprep/functions/guifunc/menu_actions.py
+++ b/src/eegprep/functions/guifunc/menu_actions.py
@@ -1423,34 +1423,36 @@ def _run_dipfit_function(self, name: str, parent: Any | None) -> None:
         if name == "pop_dipfit_headmodel":
             from eegprep.plugins.dipfit.pop_dipfit_headmodel import pop_dipfit_headmodel
 
-            pop_dipfit_headmodel(selection, return_com=True)
-            return
-        if name == "pop_dipfit_gridsearch":
+            out = pop_dipfit_headmodel(selection, return_com=True)
+        elif name == "pop_dipfit_gridsearch":
             from eegprep.plugins.dipfit.pop_dipfit_gridsearch import pop_dipfit_gridsearch
 
-            pop_dipfit_gridsearch(selection, return_com=True)
-            return
-        if name == "pop_dipfit_nonlinear":
+            out = pop_dipfit_gridsearch(selection, return_com=True)
+        elif name == "pop_dipfit_nonlinear":
             from eegprep.plugins.dipfit.pop_dipfit_nonlinear import pop_dipfit_nonlinear
 
-            pop_dipfit_nonlinear(selection, return_com=True)
-            return
-        if name == "pop_multifit":
+            out = pop_dipfit_nonlinear(selection, return_com=True)
+        elif name == "pop_multifit":
             from eegprep.plugins.dipfit.pop_multifit import pop_multifit
 
-            pop_multifit(selection, return_com=True)
-            return
-        if name == "pop_leadfield":
+            out = pop_multifit(selection, return_com=True)
+        elif name == "pop_leadfield":
             from eegprep.plugins.dipfit.pop_leadfield import pop_leadfield
 
-            pop_leadfield(selection, return_com=True)
-            return
-        if name == "pop_dipfit_loreta":
+            out = pop_leadfield(selection, return_com=True)
+        elif name == "pop_dipfit_loreta":
             from eegprep.plugins.dipfit.pop_dipfit_loreta import pop_dipfit_loreta
 
-            pop_dipfit_loreta(selection, return_com=True)
+            out = pop_dipfit_loreta(selection, return_com=True)
+        else:
+            self.show_coming_soon(name, parent)
+            return
+        if not isinstance(out, tuple):
             return
-        self.show_coming_soon(name, parent)
+        eeg_out, command = out[0], out[1] if len(out) > 1 else ""
+        if command:
+            self._store_current_from_gui(eeg_out, command=command)
+            self._refresh()
 
     def _plot_channel_locations(self, variant: str, parent: Any | None) -> None:
         selection = self._current_selection_or_warn(parent)
diff --git a/src/eegprep/functions/guifunc/visual_capture.py b/src/eegprep/functions/guifunc/visual_capture.py
index fd51ef21..bb6f89c1 100644
--- a/src/eegprep/functions/guifunc/visual_capture.py
+++ b/src/eegprep/functions/guifunc/visual_capture.py
@@ -7,7 +7,7 @@
 import os
 import pathlib
 import sys
-from typing import Any
+from typing import Any, Callable
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -1339,237 +1339,171 @@ def capture_pophelp_dialog(output: pathlib.Path, function_name: str) -> None:
     _grab_dialog(dialog, output, app)
 
 
+CaptureHandler = Callable[[pathlib.Path], None]  # a capture case: takes the output path, returns nothing
+
+
+def _capture_with(function: Callable[..., None], **kwargs: Any) -> CaptureHandler:
+    def handler(output: pathlib.Path) -> None:  # closure: forwards output plus the bound kwargs
+        function(output, **kwargs)
+
+    return handler  # callable that needs only the output path
+
+
+def _capture_case_handlers() -> dict[str, CaptureHandler]:  # --case id -> capture callable
+    handlers: dict[str, CaptureHandler] = {
+        "adjust_events_dialog": capture_adjust_events_dialog,
+        "main_window": capture_main_window,
+        "main_window_continuous": _capture_with(capture_main_window, state="continuous"),
+        "main_window_epoched": _capture_with(capture_main_window, state="epoched"),
+        "main_window_multiple": _capture_with(capture_main_window, state="multiple"),
+        "main_window_study": _capture_with(capture_main_window, state="study"),
+        "file_menu": _capture_with(capture_main_window, menu_label="File"),
+        "edit_menu": _capture_with(capture_main_window, menu_label="Edit"),
+        "tools_menu": _capture_with(capture_main_window, menu_label="Tools"),
+        "plot_menu": _capture_with(capture_main_window, menu_label="Plot"),
+        "study_menu": _capture_with(capture_main_window, menu_label="Study"),
+        "datasets_menu": _capture_with(capture_main_window, menu_label="Datasets"),
+        "help_menu": _capture_with(capture_main_window, menu_label="Help"),
+        "pop_comments_dialog": capture_pop_comments_dialog,
+        "pop_editset_dialog": capture_pop_editset_dialog,
+        "pop_editeventfield_dialog": capture_pop_editeventfield_dialog,
+        "pop_editeventvals_dialog": capture_pop_editeventvals_dialog,
+        "pop_selectevent_dialog": capture_pop_selectevent_dialog,
+        "pop_rmdat_dialog": capture_pop_rmdat_dialog,
+        "pop_chanedit_dialog": capture_pop_chanedit_dialog,
+        "pop_copyset_dialog": capture_pop_copyset_dialog,
+        "pop_mergeset_dialog": capture_pop_mergeset_dialog,
+        "pop_study_dialog": capture_pop_study_dialog,
+        "pop_studydesign_dialog": capture_pop_studydesign_dialog,
+        "pop_precomp_dialog": capture_pop_precomp_dialog,
+        "pop_preclust_dialog": capture_pop_preclust_dialog,
+        "pop_clust_dialog": capture_pop_clust_dialog,
+        "pop_chanplot_dialog": capture_pop_chanplot_dialog,
+        "pop_clustedit_dialog": capture_pop_clustedit_dialog,
+        "reref_dialog": capture_reref_dialog,
+        "reref_dialog_channel_ref": _capture_with(capture_reref_dialog, variant="channels"),
+        "reref_dialog_huber_ref": _capture_with(capture_reref_dialog, variant="huber"),
+        "reref_dialog_interp_removed": _capture_with(capture_reref_dialog, variant="interp_removed"),
+        "pop_interp_dialog": capture_pop_interp_dialog,
+        "pop_interp_removed_dialog": _capture_with(capture_pop_interp_dialog, variant="removed"),
+        "pop_interp_epoched_dialog": _capture_with(capture_pop_interp_dialog, variant="epoched"),
+        "pop_select_dialog": capture_pop_select_dialog,
+        "pop_resample_dialog": capture_pop_resample_dialog,
+        "pop_newset_dialog": capture_pop_newset_dialog,
+        "pop_rmbase_dialog": capture_pop_rmbase_dialog,
+        "pop_eegfilt_dialog": capture_pop_eegfilt_dialog,
+        "pop_eegfiltnew_dialog": capture_pop_eegfiltnew_dialog,
+        "pop_firws_dialog": capture_pop_firws_dialog,
+        "pop_firpm_dialog": capture_pop_firpm_dialog,
+        "pop_firma_dialog": capture_pop_firma_dialog,
+        "pop_kaiserbeta_dialog": capture_pop_kaiserbeta_dialog,
+        "pop_firwsord_dialog": capture_pop_firwsord_dialog,
+        "pop_firpmord_dialog": capture_pop_firpmord_dialog,
+        "pop_xfirws_dialog": capture_pop_xfirws_dialog,
+        "pop_epoch_dialog": capture_pop_epoch_dialog,
+        "pop_topoplot_erp_dialog": _capture_with(capture_pop_topoplot_dialog, variant="erp"),
+        "pop_topoplot_components_dialog": _capture_with(capture_pop_topoplot_dialog, variant="components"),
+        "pop_spectopo_channels_dialog": _capture_with(capture_pop_spectopo_dialog, variant="channels"),
+        "pop_spectopo_components_dialog": _capture_with(capture_pop_spectopo_dialog, variant="components"),
+        "pop_prop_channels_dialog": _capture_with(capture_pop_prop_dialog, variant="channels"),
+        "pop_prop_components_dialog": _capture_with(capture_pop_prop_dialog, variant="components"),
+        "pop_timtopo_dialog": capture_pop_timtopo_dialog,
+        "pop_plottopo_dialog": capture_pop_plottopo_dialog,
+        "pop_headplot_erp_dialog": _capture_with(capture_pop_headplot_dialog, variant="erp"),
+        "pop_headplot_components_dialog": _capture_with(capture_pop_headplot_dialog, variant="components"),
+        "coregister_dialog": capture_coregister_dialog,
+        "pop_plotdata_dialog": capture_pop_plotdata_dialog,
+        "pop_erpimage_channels_dialog": _capture_with(capture_pop_erpimage_dialog, variant="channels"),
+        "pop_erpimage_components_dialog": _capture_with(capture_pop_erpimage_dialog, variant="components"),
+        "pop_envtopo_dialog": capture_pop_envtopo_dialog,
+        "pop_comperp_channels_dialog": _capture_with(capture_pop_comperp_dialog, variant="channels"),
+        "pop_comperp_components_dialog": _capture_with(capture_pop_comperp_dialog, variant="components"),
+        "pop_newtimef_channels_dialog": _capture_with(capture_pop_newtimef_dialog, variant="channels"),
+        "pop_newtimef_components_dialog": _capture_with(capture_pop_newtimef_dialog, variant="components"),
+        "pop_newcrossf_channels_dialog": _capture_with(capture_pop_newcrossf_dialog, variant="channels"),
+        "pop_newcrossf_components_dialog": _capture_with(capture_pop_newcrossf_dialog, variant="components"),
+        "pop_signalstat_channels_dialog": _capture_with(capture_pop_signalstat_dialog, variant="channels"),
+        "pop_signalstat_components_dialog": _capture_with(capture_pop_signalstat_dialog, variant="components"),
+        "pop_eventstat_dialog": capture_pop_eventstat_dialog,
+        "pop_runica_dialog": capture_pop_runica_dialog,
+        "pop_runica_multiple_dialog": capture_pop_runica_multiple_dialog,
+        "pop_iclabel_dialog": capture_pop_iclabel_dialog,
+        "pop_icflag_dialog": capture_pop_icflag_dialog,
+        "iclabel_pop_prop_extended_dashboard": capture_pop_prop_extended_dashboard,
+        "pop_subcomp_dialog": capture_pop_subcomp_dialog,
+        "pop_clean_rawdata_dialog": capture_pop_clean_rawdata_dialog,
+        "pop_chansel_dialog": capture_pop_chansel_dialog,
+        "select_multiple_datasets_dialog": capture_select_multiple_datasets_dialog,
+        "pop_interp_dataset_index_dialog": capture_dataset_index_dialog,
+        "pop_reref_help_dialog": _capture_with(capture_pophelp_dialog, function_name="pop_reref"),
+        "pop_interp_help_dialog": _capture_with(capture_pophelp_dialog, function_name="pop_interp"),
+    }
+    handlers.update(  # eegbrowser_<variant> cases all route to capture_eegbrowser
+        {
+            f"eegbrowser_{variant}": _capture_with(capture_eegbrowser, variant=variant)
+            for variant in (
+                "continuous",
+                "continuous_marked",
+                "epoched",
+                "epoched_marked",
+                "events",
+                "grid_off",
+                "labels",
+                "component_activity",
+                "data2_overlay",
+                "spectral_overlay",
+                "pop_eegplot_reject_data",
+                "rejcont_continuous",
+                "rejection_epochs",
+            )
+        }
+    )
+    handlers.update(  # rejection dialogs: the case id itself is forwarded as case_id
+        {
+            case_id: _capture_with(capture_rejection_dialog, case_id=case_id)
+            for case_id in (
+                "pop_autorej_dialog",
+                "pop_eegthresh_dialog",
+                "pop_jointprob_dialog",
+                "pop_rejchan_dialog",
+                "pop_rejcont_dialog",
+                "pop_rejkurt_dialog",
+                "pop_rejmenu_dialog",
+                "pop_rejspec_dialog",
+                "pop_rejtrend_dialog",
+                "pop_selectcomps_dialog",
+                "pop_viewprops_dialog",
+            )
+        }
+    )
+    handlers.update(  # dipfit dialogs: same pattern, routed to capture_dipfit_dialog
+        {
+            case_id: _capture_with(capture_dipfit_dialog, case_id=case_id)
+            for case_id in (
+                "pop_dipfit_settings_dialog",
+                "pop_dipfit_headmodel_dialog",
+                "pop_dipfit_gridsearch_dialog",
+                "pop_dipfit_nonlinear_dialog",
+                "pop_dipplot_dialog",
+                "pop_multifit_dialog",
+                "pop_leadfield_dialog",
+                "pop_dipfit_loreta_dialog",
+            )
+        }
+    )
+    return handlers  # a fresh dict is built on every call
+
+
 def main(argv: list[str] | None = None) -> int:
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("--case", required=True)
     parser.add_argument("--output", required=True, type=pathlib.Path)
     args = parser.parse_args(argv)
 
-    if args.case == "adjust_events_dialog":
-        capture_adjust_events_dialog(args.output)
-    elif args.case == "main_window":
-        capture_main_window(args.output)
-    elif args.case == "main_window_continuous":
-        capture_main_window(args.output, state="continuous")
-    elif args.case == "main_window_epoched":
-        capture_main_window(args.output, state="epoched")
-    elif args.case == "main_window_multiple":
-        capture_main_window(args.output, state="multiple")
-    elif args.case == "main_window_study":
-        capture_main_window(args.output, state="study")
-    elif args.case == "eegbrowser_continuous":
-        capture_eegbrowser(args.output, variant="continuous")
-    elif args.case == "eegbrowser_continuous_marked":
-        capture_eegbrowser(args.output, variant="continuous_marked")
-    elif args.case == "eegbrowser_epoched":
-        capture_eegbrowser(args.output, variant="epoched")
-    elif args.case == "eegbrowser_epoched_marked":
-        capture_eegbrowser(args.output, variant="epoched_marked")
-    elif args.case == "eegbrowser_events":
-        capture_eegbrowser(args.output, variant="events")
-    elif args.case == "eegbrowser_grid_off":
-        capture_eegbrowser(args.output, variant="grid_off")
-    elif args.case == "eegbrowser_labels":
-        capture_eegbrowser(args.output, variant="labels")
-    elif args.case == "eegbrowser_component_activity":
-        capture_eegbrowser(args.output, variant="component_activity")
-    elif args.case == "eegbrowser_data2_overlay":
-        capture_eegbrowser(args.output, variant="data2_overlay")
-    elif args.case == "eegbrowser_spectral_overlay":
-        capture_eegbrowser(args.output, variant="spectral_overlay")
-    elif args.case == "eegbrowser_pop_eegplot_reject_data":
-        capture_eegbrowser(args.output, variant="pop_eegplot_reject_data")
-    elif args.case == "eegbrowser_rejcont_continuous":
-        capture_eegbrowser(args.output, variant="rejcont_continuous")
-    elif args.case == "eegbrowser_rejection_epochs":
-        capture_eegbrowser(args.output, variant="rejection_epochs")
-    elif args.case == "file_menu":
-        capture_main_window(args.output, menu_label="File")
-    elif args.case == "edit_menu":
-        capture_main_window(args.output, menu_label="Edit")
-    elif args.case == "tools_menu":
-        capture_main_window(args.output, menu_label="Tools")
-    elif args.case == "plot_menu":
-        capture_main_window(args.output, menu_label="Plot")
-    elif args.case == "study_menu":
-        capture_main_window(args.output, menu_label="Study")
-    elif args.case == "datasets_menu":
-        capture_main_window(args.output, menu_label="Datasets")
-    elif args.case == "help_menu":
-        capture_main_window(args.output, menu_label="Help")
-    elif args.case == "pop_comments_dialog":
-        capture_pop_comments_dialog(args.output)
-    elif args.case == "pop_editset_dialog":
-        capture_pop_editset_dialog(args.output)
-    elif args.case == "pop_editeventfield_dialog":
-        capture_pop_editeventfield_dialog(args.output)
-    elif args.case == "pop_editeventvals_dialog":
-        capture_pop_editeventvals_dialog(args.output)
-    elif args.case == "pop_selectevent_dialog":
-        capture_pop_selectevent_dialog(args.output)
-    elif args.case == "pop_rmdat_dialog":
-        capture_pop_rmdat_dialog(args.output)
-    elif args.case == "pop_chanedit_dialog":
-        capture_pop_chanedit_dialog(args.output)
-    elif args.case == "pop_copyset_dialog":
-        capture_pop_copyset_dialog(args.output)
-    elif args.case == "pop_mergeset_dialog":
-        capture_pop_mergeset_dialog(args.output)
-    elif args.case == "pop_study_dialog":
-        capture_pop_study_dialog(args.output)
-    elif args.case == "pop_studydesign_dialog":
-        capture_pop_studydesign_dialog(args.output)
-    elif args.case == "pop_precomp_dialog":
-        capture_pop_precomp_dialog(args.output)
-    elif args.case == "pop_preclust_dialog":
-        capture_pop_preclust_dialog(args.output)
-    elif args.case == "pop_clust_dialog":
-        capture_pop_clust_dialog(args.output)
-    elif args.case == "pop_chanplot_dialog":
-        capture_pop_chanplot_dialog(args.output)
-    elif args.case == "pop_clustedit_dialog":
-        capture_pop_clustedit_dialog(args.output)
-    elif args.case == "reref_dialog":
-        capture_reref_dialog(args.output)
-    elif args.case == "reref_dialog_channel_ref":
-        capture_reref_dialog(args.output, variant="channels")
-    elif args.case == "reref_dialog_huber_ref":
-        capture_reref_dialog(args.output, variant="huber")
-    elif args.case == "reref_dialog_interp_removed":
-        capture_reref_dialog(args.output, variant="interp_removed")
-    elif args.case == "pop_interp_dialog":
-        capture_pop_interp_dialog(args.output)
-    elif args.case == "pop_interp_removed_dialog":
-        capture_pop_interp_dialog(args.output, variant="removed")
-    elif args.case == "pop_interp_epoched_dialog":
-        capture_pop_interp_dialog(args.output, variant="epoched")
-    elif args.case == "pop_select_dialog":
-        capture_pop_select_dialog(args.output)
-    elif args.case == "pop_resample_dialog":
-        capture_pop_resample_dialog(args.output)
-    elif args.case == "pop_newset_dialog":
-        capture_pop_newset_dialog(args.output)
-    elif args.case == "pop_rmbase_dialog":
-        capture_pop_rmbase_dialog(args.output)
-    elif args.case == "pop_eegfilt_dialog":
-        capture_pop_eegfilt_dialog(args.output)
-    elif args.case == "pop_eegfiltnew_dialog":
-        capture_pop_eegfiltnew_dialog(args.output)
-    elif args.case == "pop_firws_dialog":
-        capture_pop_firws_dialog(args.output)
-    elif args.case == "pop_firpm_dialog":
-        capture_pop_firpm_dialog(args.output)
-    elif args.case == "pop_firma_dialog":
-        capture_pop_firma_dialog(args.output)
-    elif args.case == "pop_kaiserbeta_dialog":
-        capture_pop_kaiserbeta_dialog(args.output)
-    elif args.case == "pop_firwsord_dialog":
-        capture_pop_firwsord_dialog(args.output)
-    elif args.case == "pop_firpmord_dialog":
-        capture_pop_firpmord_dialog(args.output)
-    elif args.case == "pop_xfirws_dialog":
-        capture_pop_xfirws_dialog(args.output)
-    elif args.case == "pop_epoch_dialog":
-        capture_pop_epoch_dialog(args.output)
-    elif args.case == "pop_topoplot_erp_dialog":
-        capture_pop_topoplot_dialog(args.output, variant="erp")
-    elif args.case == "pop_topoplot_components_dialog":
-        capture_pop_topoplot_dialog(args.output, variant="components")
-    elif args.case == "pop_spectopo_channels_dialog":
-        capture_pop_spectopo_dialog(args.output, variant="channels")
-    elif args.case == "pop_spectopo_components_dialog":
-        capture_pop_spectopo_dialog(args.output, variant="components")
-    elif args.case == "pop_prop_channels_dialog":
-        capture_pop_prop_dialog(args.output, variant="channels")
-    elif args.case == "pop_prop_components_dialog":
-        capture_pop_prop_dialog(args.output, variant="components")
-    elif args.case == "pop_timtopo_dialog":
-        capture_pop_timtopo_dialog(args.output)
-    elif args.case == "pop_plottopo_dialog":
-        capture_pop_plottopo_dialog(args.output)
-    elif args.case == "pop_headplot_erp_dialog":
-        capture_pop_headplot_dialog(args.output, variant="erp")
-    elif args.case == "pop_headplot_components_dialog":
-        capture_pop_headplot_dialog(args.output, variant="components")
-    elif args.case == "coregister_dialog":
-        capture_coregister_dialog(args.output)
-    elif args.case == "pop_plotdata_dialog":
-        capture_pop_plotdata_dialog(args.output)
-    elif args.case == "pop_erpimage_channels_dialog":
-        capture_pop_erpimage_dialog(args.output, variant="channels")
-    elif args.case == "pop_erpimage_components_dialog":
-        capture_pop_erpimage_dialog(args.output, variant="components")
-    elif args.case == "pop_envtopo_dialog":
-        capture_pop_envtopo_dialog(args.output)
-    elif args.case == "pop_comperp_channels_dialog":
-        capture_pop_comperp_dialog(args.output, variant="channels")
-    elif args.case == "pop_comperp_components_dialog":
-        capture_pop_comperp_dialog(args.output, variant="components")
-    elif args.case == "pop_newtimef_channels_dialog":
-        capture_pop_newtimef_dialog(args.output, variant="channels")
-    elif args.case == "pop_newtimef_components_dialog":
-        capture_pop_newtimef_dialog(args.output, variant="components")
-    elif args.case == "pop_newcrossf_channels_dialog":
-        capture_pop_newcrossf_dialog(args.output, variant="channels")
-    elif args.case == "pop_newcrossf_components_dialog":
-        capture_pop_newcrossf_dialog(args.output, variant="components")
-    elif args.case == "pop_signalstat_channels_dialog":
-        capture_pop_signalstat_dialog(args.output, variant="channels")
-    elif args.case == "pop_signalstat_components_dialog":
-        capture_pop_signalstat_dialog(args.output, variant="components")
-    elif args.case == "pop_eventstat_dialog":
-        capture_pop_eventstat_dialog(args.output)
-    elif args.case == "pop_runica_dialog":
-        capture_pop_runica_dialog(args.output)
-    elif args.case == "pop_runica_multiple_dialog":
-        capture_pop_runica_multiple_dialog(args.output)
-    elif args.case == "pop_iclabel_dialog":
-        capture_pop_iclabel_dialog(args.output)
-    elif args.case == "pop_icflag_dialog":
-        capture_pop_icflag_dialog(args.output)
-    elif args.case == "iclabel_pop_prop_extended_dashboard":
-        capture_pop_prop_extended_dashboard(args.output)
-    elif args.case == "pop_subcomp_dialog":
-        capture_pop_subcomp_dialog(args.output)
-    elif args.case in {
-        "pop_autorej_dialog",
-        "pop_eegthresh_dialog",
-        "pop_jointprob_dialog",
-        "pop_rejchan_dialog",
-        "pop_rejcont_dialog",
-        "pop_rejkurt_dialog",
-        "pop_rejmenu_dialog",
-        "pop_rejspec_dialog",
-        "pop_rejtrend_dialog",
-        "pop_selectcomps_dialog",
-        "pop_viewprops_dialog",
-    }:
-        capture_rejection_dialog(args.output, case_id=args.case)
-    elif args.case in {
-        "pop_dipfit_settings_dialog",
-        "pop_dipfit_headmodel_dialog",
-        "pop_dipfit_gridsearch_dialog",
-        "pop_dipfit_nonlinear_dialog",
-        "pop_dipplot_dialog",
-        "pop_multifit_dialog",
-        "pop_leadfield_dialog",
-        "pop_dipfit_loreta_dialog",
-    }:
-        capture_dipfit_dialog(args.output, case_id=args.case)
-    elif args.case == "pop_clean_rawdata_dialog":
-        capture_pop_clean_rawdata_dialog(args.output)
-    elif args.case == "pop_chansel_dialog":
-        capture_pop_chansel_dialog(args.output)
-    elif args.case == "select_multiple_datasets_dialog":
-        capture_select_multiple_datasets_dialog(args.output)
-    elif args.case == "pop_interp_dataset_index_dialog":
-        capture_dataset_index_dialog(args.output)
-    elif args.case == "pop_reref_help_dialog":
-        capture_pophelp_dialog(args.output, "pop_reref")
-    elif args.case == "pop_interp_help_dialog":
-        capture_pophelp_dialog(args.output, "pop_interp")
-    else:
+    handler = _capture_case_handlers().get(args.case)  # None when the case id is not registered
+    if handler is None:
         parser.error(f"unsupported EEGPrep visual capture case: {args.case}")
+    handler(args.output)  # only reached for a known case: parser.error() exits the process
     return 0
 
 
diff --git a/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py b/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py
index 7567697c..749b703d 100644
--- a/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py
+++ b/src/eegprep/functions/miscfunc/save_struct_as_hdf5.py
@@ -52,21 +52,3 @@ def save_dict_to_hdf5(data, filename, dataset_name):
     # Save to HDF5
     with h5py.File(filename, 'w') as hdf:
         hdf.create_dataset(dataset_name, data=structured_data)
-
-
-if __name__ == '__main__':
-    data = {
-        'labels': 'FPz',
-        'theta': np.array([0, 1, 2, 3]),
-        'radius': 0.5066888888888889,
-        'X': 84.98123361344625,
-        'Y': 0,
-        'Z': -1.7860385037488253,
-        'sph_theta': 0,
-        'sph_phi': -1.203999999999994,
-        'sph_radius': 85,
-        'type': 'EEG',
-        'urchan': 1,
-        'ref': None,
-    }
-    save_dict_to_hdf5(data, 'data.h5', 'dataset_name')
diff --git a/src/eegprep/functions/popfunc/eeg_compare.py b/src/eegprep/functions/popfunc/eeg_compare.py
index 8cd52c48..468e3a6a 100644
--- a/src/eegprep/functions/popfunc/eeg_compare.py
+++ b/src/eegprep/functions/popfunc/eeg_compare.py
@@ -388,16 +388,3 @@ def get_val2(f):
         raise ValueError(error_message)
 
     return summary
-
-
-# add test data and compare with it
-
-# load test data
-if __name__ == '__main__':
-    from eegprep import pop_loadset
-
-    eeg1 = pop_loadset('../../sample_data/eeglab_data_tmp.set')
-    eeg2 = pop_loadset('../../sample_data/eeglab_data_tmp.set')
-
-    # compare
-    eeg_compare(eeg1, eeg2)
diff --git a/src/eegprep/functions/popfunc/eeg_eegrej.py b/src/eegprep/functions/popfunc/eeg_eegrej.py
index a3e01616..40958f20 100644
--- a/src/eegprep/functions/popfunc/eeg_eegrej.py
+++ b/src/eegprep/functions/popfunc/eeg_eegrej.py
@@ -1,11 +1,15 @@
 """EEG data rejection functions."""
 
+import logging
 from typing import List, Dict, Optional, Tuple
 import numpy as np
 from copy import deepcopy
 from ..miscfunc.misc import round_mat
 
 
+logger = logging.getLogger(__name__)
+
+
 def _is_boundary_event(event: Dict) -> bool:
     t = event.get("type")
     if isinstance(t, str):
@@ -359,7 +363,7 @@ def _combine_regions(regs):
             merged.append([beg, end])
     newregs = np.asarray(merged, dtype=np.int64)
     if newregs.shape[0] != regs.shape[0]:
-        print("Warning: overlapping regions detected and fixed in eeg_eegrej")
+        logger.warning("Overlapping regions detected and fixed in eeg_eegrej")
     return newregs
 
 
diff --git a/src/eegprep/functions/popfunc/eeg_interp.py b/src/eegprep/functions/popfunc/eeg_interp.py
index 57f62b2a..8ddf8eb8 100644
--- a/src/eegprep/functions/popfunc/eeg_interp.py
+++ b/src/eegprep/functions/popfunc/eeg_interp.py
@@ -4,21 +4,12 @@
 methods including spherical spline interpolation.
 """
 
-# to do, look at line 83 and 84 and try to see if the MATLAB array output match. Run code side by side.
-
-# EEG = pop_loadset('sample_data/eeglab_data_tmp.set');
-# EEG = eeg_interp(EEG, [1, 2, 3], 'spherical'); % or EEG = eeg_interp(EEG, {'Fp1' 'Fp2' 'F7'}, 'spherical');
-# pop_save(EEG, 'sample_data/eeglab_data_tmp_out_matlab.set');
-
 import numpy as np
 from scipy.linalg import pinv
 from scipy.interpolate import RBFInterpolator, griddata
 from scipy.special import lpmv
 from copy import deepcopy
 
-# absolute path for all files in data folder
-data_path = '/Users/arno/Python/eegprep/sample_data/'  # os.path.abspath('sample_data/')
-
 
 def eeg_interp(EEG, bad_chans, method='spherical', t_range=None, params=None, dtype='float32'):
     """Interpolate missing or bad EEG channels using spherical spline.
@@ -525,153 +516,3 @@ def computeg(x, y, z, xelec, yelec, zelec, params):
         g += ((2 * n + 1) / (n**m * (n + 1) ** m)) * Pn
 
     return g / (4 * np.pi)
-
-
-# Test functions moved to tests/test_eeg_interp.py
-
-
-def test_chanloc_interpolation():
-    """Example usage of the new chanloc interpolation functionality.
-
-    This demonstrates the three different cases.
-    """
-    # Create a sample EEG structure
-    EEG = {
-        'data': np.random.randn(4, 100, 1),  # 4 channels, 100 time points, 1 trial
-        'nbchan': 4,
-        'pnts': 100,
-        'trials': 1,
-        'srate': 500,
-        'xmin': 0,
-        'xmax': 0.2,
-        'chanlocs': [
-            {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6},
-            {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6},
-            {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7},
-            {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7},
-        ],
-    }
-
-    print("Original EEG structure:")
-    print(f"Data shape: {EEG['data'].shape}")
-    print(f"Number of channels: {EEG['nbchan']}")
-    print(f"Channel labels: {[ch['labels'] for ch in EEG['chanlocs']]}")
-
-    # Case 1: Identical chanlocs (should return unchanged)
-    identical_chanlocs = EEG['chanlocs'].copy()
-    result1 = eeg_interp(EEG.copy(), identical_chanlocs)
-    print("\nCase 1 - Identical chanlocs:")
-    print(f"Data shape unchanged: {result1['data'].shape == EEG['data'].shape}")
-    print(f"Data is identical: {np.array_equal(result1['data'], EEG['data'])}")
-
-    # Case 2: No overlap (should append new channels)
-    new_chanlocs = [
-        {'labels': 'T7', 'X': 0.8, 'Y': 0.0, 'Z': 0.6},
-        {'labels': 'T8', 'X': -0.8, 'Y': 0.0, 'Z': 0.6},
-    ]
-    result2 = eeg_interp(EEG.copy(), new_chanlocs)
-    print("\nCase 2 - No overlap (append new channels):")
-    print(f"Original channels: {EEG['nbchan']}, After: {result2['nbchan']}")
-    print(f"Data shape: {EEG['data'].shape} -> {result2['data'].shape}")
-    print(f"New channel labels: {[ch['labels'] for ch in result2['chanlocs']]}")
-
-    # Case 3: Existing channels are proper subset (should remap to new structure)
-    superset_chanlocs = [
-        {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6},
-        {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6},
-        {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7},
-        {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7},
-        {'labels': 'C3', 'X': 0.6, 'Y': 0.0, 'Z': 0.8},
-        {'labels': 'C4', 'X': -0.6, 'Y': 0.0, 'Z': 0.8},
-    ]
-    result3 = eeg_interp(EEG.copy(), superset_chanlocs)
-    print("\nCase 3 - Existing subset of new structure:")
-    print(f"Original channels: {EEG['nbchan']}, After: {result3['nbchan']}")
-    print(f"Data shape: {EEG['data'].shape} -> {result3['data'].shape}")
-    print(f"Final channel labels: {[ch['labels'] for ch in result3['chanlocs']]}")
-
-    return result1, result2, result3
-
-
-def test_ica_indices_update():
-    """Test that ICA channel indices are properly updated when channels are.
-
-    reordered.
-
-    Test that ICA channel indices are properly updated when channels are
-
-    reordered during interpolation with chanloc structures.
-    """
-    # Create a sample EEG structure with ICA data
-    EEG = {
-        'data': np.random.randn(4, 100, 1),  # 4 channels, 100 time points, 1 trial
-        'nbchan': 4,
-        'pnts': 100,
-        'trials': 1,
-        'srate': 500,
-        'xmin': 0,
-        'xmax': 0.2,
-        'chanlocs': [
-            {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6},
-            {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6},
-            {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7},
-            {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7},
-        ],
-        # Add ICA fields
-        'icasphere': np.eye(4),  # 4x4 identity matrix (not empty)
-        'icaweights': np.random.randn(4, 4),
-        'icawinv': np.random.randn(4, 4),
-        'icachansind': [0, 1, 2, 3],  # All channels used for ICA (0-based)
-        'chaninfo': {
-            'icachansind': [0, 1, 2, 3],
-        },
-    }
-
-    print("Original EEG structure with ICA:")
-    print(f"Data shape: {EEG['data'].shape}")
-    print(f"Number of channels: {EEG['nbchan']}")
-    print(f"Channel labels: {[ch['labels'] for ch in EEG['chanlocs']]}")
-    print(f"ICA channel indices: {EEG['icachansind']}")
-    print(f"Chaninfo ICA indices: {EEG['chaninfo']['icachansind']}")
-
-    # Test Case: Subset interpolation that causes channel reordering
-    # Create a superset where the existing channels appear in different order
-    superset_chanlocs = [
-        {'labels': 'F3', 'X': 0.4, 'Y': 0.6, 'Z': 0.7},  # was index 2, now 0
-        {'labels': 'Fp1', 'X': 0.1, 'Y': 0.8, 'Z': 0.6},  # was index 0, now 1
-        {'labels': 'C3', 'X': 0.6, 'Y': 0.0, 'Z': 0.8},  # new channel, index 2
-        {'labels': 'Fp2', 'X': -0.1, 'Y': 0.8, 'Z': 0.6},  # was index 1, now 3
-        {'labels': 'F4', 'X': -0.4, 'Y': 0.6, 'Z': 0.7},  # was index 3, now 4
-        {'labels': 'C4', 'X': -0.6, 'Y': 0.0, 'Z': 0.8},  # new channel, index 5
-    ]
-
-    result = eeg_interp(EEG.copy(), superset_chanlocs)
-
-    print("\nAfter interpolation with reordering:")
-    print(f"Data shape: {EEG['data'].shape} -> {result['data'].shape}")
-    print(f"Number of channels: {EEG['nbchan']} -> {result['nbchan']}")
-    print(f"Channel labels: {[ch['labels'] for ch in result['chanlocs']]}")
-    print(f"ICA channel indices: {EEG['icachansind']} -> {result['icachansind']}")
-
-    # Verify the mapping is correct:
-    # Original: Fp1=0, Fp2=1, F3=2, F4=3
-    # New:      F3=0,  Fp1=1, C3=2, Fp2=3, F4=4, C4=5
-    # So ICA indices should be updated: [0,1,2,3] -> [1,3,0,4]
-    expected_indices = [1, 3, 0, 4]  # New positions of Fp1, Fp2, F3, F4
-
-    print(f"Expected ICA indices: {expected_indices}")
-    print(f"Actual ICA indices: {result['icachansind']}")
-    print(f"Mapping correct: {result['icachansind'] == expected_indices}")
-
-    # Also verify chaninfo is updated
-    if 'chaninfo' in result and 'icachansind' in result['chaninfo']:
-        print(f"Chaninfo ICA indices: {result['chaninfo']['icachansind']}")
-        print(f"Chaninfo mapping correct: {result['chaninfo']['icachansind'] == expected_indices}")
-
-    return result
-
-
-# Uncomment to run the tests
-# if __name__ == '__main__':
-#     test_chanloc_interpolation()
-#     test_ica_indices_update()
diff --git a/src/eegprep/functions/popfunc/eeg_lat2point.py b/src/eegprep/functions/popfunc/eeg_lat2point.py
index fd98161c..49c813d1 100644
--- a/src/eegprep/functions/popfunc/eeg_lat2point.py
+++ b/src/eegprep/functions/popfunc/eeg_lat2point.py
@@ -1,8 +1,13 @@
 """EEG latency to point conversion utilities."""
 
+import logging
+
 import numpy as np
 
 
+logger = logging.getLogger(__name__)
+
+
 def eeg_lat2point(lat_array, epoch_array, srate, timewin, timeunit=1.0, **kwargs):
     """Convert latencies in time units (relative to per-epoch time 0) to latencies in data points assuming concatenated epochs (EEGLAB style).
 
@@ -70,7 +75,7 @@ def eeg_lat2point(lat_array, epoch_array, srate, timewin, timeunit=1.0, **kwargs
                 newlat[idx] = max_valid
                 flag = 1
                 # mirror MATLAB's informational message
-                print('eeg_lat2point(): Points out of range detected. Points replaced with maximum value')
+                logger.warning("Points out of range detected. Points replaced with maximum value")
             else:
                 raise ValueError('Error in eeg_lat2point(): Points out of range detected')
 
diff --git a/src/eegprep/functions/popfunc/pop_load_frombids.py b/src/eegprep/functions/popfunc/pop_load_frombids.py
index 4e21a456..57b6fa18 100644
--- a/src/eegprep/functions/popfunc/pop_load_frombids.py
+++ b/src/eegprep/functions/popfunc/pop_load_frombids.py
@@ -1083,7 +1083,7 @@ def error(msg: str):
 
         EEG = eeg_checkchanlocs(EEG)
     except ImportError:
-        print("eeg_checkchanlocs not available, skipping channel location check.")
+        logger.info("eeg_checkchanlocs not available, skipping channel location check.")
 
     # Assign channel types based on channel labels (matching MATLAB's eeg_getchantype behavior)
     # Standard 10-20 channel names that should be classified as EEG
diff --git a/src/eegprep/functions/popfunc/pop_loadset.py b/src/eegprep/functions/popfunc/pop_loadset.py
index 5251e46a..f3bd30a4 100644
--- a/src/eegprep/functions/popfunc/pop_loadset.py
+++ b/src/eegprep/functions/popfunc/pop_loadset.py
@@ -186,19 +186,6 @@ def _is_on(value):
     return bool(value)
 
 
-def test_pop_loadset():
-    """Test the pop_loadset function with a sample file."""
-    file_path = './tmp2.set'
-    file_path = '/System/Volumes/Data/data/data/STUDIES/STERN/S04/Memorize.set'  #'./eeglab_data_with_ica_tmp.set'
-    EEG = pop_loadset(file_path)
-
-    # print the keys of the EEG dictionary
-    print(EEG.keys())
-
-
-if __name__ == "__main__":
-    test_pop_loadset()
-
 # STILL OPEN QUESTION: Better to have empty MATLAB arrays as None for empty numpy arrays (current default).
 # The current default is to make it more MALTAB compatible. A lot of MATLAB function start indexing MATLAB
 # empty arrays to add values to them. This is not possible with None and would create more conversion and
diff --git a/src/eegprep/functions/popfunc/pop_loadset_h5.py b/src/eegprep/functions/popfunc/pop_loadset_h5.py
index 0c491e24..3447882e 100644
--- a/src/eegprep/functions/popfunc/pop_loadset_h5.py
+++ b/src/eegprep/functions/popfunc/pop_loadset_h5.py
@@ -301,16 +301,3 @@ def handle_generic_group(EEGTMP, key):
     EEG = eeg_checkset(EEG)
 
     return EEG
-
-
-if __name__ == '__main__':
-    file_name = 'sample_data/eeglab_data_epochs_ica_hdf5.set'
-    EEG = pop_loadset_h5(file_name)
-    print(EEG['data'].shape)
-    print(EEG['icaweights'].shape)
-    print(EEG['icasphere'].shape)
-    print(EEG['icawinv'].shape)
-    print(EEG['icaact'].shape)
-# file_name = 'eeglab_cont73.set'
-# EEG = pop_loadset_h5(file_name)
-# EEG['data'].shape
diff --git a/src/eegprep/functions/popfunc/pop_saveset.py b/src/eegprep/functions/popfunc/pop_saveset.py
index e09df3a0..f11925dd 100644
--- a/src/eegprep/functions/popfunc/pop_saveset.py
+++ b/src/eegprep/functions/popfunc/pop_saveset.py
@@ -551,23 +551,6 @@ def _save_two_files(EEG, savemode):
     return bool(datfile and savemode == 'resave') or savetwofiles_enabled()
 
 
-def test_pop_saveset():
-    """Test pop_saveset function."""
-    from eegprep.functions.popfunc.pop_loadset import pop_loadset
-
-    file_path = './sample_data/eeglab_data_with_ica_tmp.set'
-    EEG = pop_loadset(file_path)
-    pop_saveset(EEG, '/Users/arno/Python/eegprep/sample_data/tmp.set')
-    pop_saveset_old(
-        EEG, '/Users/arno/Python/eegprep/sample_data/tmp2.set'
-    )  # does not do events and function above is better
-    # print the keys of the EEG dictionary
-    print(EEG.keys())
-
-
-if __name__ == '__main__':
-    test_pop_saveset()
-
 # STILL OPEN QUESTION: Better to have empty MATLAB arrays as None for empty numpy arrays (current default).
 # The current default is to make it more MALTAB compatible. A lot of MATLAB function start indexing MATLAB
 # empty arrays to add values to them. This is not possible with None and would create more conversion and
diff --git a/src/eegprep/functions/popfunc/pop_select.py b/src/eegprep/functions/popfunc/pop_select.py
index 38a8926c..bba0a0aa 100644
--- a/src/eegprep/functions/popfunc/pop_select.py
+++ b/src/eegprep/functions/popfunc/pop_select.py
@@ -1,4 +1,5 @@
 import copy
+import logging
 import re
 from typing import Any
 
@@ -18,6 +19,9 @@
 from eegprep.functions.popfunc.eeg_eegrej import eeg_eegrej
 
 
+logger = logging.getLogger(__name__)
+
+
 def pop_select(EEG, *args, gui=None, renderer=None, return_com=False, **kwargs):
     """Select EEG data using EEGLAB ``pop_select`` semantics."""
     options = parse_key_value_args(args, kwargs)
@@ -184,7 +188,7 @@ def _numeric_channel_indices(values):
                 inds, _ = eeg_decodechan(EEG, g['channel'], 'labels', True)
             # show warning if not all channels are found and error if no channels are found
             if len(inds) != len(g['channel']):
-                print(f"Warning: {len(g['channel']) - len(inds)} channels not found")
+                logger.warning("%s channels not found", len(g['channel']) - len(inds))
             if len(inds) == 0:
                 raise ValueError(f"Channels not found: {g['channel']}")
             chan_selected_flag[:] = False
@@ -199,7 +203,7 @@ def _numeric_channel_indices(values):
             chan_selected_flag[np.array(inds, dtype=int)] = False
             # show warning if not all channels are found and error if no channels are found
             if len(inds) != len(g['nochannel']):
-                print(f"Warning: {len(g['nochannel']) - len(inds)} channels not found")
+                logger.warning("%s channels not found", len(g['nochannel']) - len(inds))
 
     else:
         # by type
@@ -226,7 +230,7 @@ def _normalize_range_matrix(x):
             if x.size <= 2:
                 return np.array(x).reshape(1, 2)
             # vector form → [first last]
-            print('Warning: vector format for point/time range is deprecated')
+            logger.warning("Vector format for point/time range is deprecated")
             return np.array([x[0], x[-1]], dtype=float).reshape(1, 2)
         if x.shape[1] != 2:
             raise ValueError('Time/point range must contain exactly 2 columns')
@@ -289,14 +293,14 @@ def _clip_time_matrix(mat):
 
     # 4) Informational prints (optional)
     if len(g['trial']) != trials:
-        print(f"Removing {trials - len(g['trial'])} trial(s)...")
+        logger.info("Removing %s trial(s)...", trials - len(g['trial']))
     if len(g['channel']) != nbchan:
-        print(f"Removing {nbchan - len(g['channel'])} channel(s)...")
+        logger.info("Removing %s channel(s)...", nbchan - len(g['channel']))
 
     # 5) Recompute event epoch indices and latencies when trials are dropped
     if len(g['trial']) != trials and (EEG.get('event') is not None and len(EEG.get('event', [])) > 0):
         if not any('epoch' in ev for ev in EEG['event']):
-            print('Pop_epoch warning: bad event format with epoch dataset, removing events')
+            logger.warning("Bad event format with epoch dataset, removing events")
             EEG['event'] = []
         else:
             keepevent = []
@@ -317,7 +321,7 @@ def _clip_time_matrix(mat):
                         ev['epoch'] = int(newindex[0] + 1)  # back to 1-based for consistency
             diffevent = np.setdiff1d(np.arange(len(EEG['event'])), np.array(keepevent, dtype=int))
             if diffevent.size:
-                print(f"Pop_select: removing {diffevent.size} unreferenced events")
+                logger.info("Removing %s unreferenced events", diffevent.size)
                 EEG['event'] = [EEG['event'][i] for i in range(len(EEG['event'])) if i in keepevent]
 
     # 6) Apply time selection
@@ -434,7 +438,7 @@ def _clip_time_matrix(mat):
 
     # erase dipfit if channels removed
     if len(chan_idx) != nbchan and _has_content(EEG.get('dipfit')):
-        print('warning: erasing dipole information since channels have been removed')
+        logger.warning("Erasing dipole information since channels have been removed")
         EEG['dipfit'] = np.array([])
         EEG['roi'] = {}
 
@@ -769,11 +773,3 @@ def _history_command(options):
     for key, value in options.items():
         parts.extend([f"'{key}'", format_history_value(value, empty_sequence="{}")])
     return f"EEG = pop_select( EEG, {', '.join(parts)});"
-
-
-if __name__ == '__main__':
-    from eegprep.functions.popfunc.pop_loadset import pop_loadset
-
-    EEG = pop_loadset('sample_data/eeglab_data.set')
-    EEG2 = pop_select(EEG, channel=['FP1', 'FP2'])
-    print(EEG2)
diff --git a/src/eegprep/plugins/ICLabel/eeg_autocorr.py b/src/eegprep/plugins/ICLabel/eeg_autocorr.py
index 5bb01a22..ee8cd208 100644
--- a/src/eegprep/plugins/ICLabel/eeg_autocorr.py
+++ b/src/eegprep/plugins/ICLabel/eeg_autocorr.py
@@ -56,27 +56,3 @@ def eeg_autocorr(EEG, pct_data=None):
     ac = ac[:, 1:]
 
     return ac
-
-
-def test_eeg_autocorr():
-    """Test the eeg_autocorr function."""
-    EEG = {
-        'srate': 256,
-        'icaweights': np.random.randn(10, 256),
-        'pnts': 1000,
-        'trials': 5,
-        'icaact': np.random.randn(10, 1000, 5),
-    }
-
-    eeg_autocorr(EEG, 100)
-
-    # print information about psdmed
-    # print(psdmed.shape)
-
-    # print(psdmed)
-
-    # assert psdmed.shape == (10, 100)
-    # assert np.all(np.isfinite(psdmed))
-
-
-# test_eeg_autocorr()
diff --git a/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py b/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py
index 6c77a3bb..d00242cc 100644
--- a/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py
+++ b/src/eegprep/plugins/ICLabel/eeg_autocorr_fftw.py
@@ -7,7 +7,6 @@
 import numpy as np
 from scipy.fft import fft, ifft, next_fast_len
 from scipy.signal import resample_poly
-from ...functions.popfunc.pop_loadset import pop_loadset
 
 
 def eeg_autocorr_fftw(EEG, pct_data=100):
@@ -60,29 +59,3 @@ def eeg_autocorr_fftw(EEG, pct_data=100):
     ac = ac[:, 1:101]
 
     return ac
-
-
-def test_eeg_autocorr_fftw():
-    """Test function for eeg_autocorr_fftw."""
-    EEG = {
-        'srate': 256,
-        'icaweights': np.random.randn(10, 256),
-        'pnts': 1000,
-        'trials': 5,
-        'icaact': np.random.randn(10, 1000, 5),
-    }
-    EEG = pop_loadset('/System/Volumes/Data/data/data/STUDIES/STERN/S01/Memorize.set')
-
-    # reshape the last two dimensions of EEG['icaact']
-    # EEG['icaact'] = EEG['icaact'].reshape(EEG['icaact'].shape[0], -1)
-
-    # convert EEG['icaact'] to double precision
-
-    psdmed = eeg_autocorr_fftw(EEG, 100)
-
-    # print information about psdmed
-    print(psdmed.shape)
-    print(psdmed)
-
-
-# test_eeg_autocorr_fftw()
diff --git a/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py b/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py
index 62ed2c61..6dfaaf15 100644
--- a/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py
+++ b/src/eegprep/plugins/ICLabel/eeg_autocorr_welch.py
@@ -7,7 +7,6 @@
 import numpy as np
 from scipy.signal import resample_poly
 import random
-from ...functions.popfunc.pop_loadset import pop_loadset
 from numpy.fft import fft, ifft
 
 
@@ -83,18 +82,3 @@ def eeg_autocorr_welch(EEG, pct_data=100):
     ac = ac[:, 1:101]
 
     return ac
-
-
-def test_eeg_autocorr_welch():
-    """Test function for eeg_autocorr_welch."""
-    eeglab_file_path = './eeglab_data_with_ica_tmp.set'
-    EEG = pop_loadset(eeglab_file_path)
-
-    eeg_autocorr_welch(EEG, 100)
-
-    # print information about psdmed
-    # print(psdmed.shape)
-    # print(psdmed)
-
-
-# test_eeg_autocorr_welch()
diff --git a/src/eegprep/plugins/ICLabel/eeg_rpsd.py b/src/eegprep/plugins/ICLabel/eeg_rpsd.py
index a4b52cc0..39e532a8 100644
--- a/src/eegprep/plugins/ICLabel/eeg_rpsd.py
+++ b/src/eegprep/plugins/ICLabel/eeg_rpsd.py
@@ -70,21 +70,3 @@ def eeg_rpsd(EEG, nfreqs=None, pct_data=100):
         psdmed[it, :] = 20 * np.log10(np.median(temp, axis=2))
 
     return psdmed
-
-
-def test_eeg_rpsd():
-    """Test the eeg_rpsd function with sample data."""
-    EEG = {
-        'srate': 256,
-        'icaweights': np.random.randn(10, 256),
-        'pnts': 1000,
-        'trials': 5,
-        'icaact': np.random.randn(10, 1000, 5),
-    }
-
-    psdmed = eeg_rpsd(EEG, 100)
-    assert psdmed.shape == (10, 100)
-    assert np.all(np.isfinite(psdmed))
-
-
-# test_eeg_rpsd()
diff --git a/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py b/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py
index f3b20c80..e4f01895 100644
--- a/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py
+++ b/src/eegprep/plugins/ICLabel/iclabel_net_load_py_measures.py
@@ -1,11 +1,14 @@
 """ICLabel neural network model loading utilities."""
 
+import logging
 from pathlib import Path
 
 import scipy
 import scipy.io
 import torch
 
+logger = logging.getLogger(__name__)
+
 
 class Reshape(torch.nn.Module):
     """Reshape layer for PyTorch."""
@@ -42,11 +45,11 @@ def __init__(self, mat_path):
         iclabel_matlab = scipy.io.loadmat(mat_path)
         params = iclabel_matlab['params'][0]
         i = 11
-        print('shape of param', i, torch.tensor(params[i][1]).shape)
+        logger.debug("shape of param %s: %s", i, torch.tensor(params[i][1]).shape)
         self.discriminator_image_layer1_conv = torch.nn.Conv2d(
             in_channels=1, out_channels=128, kernel_size=4, stride=2, padding=1, dilation=1
         )
-        print(self.discriminator_image_layer1_conv.weight.shape)
+        logger.debug("image layer 1 conv weight shape: %s", self.discriminator_image_layer1_conv.weight.shape)
         self.discriminator_image_layer1_conv.weight = torch.nn.Parameter(torch.tensor(params[0][1]).permute(3, 2, 0, 1))
         self.discriminator_image_layer1_conv.bias = torch.nn.Parameter(torch.tensor(params[1][1]).squeeze())
         self.discriminator_image_layer1_relu = torch.nn.LeakyReLU(0.2)
@@ -132,7 +135,7 @@ def forward(self, image, psdmed, autocorr):
         x_image = self.discriminator_image_layer2_relu(x_image)
         x_image = self.discriminator_image_layer3_conv(x_image)
         x_image = self.discriminator_image_layer3_relu(x_image)
-        print('x_image', x_image.shape)
+        logger.debug("x_image shape: %s", x_image.shape)
 
         x_psdmed = self.discriminator_psdmed_layer1_conv_conv(psdmed)
         x_psdmed = self.discriminator_psdmed_layer1_conv_relu(x_psdmed)
@@ -143,7 +146,7 @@ def forward(self, image, psdmed, autocorr):
         x_psdmed = self.discriminator_psdmed_reshape(x_psdmed)
         x_psdmed = self.discriminator_psdmed_concat1([x_psdmed] * 4)
         x_psdmed = self.discriminator_psdmed_concat2([x_psdmed] * 4)
-        print('x_psdmed', x_psdmed.shape)
+        logger.debug("x_psdmed shape: %s", x_psdmed.shape)
 
         x_autocorr = self.discriminator_autocorr_layer1_conv_conv(autocorr)
         x_autocorr = self.discriminator_autocorr_layer1_conv_relu(x_autocorr)
@@ -154,11 +157,11 @@ def forward(self, image, psdmed, autocorr):
         x_autocorr = self.discriminator_autocorr_reshape(x_autocorr)
         x_autocorr = self.discriminator_autocorr_concat1([x_autocorr] * 4)
         x_autocorr = self.discriminator_autocorr_concat2([x_autocorr] * 4)
-        print('x_autocorr', x_autocorr.shape)
+        logger.debug("x_autocorr shape: %s", x_autocorr.shape)
 
         x = self.discriminator_concat([x_image, x_psdmed, x_autocorr])
         x = self.discriminator_conv(x)
-        print('x', x.shape)
+        logger.debug("x shape: %s", x.shape)
         # subtract max value to avoid overflow
         x = x - torch.max(x, dim=1, keepdim=True).values
         x = self.discriminator_softmax(x)
@@ -174,13 +177,13 @@ def forward(self, image, psdmed, autocorr):
     autocorr_mat = data['grid'][0][2]
     # assuming third dimension is trivial and last dimension is channel. First two dimensions (32 x 32) are size of topoplot
     image = torch.tensor(image_mat).permute(-1, 2, 0, 1)
-    print('image shape', image.shape)
+    logger.debug("image shape: %s", image.shape)
     psdmed = torch.tensor(psdmed_mat).permute(-1, 2, 0, 1)
-    print('psd shape', psdmed.shape)
+    logger.debug("psd shape: %s", psdmed.shape)
     autocorr = torch.tensor(autocorr_mat).permute(-1, 2, 0, 1)
-    print('autocorr shape', autocorr.shape)
+    logger.debug("autocorr shape: %s", autocorr.shape)
     output = model(image, psdmed, autocorr)
-    print(output.shape)
+    logger.debug("output shape: %s", output.shape)
 
     # save the output to a mat file
     scipy.io.savemat('output4_py.mat', {'output': output.detach().numpy()})
diff --git a/tests/test_eeg_eegrej.py b/tests/test_eeg_eegrej.py
index 23cf5fee..13e56c4a 100644
--- a/tests/test_eeg_eegrej.py
+++ b/tests/test_eeg_eegrej.py
@@ -2,7 +2,6 @@
 import tempfile
 import unittest
 import numpy as np
-from unittest.mock import patch
 
 # Assume eeg_eegrej is defined as in your module that imports: from eegrej import eegrej
 from eegprep import eeg_eegrej
@@ -253,10 +252,12 @@ def test_eeg_eegrej_overlapping_regions(self):
         # Overlapping regions: [3, 7] and [5, 10] should merge to [3, 10]
         regions = np.array([[3, 7], [5, 10]])
 
-        with patch('builtins.print') as mock_print:
+        with self.assertLogs("eegprep.functions.popfunc.eeg_eegrej", level="WARNING") as captured:
             result = eeg_eegrej(EEG, regions)
-            # Should print warning about overlapping regions
-            mock_print.assert_called_with("Warning: overlapping regions detected and fixed in eeg_eegrej")
+
+        self.assertTrue(
+            any("Overlapping regions detected and fixed in eeg_eegrej" in message for message in captured.output)
+        )
 
         # Should have 20 - 8 = 12 samples remaining (removed samples 3-10)
         self.assertEqual(result['pnts'], 12)
diff --git a/tests/test_eeglabcompat.py b/tests/test_eeglabcompat.py
index 8ffd7e79..910cb7f9 100644
--- a/tests/test_eeglabcompat.py
+++ b/tests/test_eeglabcompat.py
@@ -8,6 +8,7 @@
 import os
 import unittest
 from copy import deepcopy
+from pathlib import Path
 
 import numpy as np
 
@@ -22,11 +23,51 @@
 from eegprep import clean_artifacts, pop_loadset
 from eegprep.functions.adminfunc.eeg_checkset import eeg_checkset
 from eegprep.utils.testing import DebuggableTestCase
+import eegprep.functions.adminfunc.eeglabcompat as eeglabcompat
 
 # Path to test data
 LOCAL_DATA_PATH = os.path.join(os.path.dirname(__file__), '../sample_data/')
 
 
+def test_eeglab_clean_artifacts_roundtrip_uses_private_tempdir(monkeypatch, tmp_path):
+    paths: dict[str, list[Path]] = {"save": [], "matlab_load": [], "matlab_save": [], "load": []}
+
+    class DummyEeglab:
+        def pop_loadset(self, filename):
+            paths["matlab_load"].append(Path(filename))
+            return {"loaded": filename}
+
+        def clean_artifacts(self, EEG, *_args):
+            return {"cleaned": EEG}
+
+        def pop_saveset(self, EEG, filename):
+            paths["matlab_save"].append(Path(filename))
+            Path(filename).write_text("cleaned", encoding="utf-8")
+            return EEG
+
+    def fake_pop_saveset(EEG, filename):
+        paths["save"].append(Path(filename))
+        Path(filename).write_text("input", encoding="utf-8")
+        return EEG
+
+    def fake_pop_loadset(filename):
+        paths["load"].append(Path(filename))
+        return {"loaded_from": str(filename)}
+
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr(eeglabcompat, "get_eeglab", lambda auto_file_roundtrip=False: DummyEeglab())
+    monkeypatch.setattr(eeglabcompat, "pop_saveset", fake_pop_saveset)
+    monkeypatch.setattr(eeglabcompat, "pop_loadset", fake_pop_loadset)
+
+    result = eeglabcompat.clean_artifacts({"data": np.zeros((1, 4))}, BurstCriterion="off")
+
+    assert result["loaded_from"].endswith("output.set")
+    assert not (tmp_path / "tmp.set").exists()
+    assert not (tmp_path / "tmp2.set").exists()
+    assert all(path.parent != tmp_path for values in paths.values() for path in values)
+    assert {path.name for values in paths.values() for path in values} == {"input.set", "output.set"}
+
+
 class TestMatlabWrapper(DebuggableTestCase):
     """Test cases for MatlabWrapper class."""
 
diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py
index 957d2cbb..4a7400b9 100644
--- a/tests/test_gui_main_window.py
+++ b/tests/test_gui_main_window.py
@@ -982,6 +982,26 @@ def test_pop_topoplot_menu_actions_record_history_without_replacing_dataset(self
         self.assertIs(session.ALLEEG[0], original_eeg)
         self.assertEqual(session.ALLCOM[-1], "pop_topoplot(EEG, typeplot=1, items=[0])")
 
+    def test_dipfit_mutating_menu_action_updates_session_and_history(self):
+        session = EEGPrepSession()
+        session.store_current(_demo_eeg(), new=True)
+        dispatcher = MenuActionDispatcher(session)
+        original = session.EEG
+        fitted = dict(original, setname="dipfit updated")
+        fitted["dipfit"] = {"model": [{"rv": 0.01}]}
+
+        with mock.patch(
+            "eegprep.plugins.dipfit.pop_dipfit_gridsearch.pop_dipfit_gridsearch",
+            return_value=(fitted, "EEG = pop_dipfit_gridsearch(EEG, select=[1]);"),
+        ) as gridsearch:
+            dispatcher.dispatch("pop_dipfit_gridsearch")
+
+        gridsearch.assert_called_once_with(original, return_com=True)
+        self.assertEqual(session.EEG["setname"], "dipfit updated")
+        self.assertEqual(session.ALLEEG[0]["dipfit"]["model"][0]["rv"], 0.01)
+        self.assertEqual(session.LASTCOM, "EEG = pop_dipfit_gridsearch(EEG, select=[1]);")
+        self.assertEqual(session.ALLCOM[-1], "EEG = pop_dipfit_gridsearch(EEG, select=[1]);")
+
     def test_copyset_menu_updates_alleeg_eeg_currentset_and_history(self):
         session = EEGPrepSession()
         session.store_current(_demo_eeg(), new=True)
diff --git a/tests/test_processing_logging_contract.py b/tests/test_processing_logging_contract.py
new file mode 100644
index 00000000..2619b065
--- /dev/null
+++ b/tests/test_processing_logging_contract.py
@@ -0,0 +1,70 @@
+import logging
+
+import numpy as np
+import pytest
+
+from eegprep.functions.popfunc.eeg_eegrej import _combine_regions
+from eegprep.functions.popfunc.eeg_lat2point import eeg_lat2point
+from eegprep.functions.popfunc.pop_select import pop_select
+
+
+def _minimal_eeg():
+    return {
+        "data": np.zeros((2, 20), dtype=np.float32),
+        "nbchan": 2,
+        "pnts": 20,
+        "trials": 1,
+        "srate": 100,
+        "xmin": 0,
+        "xmax": 0.19,
+        "times": np.arange(20),
+        "chanlocs": [{"labels": "Cz"}, {"labels": "Pz"}],
+        "event": [],
+        "urevent": [],
+        "epoch": [],
+        "history": "",
+        "icaact": np.array([]),
+        "icaweights": np.array([]),
+        "icasphere": np.array([]),
+        "icawinv": np.array([]),
+        "icachansind": np.array([], dtype=int),
+        "chaninfo": {},
+        "reject": {},
+    }
+
+
+def test_pop_select_warnings_use_logging_not_stdout(capsys, caplog):
+    caplog.set_level(logging.WARNING)
+
+    with pytest.raises(ValueError, match="Channels not found"):
+        pop_select(_minimal_eeg(), channel=["Missing"])
+
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert captured.err == ""
+    assert "channels not found" in caplog.text
+
+
+def test_latency_range_warning_uses_logging_not_stdout(capsys, caplog):
+    caplog.set_level(logging.WARNING)
+
+    newlat, flag = eeg_lat2point([2], [1], 1, [0, 0], outrange=1)
+
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert captured.err == ""
+    assert flag == 1
+    np.testing.assert_array_equal(newlat, np.array([1.0]))
+    assert "Points out of range detected" in caplog.text
+
+
+def test_eegrej_overlap_warning_uses_logging_not_stdout(capsys, caplog):
+    caplog.set_level(logging.WARNING)
+
+    combined = _combine_regions(np.array([[1, 3], [3, 5], [10, 12]]))
+
+    captured = capsys.readouterr()
+    assert captured.out == ""
+    assert captured.err == ""
+    np.testing.assert_array_equal(combined, np.array([[1, 5], [10, 12]]))
+    assert "Overlapping regions detected" in caplog.text
diff --git a/tests/test_visual_parity.py b/tests/test_visual_parity.py
index fd45bc7a..db6bd9a9 100644
--- a/tests/test_visual_parity.py
+++ b/tests/test_visual_parity.py
@@ -11,7 +11,10 @@
 from tools.visual_parity.config import load_manifest
 from tools.visual_parity.export_eegprep_menu_inventory import export_inventory
 from tools.visual_parity.menu_inventory import compare_menu_trees
-from eegprep.functions.guifunc.visual_capture import _main_window_menu_state as _eegprep_main_window_menu_state
+from eegprep.functions.guifunc.visual_capture import (
+    _capture_case_handlers,
+    _main_window_menu_state as _eegprep_main_window_menu_state,
+)
 
 
 ONE_PIXEL_PNG = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII="
@@ -131,6 +134,18 @@ def test_load_manifest_parses_cases(self):
         self.assertEqual(cases["pop_interp_dataset_index_dialog"].targets["eeglab"].action, "inputdlg2:dataset_index")
         self.assertEqual(cases["pop_reref_help_dialog"].targets["eeglab"].action, "pophelp:pop_reref")
 
+    def test_eegprep_visual_capture_registry_covers_manifest_cases(self):
+        handlers = _capture_case_handlers()
+        cases = load_manifest()
+
+        for case_id, case in cases.items():
+            eegprep_target = case.targets.get("eegprep")
+            if eegprep_target is None:
+                continue
+            if "eegprep.functions.guifunc.visual_capture" in eegprep_target.command:
+                with self.subTest(case_id=case_id):
+                    self.assertIn(case_id, handlers)
+
 
 class VisualParityCaptureTests(unittest.TestCase):
     def test_capture_command_receives_output_environment(self):

From 7df77bede25ed76f562a1dae61845cf2b0bc3a3f Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Thu, 11 Jun 2026 00:00:25 -0700
Subject: [PATCH 10/16] Keep console eegh bound to session history

---
 src/eegprep/functions/adminfunc/console.py |  4 ++++
 tests/test_console_workspace.py            | 10 ++++++++++
 2 files changed, 14 insertions(+)

diff --git a/src/eegprep/functions/adminfunc/console.py b/src/eegprep/functions/adminfunc/console.py
index 1793f743..88a9b8b9 100644
--- a/src/eegprep/functions/adminfunc/console.py
+++ b/src/eegprep/functions/adminfunc/console.py
@@ -241,6 +241,8 @@ def __init__(self, bridge: EEGPrepConsoleWorkspace) -> None:
     def __getattr__(self, name: str) -> Any:
         if name.startswith("pop_"):
             return self._bridge.pop_wrapper(name)
+        if name == "eegh":
+            return self._bridge.namespace["eegh"]
         return getattr(eegprep, name)
 
     def __dir__(self) -> list[str]:
@@ -451,6 +453,8 @@ def _bind_exports(self, exports: Mapping[str, Any] | None) -> None:
         for name in export_names:
             if name == "__version__":
                 self.namespace[name] = eegprep.__version__
+            elif name == "eegh":
+                continue
             elif name.startswith("pop_"):
                 wrapped = ConsolePopFunction(name, self, None if exports is None else exports[name])
                 self._wrapped_pop_exports[name] = wrapped
diff --git a/tests/test_console_workspace.py b/tests/test_console_workspace.py
index 53780922..b936d1b7 100644
--- a/tests/test_console_workspace.py
+++ b/tests/test_console_workspace.py
@@ -367,6 +367,16 @@ def test_console_eegh_displays_and_finds_session_history():
     workspace.close()
 
 
+def test_default_console_eegh_uses_session_history_after_public_exports_bind():
+    session = EEGPrepSession()
+    workspace = EEGPrepConsoleWorkspace(session)
+    session.add_history("EEG = pop_fileio('sample.set');")
+
+    assert workspace.namespace["eegh"]() == "1. EEG = pop_fileio('sample.set');"
+    assert workspace.namespace["eegprep"].eegh() == "1. EEG = pop_fileio('sample.set');"
+    workspace.close()
+
+
 def test_console_eegh_positive_index_replays_command_through_workspace():
     session = EEGPrepSession()
     session.store_current(_demo_eeg(), new=True)

From ba83d88207509486be90948c97b90b5226881b3a Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Thu, 11 Jun 2026 00:16:19 -0700
Subject: [PATCH 11/16] Rename CLI migration command

---
 docs/source/user_guide/agent_cli.rst          | 19 ++++++-------
 .../cli/commands/{eeglab.py => migrate.py}    | 24 ++++++++---------
 src/eegprep/cli/discovery.py                  | 14 +++++-----
 src/eegprep/cli/main.py                       |  4 +--
 ...s.py => test_cli_bids_migrate_commands.py} | 27 ++++++++++---------
 tests/test_cli_main.py                        |  4 +++
 6 files changed, 50 insertions(+), 42 deletions(-)
 rename src/eegprep/cli/commands/{eeglab.py => migrate.py} (90%)
 rename tests/{test_cli_bids_eeglab_commands.py => test_cli_bids_migrate_commands.py} (89%)

diff --git a/docs/source/user_guide/agent_cli.rst b/docs/source/user_guide/agent_cli.rst
index b4bef338..f97fb9df 100644
--- a/docs/source/user_guide/agent_cli.rst
+++ b/docs/source/user_guide/agent_cli.rst
@@ -123,18 +123,19 @@ QC results include stable recommendation codes that an agent can reason over.
 HTML reports are for human review; the paired JSON and manifests are for
 automation.
 
-BIDS And EEGLAB Migration
-=========================
+BIDS And Migration
+==================
 
 .. code-block:: bash
 
    eegprep bids validate bids_root --json
    eegprep bids import bids_root --subject 01 --task rest --output sub-01.set --json
    eegprep bids export clean.set --bids-root bids_out --subject 01 --task rest --json
-   eegprep eeglab history old_pipeline.set --json
-   eegprep eeglab compare matlab_output.set eegprep_output.set --json
-   eegprep eeglab convert-script old_pipeline.m --output preprocess.yaml --json
-
-The EEGLAB helpers are migration aids. Script conversion is intentionally
-best-effort and reports unsupported commands instead of silently inventing
-behavior.
+   eegprep migrate history old_pipeline.set --json
+   eegprep migrate compare matlab_output.set eegprep_output.set --json
+   eegprep migrate convert-script old_pipeline.m --output preprocess.yaml --json
+
+Migration helpers can inspect EEGLAB command histories and compare datasets
+without making normal EEGPrep CLI usage depend on MATLAB or an EEGLAB checkout.
+Script conversion is intentionally best-effort and reports unsupported commands
+instead of silently inventing behavior.
diff --git a/src/eegprep/cli/commands/eeglab.py b/src/eegprep/cli/commands/migrate.py
similarity index 90%
rename from src/eegprep/cli/commands/eeglab.py
rename to src/eegprep/cli/commands/migrate.py
index 51da4bf7..3cbd6e34 100644
--- a/src/eegprep/cli/commands/eeglab.py
+++ b/src/eegprep/cli/commands/migrate.py
@@ -1,4 +1,4 @@
-"""EEGLAB migration and compatibility commands for the EEGPrep CLI."""
+"""Migration and compatibility commands for the EEGPrep CLI."""
 
 from __future__ import annotations
 
@@ -33,22 +33,22 @@
 
 
 def register(subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser:
-    """Register ``eeglab`` compatibility commands."""
-    parser = subparsers.add_parser("eeglab", help="Inspect EEGLAB history and migration compatibility.")
-    eeglab_sub = parser.add_subparsers(dest="eeglab_command", required=True)
+    """Register migration compatibility commands."""
+    parser = subparsers.add_parser("migrate", help="Inspect old EEGLAB histories and migration compatibility.")
+    migrate_sub = parser.add_subparsers(dest="migrate_command", required=True)
 
-    history_parser = eeglab_sub.add_parser("history", help="Inspect mapped EEGLAB history operations.")
+    history_parser = migrate_sub.add_parser("history", help="Inspect mapped EEGLAB history operations.")
     history_parser.add_argument("input")
     history_parser.add_argument("--json", action="store_true")
     history_parser.set_defaults(handler=lambda args: history(args.input))
 
-    compare_parser = eeglab_sub.add_parser("compare", help="Compare two EEGLAB .set datasets.")
+    compare_parser = migrate_sub.add_parser("compare", help="Compare two EEGLAB .set datasets.")
     compare_parser.add_argument("left")
     compare_parser.add_argument("right")
     compare_parser.add_argument("--json", action="store_true")
     compare_parser.set_defaults(handler=lambda args: compare(args.left, args.right))
 
-    convert = eeglab_sub.add_parser("convert-script", help="Best-effort conversion of simple EEGLAB scripts to YAML.")
+    convert = migrate_sub.add_parser("convert-script", help="Best-effort conversion of simple EEGLAB scripts to YAML.")
     convert.add_argument("script")
     convert.add_argument("--to", choices=("eegprep-yaml",), default="eegprep-yaml")
     convert.add_argument("--output")
@@ -95,7 +95,7 @@ def history(input_path: str | Path) -> dict[str, Any]:
         operations.append(record)
     return {
         "status": "ok",
-        "schema_version": "eegprep.eeglab.history.v1",
+        "schema_version": "eegprep.migrate.history.v1",
         "input": str(input_path),
         "history_detected": bool(operations),
         "operations": operations,
@@ -148,7 +148,7 @@ def compare(left: str | Path, right: str | Path) -> dict[str, Any]:
             differences.append({"path": "data", "code": "DATA_VALUE_MISMATCH", "max_abs_diff": max_abs_diff})
     return {
         "status": "ok",
-        "schema_version": "eegprep.eeglab.compare.v1",
+        "schema_version": "eegprep.migrate.compare.v1",
         "left": str(left),
         "right": str(right),
         "equivalent": not differences,
@@ -207,7 +207,7 @@ def convert_script(
         output_path_value = str(destination)
     return {
         "status": "ok",
-        "schema_version": "eegprep.eeglab.convert_script.v1",
+        "schema_version": "eegprep.migrate.convert_script.v1",
         "source": str(source),
         "target": target,
         "output": output_path_value,
@@ -219,10 +219,10 @@ def convert_script(
 
 def main(argv: list[str] | None = None) -> int:
     """Standalone module harness for tests and local debugging."""
-    parser = argparse.ArgumentParser(prog="eegprep eeglab")
+    parser = argparse.ArgumentParser(prog="eegprep migrate")
     subparsers = parser.add_subparsers(dest="command", required=True)
     register(subparsers)
-    args = parser.parse_args(["eeglab", *(sys.argv[1:] if argv is None else argv)])
+    args = parser.parse_args(["migrate", *(sys.argv[1:] if argv is None else argv)])
     result = args.handler(args)
     print(json.dumps(json_safe(result), sort_keys=True))
     return 0 if result.get("status") in {"ok", "warning"} else 1
diff --git a/src/eegprep/cli/discovery.py b/src/eegprep/cli/discovery.py
index 25b49268..932649d4 100644
--- a/src/eegprep/cli/discovery.py
+++ b/src/eegprep/cli/discovery.py
@@ -70,7 +70,7 @@ def capabilities() -> dict[str, Any]:
                 "supports_json": True,
                 "supports_dry_run": False,
             },
-            "eeglab": {
+            "migrate": {
                 "description": "Inspect EEGLAB history, compare datasets, and convert simple MATLAB histories.",
                 "inputs": ["eeglab_set", "matlab_script"],
                 "outputs": ["json", "eegprep_pipeline_yaml"],
@@ -149,9 +149,9 @@ def command_schema(command: str) -> dict[str, Any]:
                 "task": {"type": "string"},
             },
         },
-        "eeglab": {
-            "schema_version": "eegprep.schema.command.eeglab.v1",
-            "syntax": "eegprep eeglab <history|compare|convert-script> ... --json",
+        "migrate": {
+            "schema_version": "eegprep.schema.command.migrate.v1",
+            "syntax": "eegprep migrate <history|compare|convert-script> ... --json",
             "required": ["subcommand"],
             "properties": {
                 "left": {"type": "string"},
@@ -238,9 +238,9 @@ def examples(name: str) -> dict[str, Any]:
             "eegprep bids validate bids_root --json",
             "eegprep bids export input.set --bids-root bids_out --subject 01 --task rest --json",
         ],
-        "eeglab": [
-            "eegprep eeglab history sample_data/eeglab_data.set --json",
-            "eegprep eeglab compare left.set right.set --json",
+        "migrate": [
+            "eegprep migrate history sample_data/eeglab_data.set --json",
+            "eegprep migrate compare left.set right.set --json",
         ],
         "skills": ["eegprep skills list --json", "eegprep skills get eegprep-cli"],
     }
diff --git a/src/eegprep/cli/main.py b/src/eegprep/cli/main.py
index 265af365..9b728f03 100644
--- a/src/eegprep/cli/main.py
+++ b/src/eegprep/cli/main.py
@@ -13,7 +13,7 @@
 from eegprep.cli.dataset import inspect_channels, inspect_dataset, inspect_events, inspect_ica, validate_dataset
 from eegprep.cli.commands import batch as batch_commands
 from eegprep.cli.commands import bids as bids_commands
-from eegprep.cli.commands import eeglab as eeglab_commands
+from eegprep.cli.commands import migrate as migrate_commands
 from eegprep.cli.commands import pipeline as pipeline_commands
 from eegprep.cli.commands import qc as qc_commands
 from eegprep.cli.commands import report as report_commands
@@ -126,7 +126,7 @@ def build_parser() -> EEGPrepArgumentParser:
     report_commands.register(subparsers)
     batch_commands.register(subparsers)
     bids_commands.register(subparsers)
-    eeglab_commands.register(subparsers)
+    migrate_commands.register(subparsers)
 
     parser.set_defaults(handler=_handle_root)
     return parser
diff --git a/tests/test_cli_bids_eeglab_commands.py b/tests/test_cli_bids_migrate_commands.py
similarity index 89%
rename from tests/test_cli_bids_eeglab_commands.py
rename to tests/test_cli_bids_migrate_commands.py
index 86a9cec0..403cd176 100644
--- a/tests/test_cli_bids_eeglab_commands.py
+++ b/tests/test_cli_bids_migrate_commands.py
@@ -103,8 +103,8 @@ def test_bids_export_refuses_non_empty_root_without_overwrite(tmp_path):
     assert existing.read_text(encoding="utf-8") == "existing"
 
 
-def test_eeglab_history_maps_supported_and_unsupported_commands(tmp_path):
-    from eegprep.cli.commands import eeglab as eeglab_cli
+def test_migrate_history_maps_supported_and_unsupported_commands(tmp_path):
+    from eegprep.cli.commands import migrate as migrate_cli
 
     set_file = tmp_path / "history.set"
     eeg = _eeg()
@@ -117,9 +117,10 @@ def test_eeglab_history_maps_supported_and_unsupported_commands(tmp_path):
     )
     pop_saveset(eeg, set_file)
 
-    payload = eeglab_cli.history(set_file)
+    payload = migrate_cli.history(set_file)
 
     assert payload["status"] == "ok"
+    assert payload["schema_version"] == "eegprep.migrate.history.v1"
     assert [operation["eeglab_command"] for operation in payload["operations"]] == [
         "pop_loadset",
         "pop_resample",
@@ -131,8 +132,8 @@ def test_eeglab_history_maps_supported_and_unsupported_commands(tmp_path):
     assert payload["operations"][2]["unsupported"]["code"] == "COMMAND_NOT_IMPLEMENTED"
 
 
-def test_eeglab_compare_reports_structured_differences(tmp_path):
-    from eegprep.cli.commands import eeglab as eeglab_cli
+def test_migrate_compare_reports_structured_differences(tmp_path):
+    from eegprep.cli.commands import migrate as migrate_cli
 
     left = tmp_path / "left.set"
     right = tmp_path / "right.set"
@@ -144,9 +145,10 @@ def test_eeglab_compare_reports_structured_differences(tmp_path):
     pop_saveset(eeg_left, left)
     pop_saveset(eeg_right, right)
 
-    payload = eeglab_cli.compare(left, right)
+    payload = migrate_cli.compare(left, right)
 
     assert payload["status"] == "ok"
+    assert payload["schema_version"] == "eegprep.migrate.compare.v1"
     assert payload["equivalent"] is False
     differences_by_path = {difference["path"]: difference for difference in payload["differences"]}
     assert differences_by_path["srate"]["code"] == "VALUE_MISMATCH"
@@ -154,8 +156,8 @@ def test_eeglab_compare_reports_structured_differences(tmp_path):
     assert payload["data"]["max_abs_diff"] == 1.25
 
 
-def test_eeglab_compare_reports_nan_placement_differences(tmp_path):
-    from eegprep.cli.commands import eeglab as eeglab_cli
+def test_migrate_compare_reports_nan_placement_differences(tmp_path):
+    from eegprep.cli.commands import migrate as migrate_cli
 
     left = tmp_path / "left_nan.set"
     right = tmp_path / "right_nan.set"
@@ -168,14 +170,14 @@ def test_eeglab_compare_reports_nan_placement_differences(tmp_path):
     pop_saveset(eeg_left, left)
     pop_saveset(eeg_right, right)
 
-    payload = eeglab_cli.compare(left, right)
+    payload = migrate_cli.compare(left, right)
 
     assert payload["equivalent"] is False
     assert any(difference["code"] == "DATA_FINITE_MASK_MISMATCH" for difference in payload["differences"])
 
 
-def test_eeglab_convert_script_reports_best_effort_conversion(tmp_path):
-    from eegprep.cli.commands import eeglab as eeglab_cli
+def test_migrate_convert_script_reports_best_effort_conversion(tmp_path):
+    from eegprep.cli.commands import migrate as migrate_cli
 
     script = tmp_path / "pipeline.m"
     output = tmp_path / "pipeline.yaml"
@@ -190,9 +192,10 @@ def test_eeglab_convert_script_reports_best_effort_conversion(tmp_path):
         encoding="utf-8",
     )
 
-    payload = eeglab_cli.convert_script(script, output=output)
+    payload = migrate_cli.convert_script(script, output=output)
 
     assert payload["status"] == "ok"
+    assert payload["schema_version"] == "eegprep.migrate.convert_script.v1"
     assert payload["target"] == "eegprep-yaml"
     assert payload["converted_steps"][1]["name"] == "resample"
     assert payload["unsupported_commands"][0]["command"] == "topoplot"
diff --git a/tests/test_cli_main.py b/tests/test_cli_main.py
index 32dd4cb8..6deb05c8 100644
--- a/tests/test_cli_main.py
+++ b/tests/test_cli_main.py
@@ -37,6 +37,8 @@ def test_help_has_agent_start_section():
     assert result.returncode == 0
     assert "Start here (for AI agents):" in result.stdout
     assert "eegprep skills get eegprep-cli" in result.stdout
+    assert "migrate" in result.stdout
+    assert "eeglab              Inspect EEGLAB history" not in result.stdout
 
 
 def test_capabilities_schema_examples_and_skill_are_json_readable():
@@ -49,6 +51,8 @@ def test_capabilities_schema_examples_and_skill_are_json_readable():
     commands = _json_stdout(capabilities)["commands"]
     assert "filter" in commands
     assert "batch" in commands
+    assert "migrate" in commands
+    assert "eeglab" not in commands
     assert schema.returncode == 0
     assert _json_stdout(schema)["schema"]["schema_version"] == "eegprep.schema.command.filter.v1"
     assert examples.returncode == 0

From 974ae6b123bf30eaca784cb8c735c8a28be246bb Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Thu, 11 Jun 2026 00:44:05 -0700
Subject: [PATCH 12/16] Enable pop_newset description editing

---
 src/eegprep/functions/guifunc/qt.py         | 19 ++++++
 src/eegprep/functions/popfunc/pop_newset.py | 24 +++++--
 tests/test_pop_newset.py                    | 71 +++++++++++++++++++++
 3 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/src/eegprep/functions/guifunc/qt.py b/src/eegprep/functions/guifunc/qt.py
index 3dc2ac8b..cecf1d49 100644
--- a/src/eegprep/functions/guifunc/qt.py
+++ b/src/eegprep/functions/guifunc/qt.py
@@ -470,6 +470,11 @@ def _connect_callback(self, callback: CallbackSpec | None, widgets: dict[str, An
             source = widgets.get(params["button"])
             if source is not None:
                 source.clicked.connect(lambda: self._show_callback_message(source, params))
+        elif callback.name == "edit_text":
+            source = widgets.get(params["button"])
+            target = widgets.get(params.get("target", params["button"]))
+            if source is not None and target is not None:
+                source.clicked.connect(lambda: self._edit_text(source, target, params))
         elif callback.name == "open_eegplot":
             source = widgets.get(params["button"])
             if source is not None:
@@ -1114,6 +1119,20 @@ def _show_callback_message(parent: Any, params: Mapping[str, Any]) -> None:
         _qt_core, qt_widgets = _require_qt()
         qt_widgets.QMessageBox.information(parent, str(params.get("title", "EEGPrep")), str(params.get("message", "")))
 
+    @staticmethod
+    def _edit_text(parent: Any, target: Any, params: Mapping[str, Any]) -> None:
+        _qt_core, qt_widgets = _require_qt()
+        stored_value = target.property(_VALUE_PROPERTY)
+        current = stored_value if stored_value is not None else params.get("value", "")
+        value, accepted = qt_widgets.QInputDialog.getMultiLineText(
+            parent,
+            str(params.get("title", "Edit text")),
+            str(params.get("label", "Text")),
+            str(current),
+        )
+        if accepted:
+            target.setProperty(_VALUE_PROPERTY, str(value))
+
     @staticmethod
     def _select_interp_channels(button: Any, target: Any, params: Mapping[str, Any]) -> None:
         source = str(params.get("source", "")).lower()
diff --git a/src/eegprep/functions/popfunc/pop_newset.py b/src/eegprep/functions/popfunc/pop_newset.py
index 22c2b545..121304ed 100644
--- a/src/eegprep/functions/popfunc/pop_newset.py
+++ b/src/eegprep/functions/popfunc/pop_newset.py
@@ -72,6 +72,7 @@ def pop_newset(
 def pop_newset_dialog_spec(EEG: dict[str, Any], CURRENTSET: Any = None, *, guistring: str = "") -> DialogSpec:
     """Return the EEGLAB-like dialog spec for ``pop_newset``."""
     dataset_name = str(EEG.get("setname") or "")
+    comments = _comments_text(EEG.get("comments", ""))
     prompt = guistring or "What do you want to do with the new dataset?"
     old_prompt = "What do you want to do with the old dataset (not modified since last saved)?"
     return DialogSpec(
@@ -93,11 +94,13 @@ def pop_newset_dialog_spec(EEG: dict[str, Any], CURRENTSET: Any = None, *, guist
                 "Edit description",
                 tag="editdescription",
                 callback=CallbackSpec(
-                    "show_message",
+                    "edit_text",
                     params={
                         "button": "editdescription",
+                        "target": "editdescription",
                         "title": "Edit description",
-                        "message": "Dataset description editing is available from the command line using the comments option.",
+                        "label": "Dataset description:",
+                        "value": comments,
                     },
                 ),
             ),
@@ -149,8 +152,11 @@ def _run_gui(
         "overwrite": "on" if overwrite else "off",
         "gui": "off",
     }
-    if "comments" in result:
-        gui_options["comments"] = str(result.get("comments") or "")
+    comments = result.get("comments")
+    if comments is None:
+        comments = result.get("editdescription")
+    if comments is not None:
+        gui_options["comments"] = str(comments)
     if _is_on(result.get("savenew")):
         gui_options["savenew"] = str(result.get("savefile") or "on").strip() or "on"
     return gui_options
@@ -272,3 +278,13 @@ def _currentset_label(CURRENTSET: Any) -> str:
     if isinstance(CURRENTSET, (list, tuple)):
         return ", ".join(str(item) for item in CURRENTSET) if CURRENTSET else "0"
     return str(CURRENTSET or 0)
+
+
+def _comments_text(value: Any) -> str:
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value
+    if isinstance(value, (list, tuple)):
+        return "\n".join(str(item) for item in value)
+    return str(value)
diff --git a/tests/test_pop_newset.py b/tests/test_pop_newset.py
index ec3172ce..f4f56664 100644
--- a/tests/test_pop_newset.py
+++ b/tests/test_pop_newset.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pytest
 
+from eegprep.functions.guifunc.qt import QtDialogRenderer
+from eegprep.functions.guifunc.spec import controls_by_tag
 from eegprep.functions.popfunc.eeg_emptyset import eeg_emptyset
 from eegprep.functions.popfunc.pop_newset import pop_newset, pop_newset_dialog_spec
 
@@ -100,6 +102,75 @@ def test_pop_newset_dialog_old_dataset_prompt_hides_currentset_index():
     assert "What do you want to do with the old dataset 1 (not modified since last saved)?" not in labels
 
 
+def test_pop_newset_dialog_edit_description_opens_multiline_editor():
+    eeg = _eeg(name="processed")
+    eeg["comments"] = ["first line", "second line"]
+
+    control = controls_by_tag(pop_newset_dialog_spec(eeg, 1))["editdescription"]
+
+    assert control.callback is not None
+    assert control.callback.name == "edit_text"
+    assert control.callback.params == {
+        "button": "editdescription",
+        "target": "editdescription",
+        "title": "Edit description",
+        "label": "Dataset description:",
+        "value": "first line\nsecond line",
+    }
+
+
+def test_qt_edit_text_callback_stores_accepted_text(monkeypatch):
+    class QInputDialog:
+        @staticmethod
+        def getMultiLineText(_parent, title, label, text):
+            calls.append((title, label, text))
+            return "", True
+
+    class Widget:
+        def __init__(self):
+            self.properties = {}
+
+        def property(self, name):
+            return self.properties.get(name)
+
+        def setProperty(self, name, value):
+            self.properties[name] = value
+
+    calls = []
+    target = Widget()
+    QtWidgets = type("QtWidgets", (), {"QInputDialog": QInputDialog})
+    monkeypatch.setattr("eegprep.functions.guifunc.qt._require_qt", lambda: (None, QtWidgets))
+
+    QtDialogRenderer._edit_text(
+        object(),
+        target,
+        {"title": "Edit description", "label": "Dataset description:", "value": "old notes"},
+    )
+
+    assert calls == [("Edit description", "Dataset description:", "old notes")]
+    assert QtDialogRenderer._read_widget(target) == ""
+
+
+def test_pop_newset_gui_description_button_value_updates_comments():
+    class Renderer:
+        def run(self, _spec, initial_values=None):
+            return {"setname": "processed", "editdescription": "edited notes", "overwrite": 1}
+
+    alleeg, current, current_set, _command = pop_newset([], _eeg(name="original"), 0)
+
+    alleeg, current, current_set, command = pop_newset(
+        alleeg, _eeg(name="processed"), current_set, "gui", "on", renderer=Renderer()
+    )
+
+    assert len(alleeg) == 2
+    assert current_set == 2
+    assert current["comments"] == "edited notes"
+    assert command == (
+        "[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, "
+        "'setname', 'processed', 'comments', 'edited notes', 'overwrite', 'off');"
+    )
+
+
 def test_pop_newset_gui_choice_can_overwrite_current_dataset():
     class Renderer:
         def run(self, _spec, initial_values=None):

From 48fad4a5d2c916e83540ae9113c041fa5f2aa836 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Thu, 11 Jun 2026 01:04:41 -0700
Subject: [PATCH 13/16] Run GUI ICA in background task

---
 .notes/implementation-notes.html              |  24 ++++
 .../source/user_guide/gui_console_session.rst |   8 ++
 src/eegprep/functions/guifunc/long_task.py    | 121 ++++++++++++++++++
 src/eegprep/functions/guifunc/menu_actions.py |  63 ++++++++-
 src/eegprep/functions/popfunc/pop_runica.py   |  17 ++-
 src/eegprep/resources/help/pop_runica.md      |   7 +
 tests/conftest.py                             |   1 +
 tests/test_gui_long_task.py                   |  73 +++++++++++
 tests/test_gui_main_window.py                 |  88 +++++++++++++
 9 files changed, 397 insertions(+), 5 deletions(-)
 create mode 100644 src/eegprep/functions/guifunc/long_task.py
 create mode 100644 tests/test_gui_long_task.py

diff --git a/.notes/implementation-notes.html b/.notes/implementation-notes.html
index f2d38960..0a23504f 100644
--- a/.notes/implementation-notes.html
+++ b/.notes/implementation-notes.html
@@ -542,5 +542,29 @@ <h2>Verification Notes</h2>
     EEGLAB-facing 1-based public QC indices, and compare nonfinite data masks in
     <code>eegprep eeglab compare</code>.</li>
   </ul>
+  <h1>Async GUI ICA Progress Notes</h1>
+  <h2>Design Decisions</h2>
+  <ul>
+    <li>Kept <code>pop_runica</code> itself synchronous for scripts, tests, CLI,
+    and console calls. The background worker is owned by the Qt menu action
+    layer because only the GUI needs to keep repainting while the computation
+    runs.</li>
+    <li>Split <code>pop_runica</code> GUI option collection from ICA execution
+    so the options dialog always opens on the main Qt thread and only the pure
+    computation runs in the worker.</li>
+    <li>Session mutation remains on the main thread. The worker returns an
+    updated EEG object and command string; <code>EEGPrepSession</code> is updated
+    only from the success callback.</li>
+  </ul>
+  <h2>Tradeoffs</h2>
+  <ul>
+    <li>The progress dialog is indeterminate because runica progress is
+    iteration/log-message based rather than a reliable percentage. Cancellation
+    is intentionally not exposed until the ICA backends support safe
+    interruption.</li>
+    <li>The reusable long-task helper captures EEGPrep log messages for the
+    dialog, while the console action boundary keeps command echo and progress
+    output ordered for mixed GUI-plus-console workflows.</li>
+  </ul>
 </body>
 </html>
diff --git a/docs/source/user_guide/gui_console_session.rst b/docs/source/user_guide/gui_console_session.rst
index 77acda0b..16f33382 100644
--- a/docs/source/user_guide/gui_console_session.rst
+++ b/docs/source/user_guide/gui_console_session.rst
@@ -101,6 +101,14 @@ GUI actions should update state through session helpers such as
 ``notify_changed()``. They should not mutate a GUI-only copy of ``EEG`` that
 the console cannot see.
 
+Long-running GUI actions use the same session boundary. For example,
+GUI-launched ICA opens the EEGLAB-like ``pop_runica`` options dialog on the main
+thread, runs the ICA computation in a background worker thread behind a progress
+dialog, then stores the updated dataset and history only after the worker
+finishes successfully. While the worker is running, progress messages are
+buffered safely for ``eegprep-console`` so the replayable command is shown
+before its related output.
+
 ``eegprep-console`` wraps registered ``pop_*`` functions. When a bare call such
 as ``pop_resample(EEG, 64)`` returns a dataset and command string, the wrapper
 stores the returned dataset, updates ``LASTCOM`` and ``ALLCOM``, and tells the
diff --git a/src/eegprep/functions/guifunc/long_task.py b/src/eegprep/functions/guifunc/long_task.py
new file mode 100644
index 00000000..09bf32b1
--- /dev/null
+++ b/src/eegprep/functions/guifunc/long_task.py
@@ -0,0 +1,121 @@
+"""Qt helper for running long GUI actions off the UI thread."""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from dataclasses import dataclass
+import logging
+from typing import Any
+
+try:  # pragma: no cover - depends on optional GUI dependency
+    from PySide6 import QtCore, QtWidgets
+except ImportError:  # pragma: no cover - depends on optional GUI dependency
+    QtCore = None
+    QtWidgets = None
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class LongTaskHandle:
+    """Keep Qt task objects alive until their worker thread finishes."""
+
+    thread: Any
+    worker: Any
+    dialog: Any
+
+
+def run_long_task(
+    *,
+    parent: Any | None,
+    title: str,
+    label: str,
+    task: Callable[[], Any],
+    on_success: Callable[[Any], None],
+    on_error: Callable[[Exception], None],
+    on_finished: Callable[[LongTaskHandle], None] | None = None,
+) -> LongTaskHandle:
+    """Run ``task`` in a Qt worker thread with an indeterminate progress dialog."""
+    qt_core, qt_widgets = _require_qt()
+
+    progress = qt_widgets.QProgressDialog(label, None, 0, 0, parent)
+    progress.setWindowTitle(title)
+    progress.setCancelButton(None)
+    progress.setAutoClose(False)
+    progress.setAutoReset(False)
+    progress.setMinimumDuration(0)
+    progress.setWindowModality(qt_core.Qt.WindowModal)
+
+    class Worker(qt_core.QObject):
+        succeeded = qt_core.Signal(object)
+        failed = qt_core.Signal(object)
+        message = qt_core.Signal(str)
+        finished = qt_core.Signal()
+
+        def run(self) -> None:
+            handler = _SignalLogHandler(self.message)
+            handler.setFormatter(logging.Formatter("%(message)s"))
+            handler.setLevel(logging.INFO)
+            eegprep_logger = logging.getLogger("eegprep")
+            old_level = eegprep_logger.level
+            if old_level == logging.NOTSET or old_level > logging.INFO:
+                eegprep_logger.setLevel(logging.INFO)
+            eegprep_logger.addHandler(handler)
+            try:
+                self.message.emit(label)
+                result = task()
+                self.succeeded.emit(result)
+            except Exception as exc:  # noqa: BLE001 - forwarded to GUI error handler.
+                self.failed.emit(exc)
+            finally:
+                eegprep_logger.removeHandler(handler)
+                eegprep_logger.setLevel(old_level)
+                self.finished.emit()
+
+    thread = qt_core.QThread()
+    worker = Worker()
+    handle = LongTaskHandle(thread=thread, worker=worker, dialog=progress)
+
+    worker.moveToThread(thread)
+    thread.started.connect(worker.run)
+    worker.message.connect(lambda message: _update_progress_label(progress, label, message))
+    worker.succeeded.connect(on_success)
+    worker.failed.connect(on_error)
+    worker.finished.connect(thread.quit)
+    thread.finished.connect(worker.deleteLater)
+    thread.finished.connect(thread.deleteLater)
+    thread.finished.connect(progress.close)
+    if on_finished is not None:
+        thread.finished.connect(lambda: on_finished(handle))
+
+    progress._eegprep_long_task = handle
+    progress.show()
+    qt_core.QTimer.singleShot(0, thread.start)
+    return handle
+
+
+class _SignalLogHandler(logging.Handler):  # forwards each formatted log record to a Qt-style signal
+    def __init__(self, signal: Any):
+        super().__init__(level=logging.INFO)
+        self.signal = signal  # object exposing ``emit(str)``; receives the formatted message
+
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            self.signal.emit(self.format(record))
+        except Exception:  # noqa: BLE001 - logging here would re-enter this handler via "eegprep".
+            self.handleError(record)
+
+
+def _update_progress_label(progress: Any, label: str, message: str) -> None:
+    message = str(message).strip()
+    progress.setLabelText(label if not message or message == label else f"{label}\n{message}")
+
+
+def _require_qt() -> tuple[Any, Any]:
+    if QtCore is None or QtWidgets is None:
+        raise RuntimeError(
+            "PySide6 is required for EEGPrep GUI progress dialogs. Install it with "
+            "`pip install -e .[gui]` or `pip install eegprep[gui]`."
+        )
+    return QtCore, QtWidgets
diff --git a/src/eegprep/functions/guifunc/menu_actions.py b/src/eegprep/functions/guifunc/menu_actions.py
index 4e74e90c..3defe10a 100644
--- a/src/eegprep/functions/guifunc/menu_actions.py
+++ b/src/eegprep/functions/guifunc/menu_actions.py
@@ -10,6 +10,7 @@
 from typing import Any
 
 from eegprep.extension_runtime import ExtensionRuntime
+from eegprep.functions.guifunc.long_task import LongTaskHandle, run_long_task
 from eegprep.functions.guifunc.menu_placeholders import PLACEHOLDER_ACTIONS, is_placeholder_action, placeholder_message
 from eegprep.functions.guifunc.pophelp import pophelp
 from eegprep.functions.guifunc.session import EEGPrepSession, has_eeg_data
@@ -231,6 +232,7 @@ def __init__(
         self.refresh = refresh
         self.native_file_dialogs = native_file_dialogs
         self.extension_runtime = extension_runtime or ExtensionRuntime.empty()
+        self._long_tasks: list[LongTaskHandle] = []
 
     def dispatch_gui(self, action: str, parent: Any | None = None) -> None:
         """Run a menu action from Qt and show user-facing errors."""
@@ -1156,8 +1158,14 @@ def _run_pop_function(self, name: str, parent: Any | None, *, variant: str = "")
 
             out = pop_rmdat(selection, return_com=True)
         elif name == "pop_runica":
-            from eegprep.functions.popfunc.pop_runica import pop_runica
+            from eegprep.functions.popfunc.pop_runica import pop_runica, pop_runica_gui_options
 
+            if parent is not None:
+                gui_options = pop_runica_gui_options(selection)
+                if gui_options is None:
+                    return
+                self._run_pop_runica_long_task(selection, gui_options, parent)
+                return
             out = pop_runica(selection, return_com=True)
         elif name == "pop_select":
             from eegprep.functions.popfunc.pop_select import pop_select
@@ -1300,6 +1308,59 @@ def commit_component_rejection(eeg_out: Any, _states: dict[int, bool]) -> None:
                 self._store_current_from_gui(eeg_out, command=command)
             self._refresh()
 
+    def _run_pop_runica_long_task(
+        self,
+        selection: Any,
+        gui_options: Mapping[str, Any],
+        parent: Any,
+    ) -> None:
+        from eegprep.functions.popfunc.pop_runica import pop_runica
+
+        self.session.begin_gui_action("pop_runica")
+
+        def task() -> Any:
+            return pop_runica(selection, gui=False, return_com=True, **dict(gui_options))
+
+        def on_success(out: Any) -> None:
+            try:
+                if isinstance(out, tuple):
+                    eeg_out, command = out[0], out[1] if len(out) > 1 else ""
+                else:
+                    eeg_out, command = out, ""
+                if command:
+                    self._store_current_from_gui(eeg_out, command=command)
+                    self._refresh()
+            except Exception as exc:
+                logger.exception("EEGPrep GUI menu action failed: pop_runica")
+                self._warn(parent, str(exc))
+
+        def on_error(exc: Exception) -> None:
+            logger.error(
+                "EEGPrep GUI menu action failed: pop_runica",
+                exc_info=(type(exc), exc, exc.__traceback__),
+            )
+            self._warn(parent, str(exc))
+
+        def on_finished(handle: LongTaskHandle) -> None:
+            if handle in self._long_tasks:
+                self._long_tasks.remove(handle)
+            self.session.end_gui_action("pop_runica")
+
+        try:
+            handle = run_long_task(
+                parent=parent,
+                title="Running ICA decomposition",
+                label="Running ICA decomposition. This may take several minutes.",
+                task=task,
+                on_success=on_success,
+                on_error=on_error,
+                on_finished=on_finished,
+            )
+        except Exception:
+            self.session.end_gui_action("pop_runica")
+            raise
+        self._long_tasks.append(handle)
+
     def _run_browser_accept_pop_action(
         self,
         name: str,
diff --git a/src/eegprep/functions/popfunc/pop_runica.py b/src/eegprep/functions/popfunc/pop_runica.py
index 537f2e36..66f0c876 100644
--- a/src/eegprep/functions/popfunc/pop_runica.py
+++ b/src/eegprep/functions/popfunc/pop_runica.py
@@ -60,7 +60,7 @@ def pop_runica(
     elif gui is None:
         gui = options is None and not has_programmatic_options and chanind is None and dataset is None
     if gui:
-        gui_result = _run_gui(EEG, renderer=renderer, initial_values=_selectamica_initial_values(selectamica))
+        gui_result = pop_runica_gui_options(EEG, renderer=renderer, selectamica=selectamica)
         if gui_result is None:
             return (EEG, "") if return_com else EEG
         icatype = gui_result["icatype"]
@@ -70,9 +70,6 @@ def pop_runica(
         dataset = gui_result["dataset"]
         concatenate = gui_result["concatenate"]
         concatcond = gui_result["concatcond"]
-        if icatype == "runica":
-            options = dict(options)
-            options.setdefault("interrupt", "on")
 
     ica_options = _normalise_ica_options(icatype, options, parsed)
     if isinstance(EEG, list):
@@ -202,6 +199,18 @@ def _run_gui(EEG, renderer=None, initial_values=None):
     }
 
 
+def pop_runica_gui_options(EEG, *, renderer=None, selectamica: str | None = None) -> dict[str, Any] | None:
+    """Return the ``pop_runica`` dialog choices (``None`` if the dialog yields nothing); ICA is not run."""
+    initial_values = _selectamica_initial_values(selectamica)
+    gui_result = _run_gui(EEG, renderer=renderer, initial_values=initial_values)
+    if gui_result is not None and gui_result["icatype"] == "runica":
+        # GUI-launched runica records 'interrupt', 'on'; copy first so the dialog's own dict is not mutated.
+        runica_options = dict(gui_result["options"])
+        runica_options.setdefault("interrupt", "on")
+        gui_result["options"] = runica_options
+    return gui_result
+
+
 def _runica_on_dataset(EEG, icatype, options, *, reorder, chanind):
     logger.info("Attempting to convert data matrix to double precision...")
     prepared = _prepare_ica_dataset(EEG)
diff --git a/src/eegprep/resources/help/pop_runica.md b/src/eegprep/resources/help/pop_runica.md
index f3498281..fd1abe72 100644
--- a/src/eegprep/resources/help/pop_runica.md
+++ b/src/eegprep/resources/help/pop_runica.md
@@ -30,6 +30,10 @@ Calling `pop_runica(EEG)` opens an EEGLAB-style dialog with:
 - Channel type/index selection controls.
 - For multiple datasets, a dataset selector and concatenate controls.
 
+When `pop_runica` is started from the main EEGPrep GUI, the ICA computation
+runs behind an indeterminate progress dialog so the window can continue
+repainting while the decomposition is being computed.
+
 Behavior:
 
 - Supplying a non-default `icatype` programmatically, for example
@@ -41,6 +45,9 @@ Behavior:
 - Existing ICLabel classifications are removed when ICA is recomputed because they no longer describe the active components.
 - `EEG.icaweights`, `EEG.icasphere`, `EEG.icawinv`, `EEG.icaact`, and `EEG.icachansind` are updated.
 - GUI-launched runica adds `'interrupt', 'on'` to the history command, matching EEGLAB's GUI path.
+- GUI-launched ICA stores the updated dataset only after the background
+  computation finishes successfully. Failed runs leave the current dataset and
+  history unchanged.
 
 Examples:
 
diff --git a/tests/conftest.py b/tests/conftest.py
index b132c768..a246c980 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -50,6 +50,7 @@ def _preload_matlab_libstdcxx() -> None:
     "tests/test_gui_pop_runica.py",
     "tests/test_gui_pop_select.py",
     "tests/test_gui_pop_study.py",
+    "tests/test_gui_long_task.py",
     "tests/test_gui_main_window.py",
     "tests/test_eegplot_gui.py",
 )
diff --git a/tests/test_gui_long_task.py b/tests/test_gui_long_task.py
new file mode 100644
index 00000000..c00fdb5e
--- /dev/null
+++ b/tests/test_gui_long_task.py
@@ -0,0 +1,73 @@
+import logging
+import os
+
+import pytest
+
+from eegprep.functions.guifunc.long_task import run_long_task
+
+
+@pytest.fixture
+def qapp():
+    """Provide a ``QApplication``, reusing the existing instance when there is one."""
+    os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")  # only takes effect when the variable is unset
+    pytest.importorskip("PySide6")
+    from PySide6.QtWidgets import QApplication
+
+    yield QApplication.instance() or QApplication([])
+
+
+def test_run_long_task_returns_result_and_forwards_progress(qapp):
+    """A successful task reports its result, no error, one finish, and its log line in the label."""
+    from PySide6.QtCore import QEventLoop, QTimer
+    loop = QEventLoop()
+    results, errors, finished = [], [], []
+
+    def task():
+        logging.getLogger("eegprep.tests").info("worker progress")
+        return "done"
+
+    def on_finished(task_handle):
+        finished.append(task_handle)
+        loop.quit()
+
+    handle = run_long_task(
+        parent=None,
+        title="Running test task",
+        label="Running test task.",
+        task=task,
+        on_success=results.append,
+        on_error=errors.append,
+        on_finished=on_finished,
+    )
+    QTimer.singleShot(3000, loop.quit)  # stop waiting after 3 s if on_finished never arrives
+    loop.exec()
+
+    assert (results, errors, finished) == (["done"], [], [handle])
+    assert "worker progress" in handle.dialog.labelText()
+
+
+def test_run_long_task_reports_errors(qapp):
+    from PySide6 import QtCore
+
+    loop = QtCore.QEventLoop()
+    results = []
+    errors = []
+
+    def task():
+        raise ValueError("task failed")
+
+    run_long_task(
+        parent=None,
+        title="Running test task",
+        label="Running test task.",
+        task=task,
+        on_success=results.append,
+        on_error=errors.append,
+        on_finished=lambda _handle: loop.quit(),
+    )
+    QtCore.QTimer.singleShot(3000, loop.quit)
+    loop.exec()
+
+    assert results == []
+    assert len(errors) == 1
+    assert str(errors[0]) == "task failed"
diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py
index 4a7400b9..99e9819f 100644
--- a/tests/test_gui_main_window.py
+++ b/tests/test_gui_main_window.py
@@ -12,6 +12,7 @@
     MenuActionDispatcher,
     action_kind,
 )
+from eegprep.functions.guifunc.long_task import LongTaskHandle
 from eegprep.functions.guifunc.menu_placeholders import is_placeholder_action, placeholder_message
 from eegprep.functions.guifunc.menu_spec import menu_enabled
 from eegprep.functions.guifunc.session import EEGPrepSession
@@ -855,6 +856,93 @@ def test_new_main_window_pop_actions_dispatch_to_real_wrappers(self):
                 else:
                     self.assertEqual(session.ALLCOM[-1], f"EEG = {action}(EEG);")
 
+    def test_gui_pop_runica_runs_ica_in_long_task_before_committing_result(self):
+        session = EEGPrepSession()
+        session.store_current(_demo_eeg(), new=True)
+        refresh = mock.Mock()
+        dispatcher = MenuActionDispatcher(session, refresh=refresh)
+        output = dict(session.EEG, setname="ica")  # renamed copy, so a commit is visible through "setname"
+        options = {
+            "icatype": "runica",
+            "options": {"extended": 1, "interrupt": "on"},
+            "reorder": "on",
+            "chanind": None,
+            "dataset": None,
+            "concatenate": "off",
+            "concatcond": "off",
+        }
+        captured = {}  # keyword arguments the dispatcher passes to run_long_task
+        handle = LongTaskHandle(thread=object(), worker=object(), dialog=object())  # placeholder members only
+        events = []
+        original_selection = session.EEG  # taken before dispatch; the task must be called with this object
+        session.add_gui_action_listener(lambda event, action: events.append((event, action)))
+
+        def fake_run_long_task(**kwargs):  # stands in for run_long_task: records callbacks, starts nothing
+            captured.update(kwargs)
+            return handle
+
+        with (
+            mock.patch("eegprep.functions.popfunc.pop_runica.pop_runica_gui_options", return_value=options),
+            mock.patch(
+                "eegprep.functions.popfunc.pop_runica.pop_runica",
+                return_value=(output, "EEG = pop_runica(EEG, 'icatype', 'runica', 'extended', 1, 'interrupt', 'on');"),
+            ) as pop_func,
+            mock.patch("eegprep.functions.guifunc.menu_actions.run_long_task", side_effect=fake_run_long_task),
+        ):
+            dispatcher.dispatch("pop_runica", parent=object())
+            self.assertEqual(session.EEG["setname"], "demo")  # nothing committed before the task succeeds
+
+            result = captured["task"]()  # drive the captured callbacks by hand: task, success, finished
+            captured["on_success"](result)
+            captured["on_finished"](handle)
+
+        pop_func.assert_called_once_with(original_selection, gui=False, return_com=True, **options)
+        self.assertEqual(session.EEG["setname"], "ica")
+        self.assertEqual(session.ALLEEG[0]["setname"], "ica")
+        self.assertEqual(
+            session.ALLCOM[-1],
+            "EEG = pop_runica(EEG, 'icatype', 'runica', 'extended', 1, 'interrupt', 'on');",
+        )
+        refresh.assert_called_once()
+        self.assertEqual(events, [("begin", "pop_runica"), ("end", "pop_runica")])
+        self.assertEqual(dispatcher._long_tasks, [])  # the handle is dropped once on_finished has run
+
+    def test_gui_pop_runica_long_task_error_does_not_mutate_session(self):
+        session = EEGPrepSession()
+        session.store_current(_demo_eeg(), new=True)
+        dispatcher = MenuActionDispatcher(session)
+        options = {
+            "icatype": "runica",
+            "options": {"extended": 1},
+            "reorder": "on",
+            "chanind": None,
+            "dataset": None,
+            "concatenate": "off",
+            "concatcond": "off",
+        }
+        captured = {}  # keyword arguments the dispatcher passes to run_long_task
+        handle = LongTaskHandle(thread=object(), worker=object(), dialog=object())  # placeholder members only
+        warnings = []  # messages routed through the patched dispatcher._warn
+
+        def fake_run_long_task(**kwargs):  # stands in for run_long_task: records callbacks, starts nothing
+            captured.update(kwargs)
+            return handle
+
+        with (
+            mock.patch("eegprep.functions.popfunc.pop_runica.pop_runica_gui_options", return_value=options),
+            mock.patch("eegprep.functions.guifunc.menu_actions.run_long_task", side_effect=fake_run_long_task),
+            mock.patch.object(dispatcher, "_warn", side_effect=lambda _parent, message: warnings.append(message)),
+        ):
+            dispatcher.dispatch("pop_runica", parent=object())
+            captured["on_error"](ValueError("runica failed"))  # simulate the failure path; the task never runs
+            captured["on_finished"](handle)
+
+        self.assertEqual(session.EEG["setname"], "demo")
+        self.assertEqual(session.ALLEEG[0]["setname"], "demo")
+        self.assertEqual(session.ALLCOM, [])  # no history entry for a failed run
+        self.assertEqual(warnings, ["runica failed"])
+        self.assertEqual(dispatcher._long_tasks, [])
+
     def test_gui_transform_action_can_commit_processed_dataset_as_new_set(self):
         session = EEGPrepSession()
         session.store_current(_demo_eeg(), new=True)

From 8ba958e0a1d51cab03cebda6166b66b3d3822e7b Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Thu, 11 Jun 2026 01:14:58 -0700
Subject: [PATCH 14/16] Marshal long task callbacks to UI thread

---
 src/eegprep/functions/guifunc/long_task.py | 42 +++++++++++++-----
 tests/test_gui_long_task.py                | 51 +++++++++++++++++++++-
 2 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/src/eegprep/functions/guifunc/long_task.py b/src/eegprep/functions/guifunc/long_task.py
index 09bf32b1..0cefc811 100644
--- a/src/eegprep/functions/guifunc/long_task.py
+++ b/src/eegprep/functions/guifunc/long_task.py
@@ -14,9 +14,6 @@
     QtWidgets = None
 
 
-logger = logging.getLogger(__name__)
-
-
 @dataclass
 class LongTaskHandle:
     """Keep Qt task objects alive until their worker thread finishes."""
@@ -24,6 +21,7 @@ class LongTaskHandle:
     thread: Any
     worker: Any
     dialog: Any
+    receiver: Any | None = None
 
 
 def run_long_task(
@@ -75,19 +73,39 @@ def run(self) -> None:
 
     thread = qt_core.QThread()
     worker = Worker()
-    handle = LongTaskHandle(thread=thread, worker=worker, dialog=progress)
+
+    class Receiver(qt_core.QObject):  # created by the caller; only the worker is moved to the thread below
+        @qt_core.Slot(str)
+        def handle_message(self, message: str) -> None:  # text emitted on worker.message
+            _update_progress_label(progress, label, message)
+
+        @qt_core.Slot(object)
+        def handle_success(self, result: Any) -> None:  # payload of worker.succeeded
+            on_success(result)
+
+        @qt_core.Slot(object)
+        def handle_error(self, exc: Exception) -> None:  # payload of worker.failed
+            on_error(exc)
+
+        @qt_core.Slot()
+        def handle_finished(self) -> None:  # connected to thread.finished in the wiring below
+            progress.close()
+            if on_finished is not None:
+                on_finished(handle)  # ``handle`` is assigned right after Receiver() is instantiated
+
+    receiver = Receiver()
+    handle = LongTaskHandle(thread=thread, worker=worker, dialog=progress, receiver=receiver)
 
     worker.moveToThread(thread)
     thread.started.connect(worker.run)
-    worker.message.connect(lambda message: _update_progress_label(progress, label, message))
-    worker.succeeded.connect(on_success)
-    worker.failed.connect(on_error)
+    worker.message.connect(receiver.handle_message)
+    worker.succeeded.connect(receiver.handle_success)
+    worker.failed.connect(receiver.handle_error)
     worker.finished.connect(thread.quit)
-    thread.finished.connect(worker.deleteLater)
+    worker.finished.connect(worker.deleteLater)
     thread.finished.connect(thread.deleteLater)
-    thread.finished.connect(progress.close)
-    if on_finished is not None:
-        thread.finished.connect(lambda: on_finished(handle))
+    thread.finished.connect(receiver.handle_finished)
+    thread.finished.connect(receiver.deleteLater)
 
     progress._eegprep_long_task = handle
     progress.show()
@@ -104,7 +122,7 @@ def emit(self, record: logging.LogRecord) -> None:
         try:
             self.signal.emit(self.format(record))
         except Exception:
-            logger.exception("Failed to forward long-task log message")
+            self.handleError(record)
 
 
 def _update_progress_label(progress: Any, label: str, message: str) -> None:
diff --git a/tests/test_gui_long_task.py b/tests/test_gui_long_task.py
index c00fdb5e..25761f4f 100644
--- a/tests/test_gui_long_task.py
+++ b/tests/test_gui_long_task.py
@@ -1,8 +1,10 @@
 import logging
 import os
+import threading
 
 import pytest
 
+import eegprep.functions.guifunc.long_task as long_task_module
 from eegprep.functions.guifunc.long_task import run_long_task
 
 
@@ -46,23 +48,69 @@ def task():
     assert "worker progress" in handle.dialog.labelText()
 
 
+def test_run_long_task_callbacks_are_delivered_on_main_thread(qapp, monkeypatch):
+    from PySide6 import QtCore
+
+    loop = QtCore.QEventLoop()
+    main_thread_id = threading.get_ident()  # thread running this test body and its event loop
+    task_thread_ids = []
+    message_thread_ids = []
+    success_thread_ids = []
+    original_update = long_task_module._update_progress_label  # kept so the spy can delegate to it
+
+    def update_progress_label(progress, label, message):  # spy: records the calling thread, then delegates
+        message_thread_ids.append(threading.get_ident())
+        original_update(progress, label, message)
+
+    def task():
+        task_thread_ids.append(threading.get_ident())
+        logging.getLogger("eegprep.tests").info("worker progress")
+        return "done"
+
+    monkeypatch.setattr(long_task_module, "_update_progress_label", update_progress_label)
+
+    run_long_task(
+        parent=None,
+        title="Running test task",
+        label="Running test task.",
+        task=task,
+        on_success=lambda _result: success_thread_ids.append(threading.get_ident()),
+        on_error=lambda _exc: None,
+        on_finished=lambda _handle: loop.quit(),
+    )
+    QtCore.QTimer.singleShot(3000, loop.quit)  # stop waiting after 3 s if on_finished never arrives
+    loop.exec()
+
+    assert task_thread_ids
+    assert task_thread_ids[0] != main_thread_id  # the task itself ran on another thread
+    assert message_thread_ids
+    assert all(thread_id == main_thread_id for thread_id in message_thread_ids)
+    assert success_thread_ids == [main_thread_id]
+
+
 def test_run_long_task_reports_errors(qapp):
     from PySide6 import QtCore
 
     loop = QtCore.QEventLoop()
     results = []
     errors = []
+    error_thread_ids = []
+    main_thread_id = threading.get_ident()
 
     def task():
         raise ValueError("task failed")
 
+    def on_error(exc):
+        error_thread_ids.append(threading.get_ident())
+        errors.append(exc)
+
     run_long_task(
         parent=None,
         title="Running test task",
         label="Running test task.",
         task=task,
         on_success=results.append,
-        on_error=errors.append,
+        on_error=on_error,
         on_finished=lambda _handle: loop.quit(),
     )
     QtCore.QTimer.singleShot(3000, loop.quit)
@@ -71,3 +119,4 @@ def task():
     assert results == []
     assert len(errors) == 1
     assert str(errors[0]) == "task failed"
+    assert error_thread_ids == [main_thread_id]

From 91563111df8ffd025e4a86d5e734fc635d8c56da Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Thu, 11 Jun 2026 18:22:42 -0700
Subject: [PATCH 15/16] Install console deps in dev environment

---
 docs/source/contributing.rst            | 1 +
 docs/source/development.rst             | 6 ++++--
 docs/source/user_guide/installation.rst | 4 +++-
 pyproject.toml                          | 5 +++++
 tests/test_public_api_examples.py       | 9 +++++++++
 uv.lock                                 | 7 +++++++
 6 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index 147deecf..cd2d957c 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -58,6 +58,7 @@ If you only need documentation dependencies, sync the docs extra:
 
 - The eegprep package in editable mode
 - Repo tooling dependencies
+- GUI and ``eegprep-console`` runtime dependencies
 - Documentation dependencies when ``--extra docs`` is used
 
 Code Style Guidelines
diff --git a/docs/source/development.rst b/docs/source/development.rst
index f04c9171..ed86b16b 100644
--- a/docs/source/development.rst
+++ b/docs/source/development.rst
@@ -58,8 +58,10 @@ Install the default development environment:
     uv sync --group dev
 
 ``uv sync`` creates ``.venv/`` and installs EEGPrep in editable mode from the
-locked dependency set. Use ``uv run`` for commands so they execute inside this
-environment.
+locked dependency set. The development environment includes the GUI and
+``eegprep-console`` runtime dependencies so ``uv run eegprep-console --full``
+works from a fresh checkout. Use ``uv run`` for commands so they execute inside
+this environment.
 
 Install Documentation Dependencies
 ----------------------------------
diff --git a/docs/source/user_guide/installation.rst b/docs/source/user_guide/installation.rst
index 25f4a0ce..11d9893d 100644
--- a/docs/source/user_guide/installation.rst
+++ b/docs/source/user_guide/installation.rst
@@ -75,7 +75,9 @@ To install eegprep from source for development:
     uv sync --group dev
 
 ``uv sync`` creates the project environment, installs EEGPrep in editable mode,
-and uses ``uv.lock`` for reproducible dependency resolution.
+and uses ``uv.lock`` for reproducible dependency resolution. The development
+environment includes the GUI and console runtime dependencies, so a fresh
+checkout can immediately launch ``uv run eegprep-console --full``.
 
 To develop or build documentation from source, include the docs extra:
 
diff --git a/pyproject.toml b/pyproject.toml
index 5fb8d58f..a2515e9a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,7 +85,12 @@ eegprep-validate-extension-catalog = "eegprep.extension_catalog:main"
 
 [dependency-groups]
 dev = [
+  # Keep `uv run eegprep-console --full` working from a fresh source checkout.
+  # Published installs still keep GUI/console dependencies behind extras.
+  "ipython>=8.0",
   "pytest>=8.0",
+  "pyqtgraph>=0.13.7",
+  "PySide6>=6.6",
   "ruff>=0.15.14",
   "tomli>=2.0; python_version < '3.11'",
   "ty>=0.0.39",
diff --git a/tests/test_public_api_examples.py b/tests/test_public_api_examples.py
index af4b524e..525b5ad9 100644
--- a/tests/test_public_api_examples.py
+++ b/tests/test_public_api_examples.py
@@ -72,6 +72,15 @@ def test_project_entry_points_cover_gui_and_console() -> None:
     assert pyproject["project"]["scripts"]["eegprep"] == "eegprep.cli.main:main"
 
 
+def test_development_dependencies_cover_console_runtime() -> None:
+    """The ``dev`` dependency group must list the console/GUI runtime requirements verbatim."""
+    declared = frozenset(_read_pyproject()["dependency-groups"]["dev"])
+    required = ("ipython>=8.0", "pyqtgraph>=0.13.7", "PySide6>=6.6")
+
+    missing = [requirement for requirement in required if requirement not in declared]
+    assert not missing
+
+
 def test_setuptools_package_data_covers_runtime_resources() -> None:
     pyproject = _read_pyproject()
     package_root = REPO_ROOT / "src/eegprep"
diff --git a/uv.lock b/uv.lock
index 4a78a0a0..f100f602 100644
--- a/uv.lock
+++ b/uv.lock
@@ -753,6 +753,10 @@ torch = [
 
 [package.dev-dependencies]
 dev = [
+    { name = "ipython", version = "8.39.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+    { name = "ipython", version = "9.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "pyqtgraph" },
+    { name = "pyside6" },
     { name = "pytest" },
     { name = "ruff" },
     { name = "tomli", marker = "python_full_version < '3.11'" },
@@ -808,6 +812,9 @@ provides-extras = ["torch", "eeglabio", "gui", "console", "docs", "all"]
 
 [package.metadata.requires-dev]
 dev = [
+    { name = "ipython", specifier = ">=8.0" },
+    { name = "pyqtgraph", specifier = ">=0.13.7" },
+    { name = "pyside6", specifier = ">=6.6" },
     { name = "pytest", specifier = ">=8.0" },
     { name = "ruff", specifier = ">=0.15.14" },
     { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" },

From 6cb7e14bd56b6e7a8fcaeecd5722368882019286 Mon Sep 17 00:00:00 2001
From: suraj-ranganath <suraj.ranganath@gmail.com>
Date: Thu, 11 Jun 2026 18:48:49 -0700
Subject: [PATCH 16/16] Address console stack review findings

---
 src/eegprep/functions/guifunc/long_task.py  | 54 +++++++++++++++------
 src/eegprep/functions/guifunc/session.py    |  3 ++
 src/eegprep/functions/popfunc/pop_newset.py |  5 +-
 tests/test_gui_long_task.py                 | 31 ++++++++++++
 tests/test_gui_main_window.py               | 35 +++++++++++++
 tests/test_gui_pop_runica.py                | 15 +++++-
 tests/test_pop_newset.py                    | 20 ++++++++
 7 files changed, 145 insertions(+), 18 deletions(-)

diff --git a/src/eegprep/functions/guifunc/long_task.py b/src/eegprep/functions/guifunc/long_task.py
index 0cefc811..a7aa9889 100644
--- a/src/eegprep/functions/guifunc/long_task.py
+++ b/src/eegprep/functions/guifunc/long_task.py
@@ -5,6 +5,7 @@
 from collections.abc import Callable
 from dataclasses import dataclass
 import logging
+import threading
 from typing import Any
 
 try:  # pragma: no cover - depends on optional GUI dependency
@@ -24,6 +25,11 @@ class LongTaskHandle:
     receiver: Any | None = None
 
 
+_LOGGER_LOCK = threading.Lock()  # held by _ForwardEegprepLogs while it touches the two values below
+_LOGGER_DEPTH = 0  # number of _ForwardEegprepLogs contexts currently entered
+_LOGGER_OLD_LEVEL: int | None = None  # "eegprep" logger level saved when the depth goes from 0 to 1
+
+
 def run_long_task(
     *,
     parent: Any | None,
@@ -54,22 +60,15 @@ class Worker(qt_core.QObject):
         def run(self) -> None:
             handler = _SignalLogHandler(self.message)
             handler.setFormatter(logging.Formatter("%(message)s"))
-            handler.setLevel(logging.INFO)
-            eegprep_logger = logging.getLogger("eegprep")
-            old_level = eegprep_logger.level
-            if old_level == logging.NOTSET or old_level > logging.INFO:
-                eegprep_logger.setLevel(logging.INFO)
-            eegprep_logger.addHandler(handler)
             try:
-                self.message.emit(label)
-                result = task()
-                self.succeeded.emit(result)
+                with _ForwardEegprepLogs(handler):  # inside try: setup errors still reach failed/finished
+                    self.message.emit(label)
+                    result = task()
+                    self.succeeded.emit(result)
             except Exception as exc:  # noqa: BLE001 - forwarded to GUI error handler.
                 self.failed.emit(exc)
             finally:
-                eegprep_logger.removeHandler(handler)
-                eegprep_logger.setLevel(old_level)
                 self.finished.emit()
 
     thread = qt_core.QThread()
     worker = Worker()
@@ -125,6 +124,31 @@ def emit(self, record: logging.LogRecord) -> None:
             self.handleError(record)
 
 
+# Context manager that attaches ``handler`` to the "eegprep" logger for the duration of the block.
+class _ForwardEegprepLogs:
+    def __init__(self, handler: logging.Handler) -> None:
+        self.handler, self.logger = handler, logging.getLogger("eegprep")
+
+    def __enter__(self) -> None:
+        global _LOGGER_DEPTH, _LOGGER_OLD_LEVEL
+        with _LOGGER_LOCK:
+            if not _LOGGER_DEPTH:  # outermost forwarder: remember the level, then open it up to INFO
+                _LOGGER_OLD_LEVEL = current = self.logger.level
+                if current == logging.NOTSET or current > logging.INFO:
+                    self.logger.setLevel(logging.INFO)
+            _LOGGER_DEPTH += 1
+            self.logger.addHandler(self.handler)
+
+    def __exit__(self, _exc_type: Any, _exc: Any, _tb: Any) -> None:
+        global _LOGGER_DEPTH, _LOGGER_OLD_LEVEL
+        with _LOGGER_LOCK:
+            self.logger.removeHandler(self.handler)
+            _LOGGER_DEPTH -= 1
+            if not _LOGGER_DEPTH:  # last forwarder out: put the remembered level back
+                previous, _LOGGER_OLD_LEVEL = _LOGGER_OLD_LEVEL, None
+                self.logger.setLevel(logging.NOTSET if previous is None else previous)
+
+
 def _update_progress_label(progress: Any, label: str, message: str) -> None:
     message = str(message).strip()
     progress.setLabelText(label if not message or message == label else f"{label}\n{message}")
diff --git a/src/eegprep/functions/guifunc/session.py b/src/eegprep/functions/guifunc/session.py
index 6c8df35f..951081ab 100644
--- a/src/eegprep/functions/guifunc/session.py
+++ b/src/eegprep/functions/guifunc/session.py
@@ -253,6 +253,9 @@ def apply_workspace_state(
             if resolved_currentset and max(resolved_currentset) > len(resolved_alleeg):
                 raise ValueError("CURRENTSET contains indices outside ALLEEG")
             resolved_eeg = self._resolve_workspace_eeg(eeg, resolved_alleeg, resolved_currentset)
+            current = resolved_eeg if isinstance(resolved_eeg, list) else [resolved_eeg]
+            if resolved_currentset and len(current) != len(resolved_currentset):
+                raise ValueError("EEG selection length must match CURRENTSET")
             self.ALLEEG = resolved_alleeg
             self.EEG = resolved_eeg
             self.CURRENTSET = resolved_currentset
diff --git a/src/eegprep/functions/popfunc/pop_newset.py b/src/eegprep/functions/popfunc/pop_newset.py
index 121304ed..0f43bac2 100644
--- a/src/eegprep/functions/popfunc/pop_newset.py
+++ b/src/eegprep/functions/popfunc/pop_newset.py
@@ -153,8 +153,9 @@ def _run_gui(
         "gui": "off",
     }
     comments = result.get("comments")
-    if comments is None:
-        comments = result.get("editdescription")
+    edited_comments = result.get("editdescription")
+    if comments is None and isinstance(edited_comments, str):
+        comments = edited_comments
     if comments is not None:
         gui_options["comments"] = str(comments)
     if _is_on(result.get("savenew")):
diff --git a/tests/test_gui_long_task.py b/tests/test_gui_long_task.py
index 25761f4f..33b33091 100644
--- a/tests/test_gui_long_task.py
+++ b/tests/test_gui_long_task.py
@@ -48,6 +48,37 @@ def task():
     assert "worker progress" in handle.dialog.labelText()
 
 
+def test_run_long_task_restores_eegprep_logger_level_after_forwarding_progress(qapp):
+    """INFO progress is forwarded, yet the ``eegprep`` logger ends at the WARNING level set here."""
+    from PySide6.QtCore import QEventLoop, QTimer
+
+    loop = QEventLoop()
+    eegprep_logger = logging.getLogger("eegprep")
+    level_before_test = eegprep_logger.level
+    eegprep_logger.setLevel(logging.WARNING)
+
+    def task():
+        logging.getLogger("eegprep.tests").info("worker progress")
+        return "done"
+
+    try:
+        handle = run_long_task(
+            parent=None,
+            title="Running test task",
+            label="Running test task.",
+            task=task,
+            on_success=lambda _result: None,
+            on_error=lambda _exc: None,
+            on_finished=lambda _handle: loop.quit(),
+        )
+        QTimer.singleShot(3000, loop.quit)  # stop waiting after 3 s if on_finished never arrives
+        loop.exec()
+        assert "worker progress" in handle.dialog.labelText()
+        assert eegprep_logger.level == logging.WARNING
+    finally:
+        eegprep_logger.setLevel(level_before_test)  # do not leak the WARNING level into other tests
+
+
 def test_run_long_task_callbacks_are_delivered_on_main_thread(qapp, monkeypatch):
     from PySide6 import QtCore
 
diff --git a/tests/test_gui_main_window.py b/tests/test_gui_main_window.py
index 99e9819f..2e21d3a2 100644
--- a/tests/test_gui_main_window.py
+++ b/tests/test_gui_main_window.py
@@ -360,6 +360,41 @@ def test_session_stores_multiple_selected_datasets_back_to_same_indices(self):
         self.assertEqual(session.CURRENTSET, [1, 2])
         self.assertEqual([item["ref"] for item in session.ALLEEG], ["average", "average"])
 
+    def test_apply_workspace_state_rejects_currentset_outside_alleeg_before_mutating(self):
+        """An out-of-range CURRENTSET raises and leaves EEG, ALLEEG and CURRENTSET as they were."""
+        session = EEGPrepSession()
+        session.store_current(_demo_eeg(), new=True)
+        eeg_before = session.EEG
+        alleeg_before, currentset_before = list(session.ALLEEG), list(session.CURRENTSET)
+
+        with self.assertRaisesRegex(ValueError, "CURRENTSET contains indices outside ALLEEG"):
+            session.apply_workspace_state(alleeg=[_demo_eeg()], currentset=2)
+
+        self.assertIs(session.EEG, eeg_before)
+        self.assertEqual(len(session.ALLEEG), len(alleeg_before))
+        self.assertIs(session.ALLEEG[0], alleeg_before[0])
+        self.assertEqual(session.CURRENTSET, currentset_before)
+
+    def test_apply_workspace_state_rejects_eeg_list_length_mismatch_before_mutating(self):
+        """One EEG for two selected sets raises and leaves EEG, ALLEEG and CURRENTSET as they were."""
+        session = EEGPrepSession()
+        first = _demo_eeg()
+        second = _demo_eeg()
+        second["setname"] = "second"
+        for dataset in (first, second):
+            session.store_current(dataset, new=True)
+        eeg_before = session.EEG
+        alleeg_before, currentset_before = list(session.ALLEEG), list(session.CURRENTSET)
+
+        with self.assertRaisesRegex(ValueError, "EEG selection length must match CURRENTSET"):
+            session.apply_workspace_state(alleeg=[first, second], eeg=[first], currentset=[1, 2])
+
+        self.assertIs(session.EEG, eeg_before)
+        self.assertEqual(len(session.ALLEEG), len(alleeg_before))
+        self.assertIs(session.ALLEEG[0], alleeg_before[0])
+        self.assertIs(session.ALLEEG[1], alleeg_before[1])
+        self.assertEqual(session.CURRENTSET, currentset_before)
+
     def test_session_delete_current_selects_remaining_dataset(self):
         session = EEGPrepSession()
         first = _demo_eeg()
diff --git a/tests/test_gui_pop_runica.py b/tests/test_gui_pop_runica.py
index 0b05c8af..d31004e2 100644
--- a/tests/test_gui_pop_runica.py
+++ b/tests/test_gui_pop_runica.py
@@ -10,7 +10,7 @@
 from eegprep.functions.guifunc.spec import controls_by_tag
 from eegprep.functions.guifunc.qt import QtDialogRenderer
 from eegprep.functions.popfunc.pop_loadset import pop_loadset
-from eegprep.functions.popfunc.pop_runica import pop_runica, pop_runica_dialog_spec
+from eegprep.functions.popfunc.pop_runica import pop_runica, pop_runica_dialog_spec, pop_runica_gui_options
 
 
 def _eeg():
@@ -96,6 +96,19 @@ def run(self, spec, initial_values=None):
             "EEG = pop_runica(EEG, 'icatype', 'runica', 'extended', 1, 'maxsteps', 2, 'interrupt', 'on');",
         )
 
+    def test_gui_options_do_not_inject_interrupt_for_non_runica_algorithms(self):
+        class Renderer:  # fake dialog renderer returning fixed values
+            def run(self, spec, initial_values=None):  # "icatype" 4 is expected to resolve to picard (see below)
+                return {"icatype": 4, "params": "'maxiter', 7", "reorder": True, "chantype": ""}
+
+        options = pop_runica_gui_options(_eeg(), renderer=Renderer())
+
+        self.assertIsNotNone(options)
+        assert options is not None  # presumably repeated as a plain assert so type checkers narrow the Optional
+        self.assertEqual(options["icatype"], "picard")
+        self.assertEqual(options["options"], {"maxiter": 7})
+        self.assertNotIn("interrupt", options["options"])  # 'interrupt' is only added for runica
+
     def test_gui_result_runs_runica_and_returns_history(self):
         class Renderer:
             def run(self, spec, initial_values=None):
diff --git a/tests/test_pop_newset.py b/tests/test_pop_newset.py
index f4f56664..d135041f 100644
--- a/tests/test_pop_newset.py
+++ b/tests/test_pop_newset.py
@@ -171,6 +171,26 @@ def run(self, _spec, initial_values=None):
     )
 
 
+def test_pop_newset_gui_untouched_description_button_preserves_comments():
+    class Renderer:  # fake dialog renderer returning fixed values
+        def run(self, _spec, initial_values=None):  # "editdescription" comes back as a bool, not edited text
+            return {"setname": "processed", "editdescription": False, "overwrite": 1}
+
+    alleeg, current, current_set, _command = pop_newset([], _eeg(name="original"), 0)
+    current["comments"] = "old notes"
+    processed = _eeg(name="processed")
+    processed["comments"] = "old notes"
+
+    alleeg, current, current_set, command = pop_newset(alleeg, processed, current_set, "gui", "on", renderer=Renderer())
+
+    assert len(alleeg) == 2
+    assert current_set == 2
+    assert current["comments"] == "old notes"  # the non-string value did not replace the comments
+    assert command == (  # and no 'comments' option appears in the recorded history command
+        "[ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, CURRENTSET, 'setname', 'processed', 'overwrite', 'off');"
+    )
+
+
 def test_pop_newset_gui_choice_can_overwrite_current_dataset():
     class Renderer:
         def run(self, _spec, initial_values=None):