From 848266df4e4a87460c12eabed18dc41dd32d3abd Mon Sep 17 00:00:00 2001
From: Ben Dichter <ben.dichter@gmail.com>
Date: Wed, 13 May 2026 14:50:47 -0400
Subject: [PATCH 1/3] 000718_demo: drop dandi_authenticate() calls (dandiset is
 no longer embargoed)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The notebook calls `client.dandi_authenticate()` in three cells (8, 43,
63), each with the inline comment:

  'This line is necessary when the dataset is in embargoed mode and
   only owners can view the data, once it will be published this line
   can be removed.'

Verified via the DANDI API that 000718 is now `embargo_status: OPEN`
— so the authentication call is no longer needed (and would break
Colab use because Colab users don't have DANDI creds wired up).

`dandi_authenticate()` is also what was triggering the `input()`
prompt that put this notebook on `.github/notebook-test-exclusions.txt`
with 'raises EOFError under non-interactive nbconvert'. Removing the
calls makes the notebook CI-testable, so also dropped it from the
test-exclusions list.

Same fix that landed in #100 for 001172_demo.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/notebook-test-exclusions.txt      | 4 ----
 000718/CaiLab/zaki_2024/000718_demo.ipynb | 6 ------
 2 files changed, 10 deletions(-)

diff --git a/.github/notebook-test-exclusions.txt b/.github/notebook-test-exclusions.txt
index 9332426..fc6cb64 100644
--- a/.github/notebook-test-exclusions.txt
+++ b/.github/notebook-test-exclusions.txt
@@ -65,10 +65,6 @@ tutorials/cosyne_2023/advanced_asset_search.ipynb
 # in headless CI. Works in Colab.
 001170/ReimerLab/public_demo/001170_demo.ipynb
 
-# Calls input() to prompt the user; raises EOFError under non-interactive
-# nbconvert. Works in Colab where input() is wired to a UI prompt.
-000718/CaiLab/zaki_2024/000718_demo.ipynb
-
 # =============================================================================
 # Pre-existing notebook content bugs — track + fix in dedicated PRs.
 # =============================================================================
diff --git a/000718/CaiLab/zaki_2024/000718_demo.ipynb b/000718/CaiLab/zaki_2024/000718_demo.ipynb
index 1b81266..2352645 100644
--- a/000718/CaiLab/zaki_2024/000718_demo.ipynb
+++ b/000718/CaiLab/zaki_2024/000718_demo.ipynb
@@ -240,8 +240,6 @@
     "\n",
     "dandiset_id = \"000718\"\n",
     "with DandiAPIClient() as client:\n",
-    "    #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n",
-    "    client.dandi_authenticate() \n",
     "    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n",
     "    s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n",
     "\n",
@@ -2125,8 +2123,6 @@
     "\n",
     "dandiset_id = \"000718\"\n",
     "with DandiAPIClient() as client:\n",
-    "    #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n",
-    "    client.dandi_authenticate() \n",
     "    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n",
     "    s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n",
     "\n",
@@ -3964,8 +3960,6 @@
     "\n",
     "dandiset_id = \"000718\"\n",
     "with DandiAPIClient() as client:\n",
-    "    #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n",
-    "    client.dandi_authenticate() \n",
     "    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n",
     "    s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n",
     "\n",

From e0d762157776eab95af97c0346775d4cf5643b59 Mon Sep 17 00:00:00 2001
From: Ben Dichter <ben.dichter@gmail.com>
Date: Wed, 13 May 2026 14:59:59 -0400
Subject: [PATCH 2/3] Runner: stream notebook execution output for CI
 diagnostics

Two changes to .github/scripts/run_notebook.py:

1. Insert progress markers between cells. The nbconvert-generated .py
   has `# In[N]:` separators; instrument the script to print
   `::cell N::` before each. If execution dies mid-cell (eg OOM
   kills the process), the last printed marker pinpoints the offender.

2. Stream subprocess output live to the parent's stdout instead of
   capturing it all in memory and printing only on exit. The previous
   captured-output design meant a SIGTERM'd runner produced empty
   logs; with streaming, every output up to the kill point reaches
   the Actions logs. Still tees a copy to the log file for the
   uploaded artifact.

Motivated by PR #162's 000718_demo failure: two consecutive runs got
SIGTERM at ~80s with no diagnostic output, so we couldn't tell what
was happening. With this instrumentation in place, the next run will
show exactly which cell triggers the kill.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/scripts/run_notebook.py | 60 ++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/.github/scripts/run_notebook.py b/.github/scripts/run_notebook.py
index 3c5b31e..27bd88c 100644
--- a/.github/scripts/run_notebook.py
+++ b/.github/scripts/run_notebook.py
@@ -143,30 +143,58 @@ def finalize(stage: str, ok: bool, error: str | None = None, **extra) -> int:
     r = run(["jupyter", "nbconvert", "--to", "script", "--stdout", str(tmp_nb)])
     if r.returncode != 0:
         return finalize("convert", False, error=r.stderr[-3000:])
-    script_path.write_text(r.stdout)
-
+    raw_script = r.stdout
+
+    # Insert progress markers before each `# In[...]` cell separator so the
+    # CI log shows which cell is running. If the process is killed mid-cell
+    # (eg OOM), the last marker tells you the offending cell.
+    instrumented = []
+    for line in raw_script.splitlines(keepends=True):
+        m = re.match(r"# In\[(.*?)\]:", line.strip())
+        if m:
+            cell_label = m.group(1) or "?"
+            instrumented.append(
+                f'import sys as _sys; print("::cell {cell_label}::", flush=True); _sys.stderr.flush()\n'
+            )
+        instrumented.append(line)
+    script_path.write_text("".join(instrumented))
+
+    # Stream ipython output live to this process's stdout/stderr so CI logs
+    # show progress in real time. Tee a copy to the log file via Popen.
+    import shlex
+    cmd = ["ipython", "--colors=NoColor", "--no-banner",
+           "--InteractiveShell.history_load_length=0", str(script_path)]
+    print(f"\n=== executing notebook (streaming) ===\n  {shlex.join(cmd)}", flush=True)
+    log_f = log_path.open("a")
+    log_f.write(f"\n=== execute (streaming) ===\n")
+    log_f.flush()
+    proc = subprocess.Popen(
+        cmd, cwd=str(nb_dir),
+        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True, bufsize=1,
+    )
+    captured = []
     try:
-        r = run(
-            ["ipython", "--colors=NoColor", "--no-banner",
-             "--InteractiveShell.history_load_length=0", str(script_path)],
-            cwd=str(nb_dir),
-            timeout=args.timeout,
-        )
+        for line in proc.stdout:
+            sys.stdout.write(line)
+            sys.stdout.flush()
+            log_f.write(line)
+            captured.append(line)
+        proc.wait(timeout=args.timeout)
     except subprocess.TimeoutExpired:
+        proc.kill()
+        log_f.close()
         return finalize("execute", False,
                         error=f"hit overall {args.timeout}s timeout")
+    log_f.close()
 
-    with log_path.open("a") as f:
-        f.write(f"\n=== execute rc={r.returncode} ===\n")
-        f.write(f"--- stdout (tail) ---\n{r.stdout[-2000:]}\n")
-        f.write(f"--- stderr (tail) ---\n{r.stderr[-3000:]}\n")
-
+    full_output = "".join(captured)
     looks_failed = (
-        r.returncode != 0
-        or "Traceback (most recent call last)" in r.stderr
+        proc.returncode != 0
+        or "Traceback (most recent call last)" in full_output
     )
     if looks_failed:
-        return finalize("execute", False, error=r.stderr[-3000:])
+        return finalize("execute", False, error=full_output[-3000:])
 
     return finalize("done", True)
 

From 2c46b1ba5dfa9a264f1edf0c61d9569c37f646a3 Mon Sep 17 00:00:00 2001
From: Ben Dichter <ben.dichter@gmail.com>
Date: Wed, 13 May 2026 15:05:13 -0400
Subject: [PATCH 3/3] 000718_demo: subsample EEG signal for visualization
 (avoid OOM)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The EEG plot cell did:

    ax.plot(timestamps, eeg_signal.data[:])

This loads the full ~1-week EEG recording into memory (~60M+ samples
through remfile HTTP range requests, then matplotlib trying to render
every point). CI consistently SIGTERMs ~80s into execution at this
cell — the runner gets OOM-killed.

Subsample to ~50k points for the overview plot. Users who want the
full-resolution signal can slice `eeg_signal.data[start:stop]` for
a specific time window. This is also a real UX improvement on Colab —
plotting 60M points was unusably slow there too.

Caught after PR-#162's CI run showed the kill happens right after the
::cell 41:: progress marker.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 000718/CaiLab/zaki_2024/000718_demo.ipynb | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/000718/CaiLab/zaki_2024/000718_demo.ipynb b/000718/CaiLab/zaki_2024/000718_demo.ipynb
index 2352645..9b444e1 100644
--- a/000718/CaiLab/zaki_2024/000718_demo.ipynb
+++ b/000718/CaiLab/zaki_2024/000718_demo.ipynb
@@ -4062,9 +4062,20 @@
    ],
    "source": [
     "eeg_signal = nwbfile.acquisition[\"EEGSignal\"]\n",
+    "\n",
+    "# This recording spans roughly a week at the EEG sample rate, so loading and\n",
+    "# plotting `eeg_signal.data[:]` is hundreds of millions of points — too large\n",
+    "# for matplotlib (and too slow over remfile streaming). Subsample to ~50k\n",
+    "# points for the overview plot; users who want full-resolution data can slice\n",
+    "# `eeg_signal.data[start:stop]` directly.\n",
+    "n = eeg_signal.data.shape[0]\n",
+    "stride = max(1, n // 50_000)\n",
+    "ts_view = timestamps[::stride]\n",
+    "data_view = eeg_signal.data[::stride]\n",
+    "\n",
     "fig, ax = plt.subplots(figsize=(18, 6))\n",
-    "ax.plot(timestamps, eeg_signal.data[:])\n",
-    "ax.set_title(\"EEG signal\")\n",
+    "ax.plot(ts_view, data_view)\n",
+    "ax.set_title(f\"EEG signal (subsampled 1:{stride})\")\n",
     "ax.set_xlabel(\"Time (s)\")\n",
     "ax.set_ylabel(eeg_signal.unit)\n",
     "plt.show()"