From 848266df4e4a87460c12eabed18dc41dd32d3abd Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 13 May 2026 14:50:47 -0400 Subject: [PATCH 1/3] 000718_demo: drop dandi_authenticate() calls (dandiset is no longer embargoed) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The notebook calls `client.dandi_authenticate()` in three cells (8, 43, 63), each with the inline comment: 'This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.' Verified via the DANDI API that 000718 is now `embargo_status: OPEN` — so the authentication call is no longer needed (and would break Colab use because Colab users don't have DANDI creds wired up). `dandi_authenticate()` is also what was triggering the `input()` prompt that put this notebook on `.github/notebook-test-exclusions.txt` with 'raises EOFError under non-interactive nbconvert'. Removing the calls makes the notebook CI-testable, so also dropped it from the test-exclusions list. Same fix that landed in #100 for 001172_demo. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/notebook-test-exclusions.txt | 4 ---- 000718/CaiLab/zaki_2024/000718_demo.ipynb | 6 ------ 2 files changed, 10 deletions(-) diff --git a/.github/notebook-test-exclusions.txt b/.github/notebook-test-exclusions.txt index 9332426..fc6cb64 100644 --- a/.github/notebook-test-exclusions.txt +++ b/.github/notebook-test-exclusions.txt @@ -65,10 +65,6 @@ tutorials/cosyne_2023/advanced_asset_search.ipynb # in headless CI. Works in Colab. 001170/ReimerLab/public_demo/001170_demo.ipynb -# Calls input() to prompt the user; raises EOFError under non-interactive -# nbconvert. Works in Colab where input() is wired to a UI prompt. -000718/CaiLab/zaki_2024/000718_demo.ipynb - # ============================================================================= # Pre-existing notebook content bugs — track + fix in dedicated PRs. # ============================================================================= diff --git a/000718/CaiLab/zaki_2024/000718_demo.ipynb b/000718/CaiLab/zaki_2024/000718_demo.ipynb index 1b81266..2352645 100644 --- a/000718/CaiLab/zaki_2024/000718_demo.ipynb +++ b/000718/CaiLab/zaki_2024/000718_demo.ipynb @@ -240,8 +240,6 @@ "\n", "dandiset_id = \"000718\"\n", "with DandiAPIClient() as client:\n", - " #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n", - " client.dandi_authenticate() \n", " asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n", " s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", "\n", @@ -2125,8 +2123,6 @@ "\n", "dandiset_id = \"000718\"\n", "with DandiAPIClient() as client:\n", - " #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n", - " client.dandi_authenticate() \n", " asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n", " s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", "\n", @@ -3964,8 +3960,6 @@ "\n", "dandiset_id = \"000718\"\n", "with DandiAPIClient() as client:\n", - " #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n", - " client.dandi_authenticate() \n", " asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n", " s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n", "\n", From e0d762157776eab95af97c0346775d4cf5643b59 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 13 May 2026 14:59:59 -0400 Subject: [PATCH 2/3] Runner: stream notebook execution output for CI diagnostics Two changes to .github/scripts/run_notebook.py: 1. Insert progress markers between cells. The nbconvert-generated .py has `# In[N]:` separators; instrument the script to print `::cell N::` before each. If execution dies mid-cell (eg OOM kills the process), the last printed marker pinpoints the offender. 2. Stream subprocess output live to the parent's stdout instead of capturing it all in memory and printing only on exit. The previous captured-output design meant a SIGTERM'd runner produced empty logs; with streaming, every output up to the kill point reaches the Actions logs. Still tees a copy to the log file for the uploaded artifact. Motivated by PR #162's 000718_demo failure: two consecutive runs got SIGTERM at ~80s with no diagnostic output, so we couldn't tell what was happening. With this instrumentation in place, the next run will show exactly which cell triggers the kill. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/scripts/run_notebook.py | 60 ++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/.github/scripts/run_notebook.py b/.github/scripts/run_notebook.py index 3c5b31e..27bd88c 100644 --- a/.github/scripts/run_notebook.py +++ b/.github/scripts/run_notebook.py @@ -143,30 +143,58 @@ def finalize(stage: str, ok: bool, error: str | None = None, **extra) -> int: r = run(["jupyter", "nbconvert", "--to", "script", "--stdout", str(tmp_nb)]) if r.returncode != 0: return finalize("convert", False, error=r.stderr[-3000:]) - script_path.write_text(r.stdout) - + raw_script = r.stdout + + # Insert progress markers before each `# In[...]` cell separator so the + # CI log shows which cell is running. If the process is killed mid-cell + # (eg OOM), the last marker tells you the offending cell. + instrumented = [] + for line in raw_script.splitlines(keepends=True): + m = re.match(r"# In\[(.*?)\]:", line.strip()) + if m: + cell_label = m.group(1) or "?" + instrumented.append( + f'import sys as _sys; print("::cell {cell_label}::", flush=True); _sys.stderr.flush()\n' + ) + instrumented.append(line) + script_path.write_text("".join(instrumented)) + + # Stream ipython output live to this process's stdout/stderr so CI logs + # show progress in real time. Tee a copy to the log file via Popen. + import shlex + cmd = ["ipython", "--colors=NoColor", "--no-banner", + "--InteractiveShell.history_load_length=0", str(script_path)] + print(f"\n=== executing notebook (streaming) ===\n {shlex.join(cmd)}", flush=True) + log_f = log_path.open("a") + log_f.write(f"\n=== execute (streaming) ===\n") + log_f.flush() + proc = subprocess.Popen( + cmd, cwd=str(nb_dir), + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + text=True, bufsize=1, + ) + captured = [] try: - r = run( - ["ipython", "--colors=NoColor", "--no-banner", - "--InteractiveShell.history_load_length=0", str(script_path)], - cwd=str(nb_dir), - timeout=args.timeout, - ) + for line in proc.stdout: + sys.stdout.write(line) + sys.stdout.flush() + log_f.write(line) + captured.append(line) + proc.wait(timeout=args.timeout) except subprocess.TimeoutExpired: + proc.kill() + log_f.close() return finalize("execute", False, error=f"hit overall {args.timeout}s timeout") + log_f.close() - with log_path.open("a") as f: - f.write(f"\n=== execute rc={r.returncode} ===\n") - f.write(f"--- stdout (tail) ---\n{r.stdout[-2000:]}\n") - f.write(f"--- stderr (tail) ---\n{r.stderr[-3000:]}\n") - + full_output = "".join(captured) looks_failed = ( - r.returncode != 0 - or "Traceback (most recent call last)" in r.stderr + proc.returncode != 0 + or "Traceback (most recent call last)" in full_output ) if looks_failed: - return finalize("execute", False, error=r.stderr[-3000:]) + return finalize("execute", False, error=full_output[-3000:]) return finalize("done", True) From 2c46b1ba5dfa9a264f1edf0c61d9569c37f646a3 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 13 May 2026 15:05:13 -0400 Subject: [PATCH 3/3] 000718_demo: subsample EEG signal for visualization (avoid OOM) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EEG plot cell did: ax.plot(timestamps, eeg_signal.data[:]) This loads the full ~1-week EEG recording into memory (~60M+ samples through remfile HTTP range requests, then matplotlib trying to render every point). CI consistently SIGTERMs ~80s into execution at this cell — the runner gets OOM-killed. Subsample to ~50k points for the overview plot. Users who want the full-resolution signal can slice `eeg_signal.data[start:stop]` for a specific time window. This is also a real UX improvement on Colab — plotting 60M points was unusably slow there too. Caught after PR-#162's CI run showed the kill happens right after the ::cell 41:: progress marker. Co-Authored-By: Claude Opus 4.7 (1M context) --- 000718/CaiLab/zaki_2024/000718_demo.ipynb | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/000718/CaiLab/zaki_2024/000718_demo.ipynb b/000718/CaiLab/zaki_2024/000718_demo.ipynb index 2352645..9b444e1 100644 --- a/000718/CaiLab/zaki_2024/000718_demo.ipynb +++ b/000718/CaiLab/zaki_2024/000718_demo.ipynb @@ -4062,9 +4062,20 @@ ], "source": [ "eeg_signal = nwbfile.acquisition[\"EEGSignal\"]\n", + "\n", + "# This recording spans roughly a week at the EEG sample rate, so loading and\n", + "# plotting `eeg_signal.data[:]` is hundreds of millions of points — too large\n", + "# for matplotlib (and too slow over remfile streaming). Subsample to ~50k\n", + "# points for the overview plot; users who want full-resolution data can slice\n", + "# `eeg_signal.data[start:stop]` directly.\n", + "n = eeg_signal.data.shape[0]\n", + "stride = max(1, n // 50_000)\n", + "ts_view = timestamps[::stride]\n", + "data_view = eeg_signal.data[::stride]\n", + "\n", "fig, ax = plt.subplots(figsize=(18, 6))\n", - "ax.plot(timestamps, eeg_signal.data[:])\n", - "ax.set_title(\"EEG signal\")\n", + "ax.plot(ts_view, data_view)\n", + "ax.set_title(f\"EEG signal (subsampled 1:{stride})\")\n", "ax.set_xlabel(\"Time (s)\")\n", "ax.set_ylabel(eeg_signal.unit)\n", "plt.show()"