diff --git a/.github/notebook-test-exclusions.txt b/.github/notebook-test-exclusions.txt
index 9332426..fc6cb64 100644
--- a/.github/notebook-test-exclusions.txt
+++ b/.github/notebook-test-exclusions.txt
@@ -65,10 +65,6 @@ tutorials/cosyne_2023/advanced_asset_search.ipynb
 # in headless CI. Works in Colab.
 001170/ReimerLab/public_demo/001170_demo.ipynb
 
-# Calls input() to prompt the user; raises EOFError under non-interactive
-# nbconvert. Works in Colab where input() is wired to a UI prompt.
-000718/CaiLab/zaki_2024/000718_demo.ipynb
-
 # =============================================================================
 # Pre-existing notebook content bugs — track + fix in dedicated PRs.
 # =============================================================================
diff --git a/.github/scripts/run_notebook.py b/.github/scripts/run_notebook.py
index 3c5b31e..27bd88c 100644
--- a/.github/scripts/run_notebook.py
+++ b/.github/scripts/run_notebook.py
@@ -143,27 +143,67 @@ def finalize(stage: str, ok: bool, error: str | None = None, **extra) -> int:
     r = run(["jupyter", "nbconvert", "--to", "script", "--stdout", str(tmp_nb)])
     if r.returncode != 0:
         return finalize("convert", False, error=r.stderr[-3000:])
-    script_path.write_text(r.stdout)
+    raw_script = r.stdout
 
-    try:
-        r = run(
-            ["ipython", "--colors=NoColor", "--no-banner",
-             "--InteractiveShell.history_load_length=0", str(script_path)],
-            cwd=str(nb_dir),
-            timeout=args.timeout,
-        )
-    except subprocess.TimeoutExpired:
-        return finalize("execute", False, error=f"hit overall {args.timeout}s timeout")
-
-    with log_path.open("a") as f:
-        f.write(f"\n=== execute rc={r.returncode} ===\n")
-        f.write(f"--- stdout (tail) ---\n{r.stdout[-2000:]}\n")
-        f.write(f"--- stderr (tail) ---\n{r.stderr[-3000:]}\n")
-
+    # Insert progress markers before each `# In[...]` cell separator so the
+    # CI log shows which cell is running. If the process is killed mid-cell
+    # (eg OOM), the last marker tells you the offending cell.
+    instrumented = []
+    for line in raw_script.splitlines(keepends=True):
+        m = re.match(r"# In\[(.*?)\]:", line.strip())
+        if m:
+            cell_label = m.group(1) or "?"
+            instrumented.append(
+                f'import sys as _sys; print("::cell {cell_label}::", flush=True); _sys.stderr.flush()\n'
+            )
+        instrumented.append(line)
+    script_path.write_text("".join(instrumented))
+
+    # Stream ipython output live to this process's stdout/stderr so CI logs
+    # show progress in real time. Tee a copy to the log file.
+    import shlex
+    import threading
+    cmd = ["ipython", "--colors=NoColor", "--no-banner",
+           "--InteractiveShell.history_load_length=0", str(script_path)]
+    print(f"\n=== executing notebook (streaming) ===\n  {shlex.join(cmd)}", flush=True)
+    proc = subprocess.Popen(
+        cmd, cwd=str(nb_dir),
+        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True, bufsize=1,
+    )
+    # `for line in proc.stdout` blocks with no deadline of its own, and
+    # proc.wait(timeout=...) would only start counting after stdout hits
+    # EOF, i.e. after the child already exited -- so a silently hung
+    # notebook would stall CI forever. Enforce args.timeout with a watchdog
+    # that kills the child; the reader loop then sees EOF and unwinds.
+    timed_out = threading.Event()
+
+    def _kill_on_timeout():
+        timed_out.set()
+        proc.kill()
+
+    watchdog = threading.Timer(args.timeout, _kill_on_timeout)
+    watchdog.start()
+    captured = []
+    with log_path.open("a") as log_f:  # context manager: no fd leak on error
+        log_f.write("\n=== execute (streaming) ===\n")
+        try:
+            for line in proc.stdout:
+                sys.stdout.write(line)
+                sys.stdout.flush()
+                log_f.write(line)
+                captured.append(line)
+            proc.wait()
+        finally:
+            watchdog.cancel()
+    if timed_out.is_set():
+        return finalize("execute", False, error=f"hit overall {args.timeout}s timeout")
+
+    full_output = "".join(captured)
     looks_failed = (
-        r.returncode != 0
-        or "Traceback (most recent call last)" in r.stderr
+        proc.returncode != 0
+        or "Traceback (most recent call last)" in full_output
     )
     if looks_failed:
-        return finalize("execute", False, error=r.stderr[-3000:])
+        return finalize("execute", False, error=full_output[-3000:])
     return finalize("done", True)
diff --git a/000718/CaiLab/zaki_2024/000718_demo.ipynb b/000718/CaiLab/zaki_2024/000718_demo.ipynb
index 1b81266..9b444e1 100644
--- a/000718/CaiLab/zaki_2024/000718_demo.ipynb
+++ b/000718/CaiLab/zaki_2024/000718_demo.ipynb
@@ -240,8 +240,6 @@
     "\n",
     "dandiset_id = \"000718\"\n",
     "with DandiAPIClient() as client:\n",
-    "    #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n",
-    "    client.dandi_authenticate() \n",
     "    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n",
     "    s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n",
     "\n",
@@ -2125,8 +2123,6 @@
     "\n",
     "dandiset_id = \"000718\"\n",
     "with DandiAPIClient() as client:\n",
-    "    #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n",
-    "    client.dandi_authenticate() \n",
     "    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n",
     "    s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n",
     "\n",
@@ -3964,8 +3960,6 @@
     "\n",
     "dandiset_id = \"000718\"\n",
     "with DandiAPIClient() as client:\n",
-    "    #This line is necessary when the dataset is in embargoed mode and only owners can view the data, once it will be published this line can be removed.\n",
-    "    client.dandi_authenticate() \n",
     "    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(nwbfile_path)\n",
     "    s3_url = asset.get_content_url(follow_redirects=1, strip_query=False)\n",
     "\n",
@@ -4068,9 +4062,20 @@
    ],
    "source": [
     "eeg_signal = nwbfile.acquisition[\"EEGSignal\"]\n",
+    "\n",
+    "# This recording spans roughly a week at the EEG sample rate, so loading and\n",
+    "# plotting `eeg_signal.data[:]` is hundreds of millions of points — too large\n",
+    "# for matplotlib (and too slow over remfile streaming). Subsample to ~50k\n",
+    "# points for the overview plot; users who want full-resolution data can slice\n",
+    "# `eeg_signal.data[start:stop]` directly.\n",
+    "n = eeg_signal.data.shape[0]\n",
+    "stride = max(1, n // 50_000)\n",
+    "ts_view = timestamps[::stride]\n",
+    "data_view = eeg_signal.data[::stride]\n",
+    "\n",
     "fig, ax = plt.subplots(figsize=(18, 6))\n",
-    "ax.plot(timestamps, eeg_signal.data[:])\n",
-    "ax.set_title(\"EEG signal\")\n",
+    "ax.plot(ts_view, data_view)\n",
+    "ax.set_title(f\"EEG signal (subsampled 1:{stride})\")\n",
     "ax.set_xlabel(\"Time (s)\")\n",
     "ax.set_ylabel(eeg_signal.unit)\n",
     "plt.show()"