Skip to content

Commit 3e4f44d

Browse files
author
OutlyingWest
committed
Fix spinner animation with tqdm output and refactor Score-P stream reading
- Spinner no longer overlaps with tqdm due to proper threading.Event handling - Score-P stdout and stderr are read in separate threads with chunked reading - Animation is disabled in multicell mode when needed - README.md and error logging improved
1 parent a8973df commit 3e4f44d

3 files changed

Lines changed: 123 additions & 71 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ To see the detailed report for marshalling steps - `SCOREP_JUPYTER_MARSHALLING_D
119119
%env SCOREP_JUPYTER_MARSHALLING_DETAILED_REPORT=1
120120
```
121121
You can disable visual animations shown during long-running tasks by setting the `SCOREP_JUPYTER_DISABLE_PROCESSING_ANIMATIONS` environment variable.
122+
This can be useful for debugging, as it ensures that any error messages from your code in cells are shown without being overwritten.
123+
It is also helpful when running code that produces its own progress bars (e.g., using `tqdm`), to prevent output from being obscured.
122124
```
123125
%env SCOREP_JUPYTER_DISABLE_PROCESSING_ANIMATIONS=1
124126
```

src/scorep_jupyter/kernel.py

Lines changed: 108 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111

1212
from enum import Enum
1313
from textwrap import dedent
14+
from statistics import mean
15+
from typing import IO, AnyStr, Callable
16+
17+
import pandas as pd
1418
from ipykernel.ipkernel import IPythonKernel
1519
from scorep_jupyter.userpersistence import PersHelper, scorep_script_name
1620
from scorep_jupyter.userpersistence import magics_cleanup, create_busy_spinner
@@ -459,6 +463,7 @@ async def scorep_execute(
459463
allow_stdin=False,
460464
*,
461465
cell_id=None,
466+
is_multicell_final=False,
462467
):
463468
"""
464469
Execute given code with Score-P Python bindings instrumentation.
@@ -563,28 +568,7 @@ async def scorep_execute(
563568
self.pershelper.postprocess()
564569
return reply_status_dump
565570

566-
# Empty cell output, required for interactive output
567-
# e.g. tqdm for-loop progress bar
568-
self.cell_output("\0")
569-
570-
stdout_lock = threading.Lock()
571-
process_busy_spinner = create_busy_spinner(stdout_lock)
572-
process_busy_spinner.start("Process is running...")
573-
574-
# Due to splitting into scorep-kernel and ipython extension,
575-
# multicell mode is not supported for coarse-grained measurements
576-
# anymore (in the extension) and we do not show the single cells in
577-
# the ipython extension visualizations after executing them with scorep
578-
# however, since we are using scorep anyway, the ipython extension is
579-
# not useful, since we can count hardware counters anyway
580-
# multicellmode_timestamps = []
581-
582-
try:
583-
# multicellmode_timestamps =
584-
self.read_scorep_process_pipe(proc, stdout_lock)
585-
process_busy_spinner.stop("Done.")
586-
except KeyboardInterrupt:
587-
process_busy_spinner.stop("Kernel interrupted.")
571+
multicellmode_timestamps = self.start_reading_scorep_process_streams(proc, is_multicell_final)
588572

589573
# In disk mode, subprocess already terminated
590574
# after dumping persistence to file
@@ -669,67 +653,130 @@ async def scorep_execute(
669653
self.pershelper.postprocess()
670654
return self.standard_reply()
671655

672-
def read_scorep_process_pipe(
673-
self, proc: subprocess.Popen[bytes], stdout_lock: threading.Lock
656+
def start_reading_scorep_process_streams(
657+
self,
658+
proc: subprocess.Popen[bytes],
659+
is_multicell_final: bool,
674660
) -> list:
675661
"""
676662
This function reads stdout and stderr of the subprocess running with
677663
Score-P instrumentation independently.
678-
It logs all stderr output, collects lines containing
679-
the marker "MCM_TS" (used to identify multi-cell mode timestamps) into
680-
a list, and sends the remaining
681-
stdout lines to the Jupyter cell output.
682664
683665
Simultaneous access to stdout is synchronized via a lock to prevent
684-
overlapping with another thread performing
666+
overlapping with stderr reading thread and thread performing
685667
long-running process animation.
686668
687669
Args:
688-
proc (subprocess.Popen[bytes]): The subprocess whose output is
689-
being read.
690-
stdout_lock (threading.Lock): Lock to avoid output overlapping
670+
proc (subprocess.Popen[bytes]): The subprocess whose output is being read.
671+
is_multicell_final (bool): If multicell mode is finalizing - spinner must be disabled.
691672
692673
Returns:
693674
list: A list of decoded strings containing "MCM_TS" timestamps.
694675
"""
676+
677+
stdout_lock = threading.Lock()
678+
spinner_stop_event = threading.Event()
679+
process_busy_spinner = create_busy_spinner(stdout_lock, spinner_stop_event, is_multicell_final)
680+
process_busy_spinner.start("Process is running...")
681+
695682
multicellmode_timestamps = []
696-
sel = selectors.DefaultSelector()
697683

698-
sel.register(proc.stdout, selectors.EVENT_READ)
699-
sel.register(proc.stderr, selectors.EVENT_READ)
684+
# Empty cell output, required for interactive output
685+
# e.g. tqdm for-loop progress bar
686+
self.cell_output("\0")
700687

701-
line_width = 50
702-
clear_line = "\r" + " " * line_width + "\r"
688+
try:
689+
t_stderr = threading.Thread(
690+
target=self.read_scorep_stderr,
691+
args=(
692+
proc.stderr,
693+
stdout_lock,
694+
spinner_stop_event)
695+
)
696+
t_stderr.start()
703697

704-
while True:
705-
# Select between stdout and stderr
706-
for key, val in sel.select():
707-
line = key.fileobj.readline()
708-
if not line:
709-
sel.unregister(key.fileobj)
710-
continue
711-
712-
decoded_line = line.decode(
713-
sys.getdefaultencoding(), errors="ignore"
714-
)
698+
multicellmode_timestamps = self.read_scorep_stdout(proc.stdout, stdout_lock, spinner_stop_event)
699+
700+
t_stderr.join()
701+
process_busy_spinner.stop("Done.")
702+
703+
except KeyboardInterrupt:
704+
process_busy_spinner.stop("Kernel interrupted.")
705+
706+
if multicellmode_timestamps:
707+
self.log.debug(f'{multicellmode_timestamps = }')
708+
else:
709+
self.log.debug(f'"multicellmode_timestamps" is empty.')
710+
711+
712+
return multicellmode_timestamps
713+
714+
def read_scorep_stream(
715+
self,
716+
stream: IO[AnyStr],
717+
lock: threading.Lock,
718+
process_line: Callable[[str], None],
719+
read_chunk_size: int = 64,
720+
):
721+
incomplete_line = ""
722+
endline_pattern = re.compile(r"(.*?[\r\n]|.+$)")
715723

716-
if key.fileobj is proc.stderr:
717-
with stdout_lock:
718-
self.log.warning(f"{decoded_line.strip()}")
719-
elif "MCM_TS" in decoded_line:
720-
multicellmode_timestamps.append(decoded_line)
721-
else:
722-
with stdout_lock:
723-
sys.stdout.write(clear_line)
724-
sys.stdout.flush()
725-
self.cell_output(decoded_line)
726724

727-
# If both stdout and stderr empty -> out of loop
728-
if not sel.get_map():
725+
726+
while True:
727+
chunk = stream.read(read_chunk_size)
728+
if not chunk:
729729
break
730+
chunk = chunk.decode(sys.getdefaultencoding(), errors="ignore")
731+
lines = endline_pattern.findall(chunk)
732+
if lines:
733+
lines[0] = incomplete_line + lines[0]
734+
if lines[-1][-1] not in ["\n", "\r"]:
735+
incomplete_line = lines.pop(-1)
736+
else:
737+
incomplete_line = ""
738+
for line in lines:
739+
with lock:
740+
process_line(line)
741+
742+
def read_scorep_stdout(
743+
self,
744+
stdout: IO[AnyStr],
745+
lock: threading.Lock,
746+
spinner_stop_event: threading.Event,
747+
read_chunk_size=64,
748+
) -> list:
749+
multicellmode_timestamps = []
750+
line_width = 50
751+
clear_line = "\r" + " " * line_width + "\r"
730752

753+
def process_stdout_line(line: str):
754+
if "MCM_TS" in line:
755+
multicellmode_timestamps.append(line)
756+
else:
757+
if spinner_stop_event.is_set():
758+
sys.stdout.write(clear_line)
759+
sys.stdout.flush()
760+
self.cell_output(line)
761+
762+
self.read_scorep_stream(stdout, lock, process_stdout_line, read_chunk_size)
731763
return multicellmode_timestamps
732764

765+
def read_scorep_stderr(
766+
self,
767+
stderr: IO[AnyStr],
768+
lock: threading.Lock,
769+
spinner_stop_event: threading.Event,
770+
read_chunk_size=64,
771+
):
772+
def process_stderr_line(line: str):
773+
if spinner_stop_event.is_set():
774+
self.cell_output(line)
775+
self.log.error(line)
776+
777+
self.read_scorep_stream(stderr, lock, process_stderr_line, read_chunk_size)
778+
779+
733780
async def do_execute(
734781
self,
735782
code,
@@ -778,6 +825,7 @@ async def do_execute(
778825
user_expressions,
779826
allow_stdin,
780827
cell_id=cell_id,
828+
is_multicell_final=True,
781829
)
782830
except Exception:
783831
self.cell_output(

src/scorep_jupyter/userpersistence.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import uuid
1212
import importlib
1313

14+
1415
scorep_script_name = "scorep_script.py"
1516

1617

@@ -442,7 +443,7 @@ def magics_cleanup(code):
442443

443444

444445
class BaseSpinner:
445-
def __init__(self, lock=None):
446+
def __init__(self, lock=None, stop_event=None):
446447
pass
447448

448449
def _spinner_task(self):
@@ -459,22 +460,23 @@ def stop(self, done_message="Done."):
459460

460461

461462
class BusySpinner(BaseSpinner):
462-
def __init__(self, lock=None):
463+
def __init__(self, lock=None, stop_event=None):
463464
super().__init__(lock)
464465
self._lock = lock or threading.Lock()
465-
self._stop_event = threading.Event()
466+
self._stop_event = stop_event or threading.Event()
466467
self._thread = threading.Thread(target=self._spinner_task)
467468
self.working_message = ""
468469
self.done_message = ""
469470

470471
def _spinner_task(self):
471472
spinner_chars = "|/-\\"
473+
clear_line = " " * 50
472474
idx = 0
473475
while not self._stop_event.is_set():
474476
with self._lock:
475477
sys.stdout.write(
476478
f"\r{self.working_message} "
477-
f"{spinner_chars[idx % len(spinner_chars)]}"
479+
f"{spinner_chars[idx % len(spinner_chars)]}{clear_line}"
478480
)
479481
sys.stdout.flush()
480482
time.sleep(0.1)
@@ -498,11 +500,11 @@ def stop(self, done_message="Done."):
498500
self._thread.join()
499501

500502

501-
def create_busy_spinner(lock=None):
502-
is_enabled = (
503-
os.getenv("SCOREP_JUPYTER_DISABLE_PROCESSING_ANIMATIONS") != "1"
504-
)
505-
if is_enabled:
506-
return BusySpinner(lock)
503+
def create_busy_spinner(lock=None, stop_event=None, is_multicell_final=False):
504+
is_enabled = os.getenv("SCOREP_JUPYTER_DISABLE_PROCESSING_ANIMATIONS") != "1"
505+
if is_enabled and not is_multicell_final:
506+
return BusySpinner(lock, stop_event)
507507
else:
508-
return BaseSpinner(lock)
508+
if stop_event:
509+
stop_event.set()
510+
return BaseSpinner()

0 commit comments

Comments
 (0)