
Commit 1e56026

Author: OutlyingWest
Added logging, subprocess output handling with BusySpinner and test improvements
1. Added and documented a logging system.
2. Implemented animation for long-running processes in Score-P mode (both for data transfer to the subprocess and for displaying progress animation within the subprocess itself).
3. Added an example of a long-running task, large array processing with Score-P, in `examples/ExampleBasic.ipynb`.
4. Changed the way subprocess output streams are read: moved this logic into a separate function, `read_scorep_process_pipe()`.
5. Brought tests into a working state:
   - Created a new function to clean up garbage from standard output.
   - Added a context manager `with self.subTest()` to better identify which cell failed during execution.
   - Temporarily updated expected test outputs to account for a new line introduced for proper animation handling (the `clear_line` string in `read_scorep_process_pipe()`).
1 parent 8bcc5fd commit 1e56026
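The commit relies on a `create_busy_spinner` helper from `jumper.userpersistence`, which is not part of the diff below. As a rough orientation only, a minimal sketch of how such a lock-aware spinner could work (class name, signatures, and behavior here are assumptions, not the project's actual implementation):

```python
import itertools
import sys
import threading
import time
from typing import Optional


class BusySpinner:
    """Hypothetical stand-in for the spinner object returned by
    jumper.userpersistence.create_busy_spinner (not shown in this diff)."""

    def __init__(self, lock: threading.Lock, interval: float = 0.1):
        self.lock = lock                      # shared with the pipe-reading code
        self.interval = interval
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None

    def start(self, message: str = "") -> None:
        def spin() -> None:
            for frame in itertools.cycle("|/-\\"):
                if self._stop_event.is_set():
                    break
                with self.lock:               # do not interleave with cell output
                    sys.stdout.write(f"\r{message} {frame}")
                    sys.stdout.flush()
                time.sleep(self.interval)

        self._thread = threading.Thread(target=spin, daemon=True)
        self._thread.start()

    def stop(self) -> None:
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()
        with self.lock:
            sys.stdout.write("\r" + " " * 50 + "\r")  # wipe the spinner line
            sys.stdout.flush()


def create_busy_spinner(lock: threading.Lock) -> BusySpinner:
    return BusySpinner(lock)
```

The shared lock is the key design point: the same lock is passed to the output reader so that spinner frames and subprocess output never interleave on stdout.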

9 files changed

Lines changed: 387 additions & 58 deletions

File tree

README.md

Lines changed: 6 additions & 0 deletions
@@ -152,6 +152,7 @@ MODE=[disk,memory]
 ```
 
 When using persistence in `disk` mode, user can also define directory to which serializer output will be saved with `SCOREP_KERNEL_PERSISTENCE_DIR` environment variable.
+To see the detailed report for marshalling steps - `MARSHALLING_DETAILED_REPORT=1` environment variable can be set.
 
 `%%execute_with_scorep`
 
@@ -235,6 +236,11 @@ Similar yields for cloudpickle. Use the `%%marshalling_settings` magic command t
 
 When dealing with big data structures, there might be a big runtime overhead at the beginning and the end of a Score-P cell. This is due to additional data saving and loading processes for persistency in the background. However this does not affect the actual user code and the Score-P measurements.
 
+## Logging Configuration
+To adjust logging and obtain more detailed output about the behavior of the JUmPER kernel, refer to the `src/logging_config.py` file.
+
+This file contains configuration options for controlling the verbosity, format, and destination of log messages. You can customize it to suit your debugging or monitoring needs.
+
 # Future Work
 
 The kernel is still under development. The following is on the agenda:
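The new README section points to `src/logging_config.py`, whose `LOGGING` dict is consumed in `kernel.py` via `logging.config.dictConfig(LOGGING)` and `logging.getLogger('kernel')`. As a hedged illustration only, a dict of the shape such a file may export (formatter, handler names, and levels here are assumptions; the file in the repository is authoritative):

```python
import logging
import logging.config

# Hypothetical sketch of a dictConfig-style LOGGING dict; the actual
# src/logging_config.py in the repository may differ.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "default": {
            "format": "%(asctime)s %(name)s %(levelname)s: %(message)s",
        },
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "default",
        },
    },
    "loggers": {
        # kernel.py requests logging.getLogger('kernel')
        "kernel": {
            "handlers": ["console"],
            "level": "INFO",  # switch to "DEBUG" for more verbose kernel output
            "propagate": False,
        },
    },
}

logging.config.dictConfig(LOGGING)
log = logging.getLogger("kernel")
log.info("kernel logger configured")
```

Raising the `kernel` logger's level to `"DEBUG"` would surface the `self.log.debug(...)` calls added throughout `scorep_execute` in this commit.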

examples/ExampleBasic.ipynb

Lines changed: 51 additions & 1 deletion
@@ -466,7 +466,57 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": []
+   "source": [
+    "### Large array processing with Score-P\n",
+    "This example illustrates the steps involved in the marshalling process when a cell instrumented with Score-P is executed with a large data payload as input.\n"
+   ]
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "import time\n",
+    "import numpy as np\n",
+    "\n",
+    "def generate_array_with_size(size_mb, dtype=np.float32):\n",
+    "    size_bytes = size_mb * 1024 * 1024\n",
+    "    element_size = np.dtype(dtype).itemsize\n",
+    "    num_elements = size_bytes // element_size\n",
+    "    array = np.zeros(num_elements, dtype=dtype)\n",
+    "    return array\n",
+    "\n",
+    "big_array = generate_array_with_size(size_mb=1000)"
+   ]
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "Enable marshalling detailed report for each step."
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "%env MARSHALLING_DETAILED_REPORT=1"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "Run cell with Score-P"
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "%%execute_with_scorep\n",
+    "big_array\n",
+    "time.sleep(4)"
+   ]
  }
 ],
 "metadata": {
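The notebook's `generate_array_with_size` helper sizes the payload by integer arithmetic: bytes requested divided by the dtype's item size. A fence-free check of that arithmetic (the helper name below is local to this sketch; `float32` has a 4-byte itemsize):

```python
def num_elements_for(size_mb: int, itemsize: int = 4) -> int:
    """Mirror the size arithmetic of generate_array_with_size():
    size_mb * 1024 * 1024 bytes divided by the per-element byte size."""
    return (size_mb * 1024 * 1024) // itemsize


# The notebook cell allocates a 1000 MB float32 array:
print(num_elements_for(1000))  # 262144000 elements, i.e. ~1 GB of zeros
```

This is why the example makes a good stress test for marshalling: roughly a gigabyte of data must cross the kernel/subprocess boundary before and after the Score-P run.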

src/jumper/kernel.py

Lines changed: 111 additions & 50 deletions
@@ -2,10 +2,14 @@
 import json
 import os
 import re
+import selectors
 import subprocess
 import sys
+import threading
 import time
 import shutil
+import logging
+import logging.config
 
 from enum import Enum
 from textwrap import dedent
@@ -14,13 +18,15 @@
 from ipykernel.ipkernel import IPythonKernel
 from itables import show
 from jumper.userpersistence import PersHelper, scorep_script_name
-from jumper.userpersistence import magics_cleanup
+from jumper.userpersistence import magics_cleanup, create_busy_spinner
 import importlib
 from jumper.perfdatahandler import PerformanceDataHandler
 import jumper.visualization as perfvis
 
 # import jumper.multinode_monitor.slurm_monitor as slurm_monitor
 
+from jumper.logging_config import LOGGING
+
 PYTHON_EXECUTABLE = sys.executable
 READ_CHUNK_SIZE = 8
 userpersistence_token = "jumper.userpersistence"
@@ -103,6 +109,9 @@ def __init__(self, **kwargs):
         except ModuleNotFoundError:
             self.scorep_python_available_ = False
 
+        logging.config.dictConfig(LOGGING)
+        self.log = logging.getLogger('kernel')
+
     def cell_output(self, string, stream="stdout"):
         """
         Display string as cell output.
@@ -683,16 +692,21 @@ async def scorep_execute(
         """
         Execute given code with Score-P Python bindings instrumentation.
         """
+        self.log.info("Executing Score-P instrumented code...")
+        self.pershelper.set_dump_report_level()
         # Set up files/pipes for persistence communication
         if not self.pershelper.preprocess():
             self.pershelper.postprocess()
+            error_message = "Failed to set up persistence communication files/pipes."
+            self.log.error(error_message)
             self.cell_output(
-                "KernelError: Failed to set up the persistence communication "
-                "files/pipes.",
+                f"KernelError: {error_message} ",
                 "stderr",
             )
             return self.standard_reply()
 
+        self.log.debug("Persistence communication set up successfully.")
+
         # Prepare code for the Score-P instrumented execution as subprocess
         # Transmit user persistence and updated sys.path from Jupyter
         # notebook to subprocess After running the code, transmit subprocess
@@ -701,11 +715,14 @@ async def scorep_execute(
             os.open(scorep_script_name, os.O_WRONLY | os.O_CREAT), "w"
         ) as file:
             file.write(self.pershelper.subprocess_wrapper(code))
+        self.log.debug(f"Code written to temporary script: {scorep_script_name}")
+
         # For disk mode use implicit synchronization between kernel and
         # subprocess: await jupyter_dump, subprocess.wait(),
         # await jupyter_update Ghost cell - dump current Jupyter session for
         # subprocess Run in a "silent" way to not increase cells counter
         if self.pershelper.mode == "disk":
+            self.log.debug("Executing Jupyter dump for disk mode.")
             reply_status_dump = await super().do_execute(
                 self.pershelper.jupyter_dump(),
                 silent,
@@ -716,18 +733,24 @@ async def scorep_execute(
             )
 
             if reply_status_dump["status"] != "ok":
+                error_message = "Failed to pickle notebook's persistence."
+                self.log.error(error_message)
                 self.ghost_cell_error(
                     reply_status_dump,
-                    "KernelError: Failed to pickle notebook's persistence.",
+                    f"KernelError: {error_message}",
                 )
                 return reply_status_dump
 
         # Launch subprocess with Jupyter notebook environment
+        self.log.debug("Preparing subprocess execution.")
+
         cmd = (
             [PYTHON_EXECUTABLE, "-m", "scorep"]
             + self.scorep_binding_args
             + [scorep_script_name]
         )
+        self.log.debug(f"Subprocess command: {' '.join(cmd)}")
+
         scorep_env = {
             key: os.environ[key]
             for key in os.environ
@@ -749,13 +772,15 @@ async def scorep_execute(
         minute = dt.strftime("%M")
 
         proc = subprocess.Popen(
-            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=proc_env
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=proc_env
         )
+        self.log.debug(f"Subprocess started with PID {proc.pid}")
 
         self.perfdata_handler.start_perfmonitor(proc.pid)
         # For memory mode jupyter_dump and jupyter_update must be awaited
         # concurrently to the running subprocess
         if self.pershelper.mode == "memory":
+            self.log.debug("Executing Jupyter dump for memory mode.")
             reply_status_dump = await super().do_execute(
                 self.pershelper.jupyter_dump(),
                 silent,
@@ -765,44 +790,25 @@ async def scorep_execute(
                 cell_id=cell_id,
             )
             if reply_status_dump["status"] != "ok":
+                error_message = "Failed to pickle notebook's persistence."
+                self.log.error(error_message)
                 self.ghost_cell_error(
                     reply_status_dump,
-                    "KernelError: Failed to pickle notebook's persistence.",
+                    f"KernelError: {error_message}",
                 )
                 return reply_status_dump
 
-        # Redirect process stderr to stdout and observe the latter
-        # Observing two stream with two threads causes interference in
-        # cell_output in Jupyter notebook
-        # stdout is read in chunks, which are split into lines using
-        # \r or \n as delimiter
-        # Last element in the list might be "incomplete line",
-        # not ending with \n or \r, it is saved
-        # and merged with the first line in the next chunk
-        incomplete_line = ""
-        endline_pattern = re.compile(r"(.*?[\r\n]|.+$)")
         # Empty cell output, required for interactive output
         # e.g. tqdm for-loop progress bar
         self.cell_output("\0")
 
-        multicellmode_timestamps = []
-        while True:
-            chunk = b"" + proc.stdout.read(READ_CHUNK_SIZE)
-            if chunk == b"":
-                break
-            chunk = chunk.decode(sys.getdefaultencoding(), errors="ignore")
-            lines = endline_pattern.findall(chunk)
-            if len(lines) > 0:
-                lines[0] = incomplete_line + lines[0]
-                if lines[-1][-1] not in ["\n", "\r"]:
-                    incomplete_line = lines.pop(-1)
-                else:
-                    incomplete_line = ""
-                for line in lines:
-                    if "MCM_TS" in line:
-                        multicellmode_timestamps.append(line)
-                        continue
-                    self.cell_output(line)
+        stdout_lock = threading.Lock()
+        process_busy_spinner = create_busy_spinner(stdout_lock)
+        process_busy_spinner.start('Process is running...')
+
+        multicellmode_timestamps = self.read_scorep_process_pipe(proc, stdout_lock)
+
+        process_busy_spinner.stop()
 
         # for multiple nodes, we have to add more lists here, one list per node
         # this is required to be in line with the performance data aggregation
@@ -857,21 +863,24 @@ async def scorep_execute(
             self.perfdata_handler.end_perfmonitor()
         )
 
-        # In disk mode, subprocess already terminated
-        # after dumping persistence to file
-        if self.pershelper.mode == "disk":
-            if proc.returncode:
-                self.pershelper.postprocess()
-                self.cell_output(
-                    "KernelError: Cell execution failed, cell persistence "
-                    "was not recorded.",
-                    "stderr",
-                )
-                return self.standard_reply()
+        # Check if the score-p process is running.
+        # This prevents jupyter_update() from getting stuck while reading non-existent temporary files
+        # if something goes wrong during process execution.
+        if proc.poll():
+            self.pershelper.postprocess()
+            error_message = "Cell execution failed, cell persistence was not recorded."
+            self.log.error(error_message)
+            self.cell_output(
+                f"KernelError: {error_message}",
+                "stderr",
+            )
+            return self.standard_reply()
 
         # os_environ_.clear()
         # sys_path_.clear()
 
+        # In disk mode, subprocess already terminated
+        # after dumping persistence to file
         # Ghost cell - load subprocess persistence back to Jupyter notebook
         # Run in a "silent" way to not increase cells counter
         reply_status_update = await super().do_execute(
@@ -883,28 +892,31 @@ async def scorep_execute(
             cell_id=cell_id,
         )
         if reply_status_update["status"] != "ok":
+            error_message = "Failed to load cell's persistence to the notebook."
+            self.log.error(error_message)
             self.ghost_cell_error(
                 reply_status_update,
-                "KernelError: Failed to load cell's persistence to the "
-                "notebook.",
+                f"KernelError: {error_message}"
             )
             return reply_status_update
 
         # In memory mode, subprocess terminates once jupyter_update is
         # executed and pipe is closed
         if self.pershelper.mode == "memory":
-            if proc.returncode:
+            if proc.poll():
                 self.pershelper.postprocess()
+                error_message = "Cell execution failed, cell persistence was not recorded."
+                self.log.error(error_message)
                 self.cell_output(
-                    "KernelError: Cell execution failed, cell persistence "
-                    "was not recorded.",
+                    f"KernelError: {error_message}",
                     "stderr",
                 )
                 return self.standard_reply()
 
         # Determine directory to which trace files were saved by Score-P
         scorep_folder = ""
         if "SCOREP_EXPERIMENT_DIRECTORY" in os.environ:
+            self.log.warning(f'{os.environ["SCOREP_EXPERIMENT_DIRECTORY"]=}')
             scorep_folder = os.environ["SCOREP_EXPERIMENT_DIRECTORY"]
             self.cell_output(
                 f"Instrumentation results can be found in {scorep_folder}"
@@ -942,13 +954,62 @@ async def scorep_execute(
         )
 
         self.pershelper.postprocess()
+
         if performance_data_nodes:
             self.report_perfdata(performance_data_nodes, duration)
             self.perfdata_handler.append_code(
                 datetime.datetime.now(), code, time_indices
             )
         return self.standard_reply()
 
+
+    def read_scorep_process_pipe(self, proc: subprocess.Popen[bytes], stdout_lock: threading.Lock) -> list:
+        """
+        Reads and processes the output of a subprocess running with Score-P instrumentation.
+        Args:
+            proc (subprocess.Popen[bytes]): The subprocess whose output is being read.
+            stdout_lock (threading.Lock): Lock to avoid output overlapping
+
+        Returns:
+            list: A list of decoded strings containing "MCM_TS" timestamps.
+        """
+        multicellmode_timestamps = []
+        sel = selectors.DefaultSelector()
+
+        sel.register(proc.stdout, selectors.EVENT_READ)
+        sel.register(proc.stderr, selectors.EVENT_READ)
+
+        line_width = 50
+        clear_line = "\r" + " " * line_width + "\r"
+
+        while True:
+            # Select between stdout and stderr
+            for key, val in sel.select():
+                line = key.fileobj.readline()
+                if not line:
+                    sel.unregister(key.fileobj)
+                    continue
+
+                decoded_line = line.decode(sys.getdefaultencoding(), errors='ignore')
+
+                if key.fileobj is proc.stderr:
+                    with stdout_lock:
+                        self.log.warning(f'{decoded_line.strip()}')
+                elif 'MCM_TS' in decoded_line:
+                    multicellmode_timestamps.append(decoded_line)
+                else:
+                    with stdout_lock:
+                        sys.stdout.write(clear_line)
+                        sys.stdout.flush()
+                        self.cell_output(decoded_line)
+
+            # If both stdout and stderr empty -> out of loop
+            if not sel.get_map():
+                break
+
+        return multicellmode_timestamps
+
+
     async def do_execute(
         self,
         code,
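The new `read_scorep_process_pipe()` multiplexes the subprocess's stdout and stderr with the standard `selectors` module instead of a second reader thread. The same pattern can be sketched standalone as follows (the child command is illustrative; registering pipe objects with `DefaultSelector` requires a POSIX platform):

```python
import selectors
import subprocess
import sys

# Spawn a child that writes one line to each pipe.
proc = subprocess.Popen(
    [sys.executable, "-c",
     "import sys; print('out line'); print('err line', file=sys.stderr)"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

sel = selectors.DefaultSelector()
sel.register(proc.stdout, selectors.EVENT_READ)
sel.register(proc.stderr, selectors.EVENT_READ)

stdout_lines, stderr_lines = [], []
while sel.get_map():                      # loop until both pipes reach EOF
    for key, _ in sel.select():
        line = key.fileobj.readline()
        if not line:                      # EOF: stop watching this pipe
            sel.unregister(key.fileobj)
            continue
        decoded = line.decode(sys.getdefaultencoding(), errors="ignore")
        if key.fileobj is proc.stderr:
            stderr_lines.append(decoded)
        else:
            stdout_lines.append(decoded)

proc.wait()
print(stdout_lines, stderr_lines)
```

Unregistering each pipe at EOF empties the selector's map, which is exactly the loop-exit condition the kernel's implementation uses.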

0 commit comments
