Skip to content

CytoDataFrame does not render whole FOVs #202

@jenna-tomkinson

Description

@jenna-tomkinson

Hello!

CytoDataFrame does an amazing job at the single-cell level! However, when I try it on a dataset of whole-image quality control metrics, the CytoDataFrame does not render. This appears to be because, at the image level, we do not have bounding boxes or centroids.

Here is the code I ran:

# Find blurry DNA channel image outliers for the current plate
blurry_DNA_channel_outliers = find_outliers(
    df=example_df,
    metadata_columns=metadata_columns,
    feature_thresholds={
        "ImageQuality_PowerLogLogSlope_OrigDNA": -2,
    },
)

import logging, sys

logger = logging.getLogger("cytodataframe")
logger.setLevel(logging.DEBUG)

if not logger.handlers:
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(levelname)s: %(message)s")
    ch.setFormatter(formatter)
    logger.addHandler(ch)


# MUST SET DATA AS DATAFRAME FOR OUTLINE DIR TO WORK
blurry_DNA_channel_outliers_cdf = CytoDataFrame(
    data=pd.DataFrame(blurry_DNA_channel_outliers),
)[
    [
        "ImageQuality_PowerLogLogSlope_OrigDNA",
        "FileName_OrigDNA",
    ]
]

Here is the debug output:

DEBUG: Found image columns: ['FileName_OrigDNA', 'FileName_OrigAGP']
DEBUG: Found image columns: ['FileName_OrigDNA', 'FileName_OrigAGP']
DEBUG: Found image path columns: ['PathName_OrigDNA', 'PathName_OrigAGP']
DEBUG: Found image path columns: ['PathName_OrigDNA', 'PathName_OrigAGP']
DEBUG: Found no bounding box columns.
DEBUG: Found no bounding box columns.
DEBUG: Found no compartment center xy columns.
DEBUG: Found no compartment center xy columns.

Here is the error I get when debug logging is not enabled:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
File ~/mambaforge/envs/pccma_atlas_whole_img_qc_env/lib/python3.10/site-packages/IPython/core/formatters.py:406, in BaseFormatter.__call__(self, obj)
    404     method = get_real_method(obj, self.print_method)
    405     if method is not None:
--> 406         return method()
    407     return None
    408 else:

File ~/mambaforge/envs/pccma_atlas_whole_img_qc_env/lib/python3.10/site-packages/cytodataframe/frame.py:1602, in CytoDataFrame._repr_html_(self, debug)
   1599 self._custom_attrs["_output"].clear_output(wait=True)
   1601 # render fresh HTML for this cell
-> 1602 self._render_output()
   1603 # ensure slider continues to control the output
   1604 self._custom_attrs["_scale_slider"].observe(
   1605     self._on_slider_change, names="value"
   1606 )

File ~/mambaforge/envs/pccma_atlas_whole_img_qc_env/lib/python3.10/site-packages/cytodataframe/frame.py:1526, in CytoDataFrame._render_output(self)
   1524 def _render_output(self: CytoDataFrame_type) -> str:
   1525     # Return a hidden div that nbconvert will keep but Jupyter will ignore
-> 1526     html_content = self._generate_jupyter_dataframe_html()
   1528     with self._custom_attrs["_output"]:
   1529         display(HTML(html_content))

File ~/mambaforge/envs/pccma_atlas_whole_img_qc_env/lib/python3.10/site-packages/cytodataframe/frame.py:1359, in CytoDataFrame._generate_jupyter_dataframe_html(self)
   1352     data = (
   1353         data
   1354         if image_paths_externally_joined or bounding_box_externally_joined
   1355         else self.copy()
   1356     )
   1358 # determine if we have image_cols to display
-> 1359 image_cols = CytoDataFrame(data).find_image_columns() or []
   1360 # normalize both the set of image cols and the pool of all cols to strings
   1361 all_cols_str, all_cols_back = self._normalize_labels(data.columns)

File ~/mambaforge/envs/pccma_atlas_whole_img_qc_env/lib/python3.10/site-packages/cytodataframe/frame.py:279, in CytoDataFrame.__init__(self, data, data_context_dir, data_image_paths, data_bounding_box, compartment_center_xy, data_mask_context_dir, data_outline_context_dir, segmentation_file_regex, image_adjustment, display_options, *args, **kwargs)
    264 self._custom_attrs["data_bounding_box"] = (
    265     self.get_bounding_box_from_data()
    266     if data_bounding_box is None
    267     else data_bounding_box
    268 )
    270 self._custom_attrs["compartment_center_xy"] = (
    271     self.get_compartment_center_xy_from_data()
    272     if compartment_center_xy is None or compartment_center_xy is True
   (...)
    275     else None
    276 )
    278 self._custom_attrs["data_image_paths"] = (
--> 279     self.get_image_paths_from_data(image_cols=self.find_image_columns())
    280     if data_image_paths is None
    281     else data_image_paths
    282 )
    284 # Wrap methods so they return CytoDataFrames
    285 # instead of Pandas DataFrames.
    286 self._wrap_methods()

File ~/mambaforge/envs/pccma_atlas_whole_img_qc_env/lib/python3.10/site-packages/cytodataframe/frame.py:727, in CytoDataFrame.get_image_paths_from_data(self, image_cols)
    706 def get_image_paths_from_data(
    707     self: CytoDataFrame_type, image_cols: List[str]
    708 ) -> Dict[str, str]:
    709     """
    710     Gather data containing image path names
    711     (the directory storing the images but not the file
   (...)
    724 
    725     """
--> 727     image_path_columns = [
    728         col.replace("FileName", "PathName")
    729         for col in image_cols
    730         if col.replace("FileName", "PathName") in self.columns
    731     ]
    733     logger.debug("Found image path columns: %s", image_path_columns)
    735     return self.filter(items=image_path_columns) if image_path_columns else None

File ~/mambaforge/envs/pccma_atlas_whole_img_qc_env/lib/python3.10/site-packages/cytodataframe/frame.py:730, in <listcomp>(.0)
    706 def get_image_paths_from_data(
    707     self: CytoDataFrame_type, image_cols: List[str]
    708 ) -> Dict[str, str]:
    709     """
    710     Gather data containing image path names
    711     (the directory storing the images but not the file
   (...)
    724 
    725     """
    727     image_path_columns = [
    728         col.replace("FileName", "PathName")
    729         for col in image_cols
--> 730         if col.replace("FileName", "PathName") in self.columns
    731     ]
    733     logger.debug("Found image path columns: %s", image_path_columns)
    735     return self.filter(items=image_path_columns) if image_path_columns else None

AttributeError: 'int' object has no attribute 'replace'

Workaround: Do not render the data as a CytoDataFrame. This avoids the error, but the images then have to be inspected manually. I would love to see support for whole-image (FOV-level) rendering added if it fits within the scope of the software.

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions