 
 import argparse
 import logging
-import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 
 import h5py
 import pandas as pd
 from tqdm import tqdm
 
 from modelarrayio.cli import utils as cli_utils
-from modelarrayio.cli.parser_utils import add_scalar_columns_arg, add_to_modelarray_args
+from modelarrayio.cli.parser_utils import add_to_modelarray_args
 from modelarrayio.utils.cifti import (
-    _build_scalar_sources,
-    _cohort_to_long_dataframe,
-    _load_cohort_cifti,
     brain_names_to_dataframe,
     extract_cifti_scalar_data,
+    load_cohort_cifti,
 )
+from modelarrayio.utils.misc import build_scalar_sources, cohort_to_long_dataframe
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +33,7 @@ def cifti_to_h5(
     shuffle=True,
     chunk_voxels=0,
     target_chunk_mb=2.0,
-    workers=None,
+    workers=1,
     s3_workers=1,
     scalar_columns=None,
 ):
@@ -47,7 +45,7 @@ def cifti_to_h5(
         Path to a csv with demographic info and paths to data
     backend : :obj:`str`
         Backend to use for storage (``'hdf5'`` or ``'tiledb'``)
-    output : :obj:`str`
+    output : :obj:`pathlib.Path`
         Output path. For the hdf5 backend, path to an .h5 file;
         for the tiledb backend, path to a .tdb directory.
     storage_dtype : :obj:`str`
@@ -64,7 +62,7 @@ def cifti_to_h5(
     target_chunk_mb : :obj:`float`
         Target chunk/tile size in MiB when auto-computing the spatial axis length
     workers : :obj:`int`
-        Maximum number of parallel TileDB write workers (``None`` = auto).
+        Maximum number of parallel TileDB write workers. Defaults to ``1``.
         Has no effect when ``backend='hdf5'``.
     s3_workers : :obj:`int`
         Number of workers for parallel S3 downloads
@@ -77,19 +75,49 @@ def cifti_to_h5(
         0 if successful, 1 if failed.
     """
     cohort_df = pd.read_csv(cohort_file)
-    cohort_long = _cohort_to_long_dataframe(cohort_df, scalar_columns=scalar_columns)
-    output_path = Path(output)
+    cohort_long = cohort_to_long_dataframe(cohort_df, scalar_columns=scalar_columns)
     if cohort_long.empty:
         raise ValueError('Cohort file does not contain any scalar entries after normalization.')
-    scalar_sources = _build_scalar_sources(cohort_long)
+    scalar_sources = build_scalar_sources(cohort_long)
     if not scalar_sources:
         raise ValueError('Unable to derive scalar sources from cohort file.')
+    scalar_names = list(scalar_sources.keys())
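+    # One output file per scalar only when explicit scalar columns were requested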
+    split_scalar_outputs = bool(scalar_columns)
 
     if backend == 'hdf5':
-        scalars, last_brain_names = _load_cohort_cifti(cohort_long, s3_workers)
+        if split_scalar_outputs:
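+            # Load every CIFTI once, then fan the scalars out to separate HDF5 files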
+            scalars, last_brain_names = load_cohort_cifti(cohort_long, s3_workers)
+            greyordinate_table, structure_names = brain_names_to_dataframe(last_brain_names)
+            outputs: list[Path] = []
+            for scalar_name in scalar_names:
+                scalar_output = cli_utils.prepare_output_parent(
+                    cli_utils.prefixed_output_path(output, scalar_name)
+                )
+                with h5py.File(scalar_output, 'w') as h5_file:
+                    cli_utils.write_table_dataset(
+                        h5_file,
+                        'greyordinates',
+                        greyordinate_table,
+                        extra_attrs={'structure_names': structure_names},
+                    )
+                    cli_utils.write_hdf5_scalar_matrices(
+                        h5_file,
+                        {scalar_name: scalars[scalar_name]},
+                        {scalar_name: scalar_sources[scalar_name]},
+                        storage_dtype=storage_dtype,
+                        compression=compression,
+                        compression_level=compression_level,
+                        shuffle=shuffle,
+                        chunk_voxels=chunk_voxels,
+                        target_chunk_mb=target_chunk_mb,
+                    )
+                outputs.append(scalar_output)
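+            # Succeed (return 0) only if every per-scalar file exists on disk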
+            return int(not all(path.exists() for path in outputs))
+
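+        # Default single-file mode: every scalar lands in one HDF5 file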
+        scalars, last_brain_names = load_cohort_cifti(cohort_long, s3_workers)
         greyordinate_table, structure_names = brain_names_to_dataframe(last_brain_names)
-        output_path = cli_utils.prepare_output_parent(output_path)
-        with h5py.File(output_path, 'w') as h5_file:
+        output = cli_utils.prepare_output_parent(output)
+        with h5py.File(output, 'w') as h5_file:
             cli_utils.write_table_dataset(
                 h5_file,
                 'greyordinates',
@@ -107,9 +135,8 @@ def cifti_to_h5(
                 chunk_voxels=chunk_voxels,
                 target_chunk_mb=target_chunk_mb,
             )
-        return int(not output_path.exists())
+        return int(not output.exists())
 
-    output_path.mkdir(parents=True, exist_ok=True)
     if not scalar_sources:
         return 0
 
@@ -126,8 +153,13 @@ def _process_scalar_job(scalar_name, source_files):
             rows.append(cifti_data)
 
         if rows:
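+            # Split mode gives each scalar its own prefixed path; otherwise all share one store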
+            scalar_output = (
+                cli_utils.prefixed_output_path(output, scalar_name)
+                if split_scalar_outputs
+                else output
+            )
             cli_utils.write_tiledb_scalar_matrices(
-                output_path,
+                scalar_output,
                 {scalar_name: rows},
                 {scalar_name: source_files},
                 storage_dtype=storage_dtype,
@@ -140,13 +172,7 @@ def _process_scalar_job(scalar_name, source_files):
             )
         return scalar_name
 
-    scalar_names = list(scalar_sources.keys())
-    worker_count = workers if isinstance(workers, int) and workers > 0 else None
-    if worker_count is None:
-        cpu_count = os.cpu_count() or 1
-        worker_count = min(len(scalar_names), max(1, cpu_count))
-    else:
-        worker_count = min(len(scalar_names), worker_count)
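+    # Never start more write workers than there are scalar jobs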
+    worker_count = min(len(scalar_names), workers)
 
     if worker_count <= 1:
         for scalar_name in scalar_names:
@@ -178,5 +204,4 @@ def _parse_cifti_to_h5():
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
     add_to_modelarray_args(parser, default_output='greyordinatearray.h5')
-    add_scalar_columns_arg(parser)
     return parser