11"""Convert CIFTI2 dscalar data to an HDF5 file."""
22
3+ from __future__ import annotations
4+
35import argparse
46import logging
57import os
68from concurrent .futures import ThreadPoolExecutor , as_completed
9+ from pathlib import Path
710
811import h5py
912import pandas as pd
1013from tqdm import tqdm
1114
15+ from modelarrayio .cli import utils as cli_utils
1216from modelarrayio .cli .parser_utils import (
1317 add_backend_arg ,
1418 add_cohort_arg ,
1721 add_scalar_columns_arg ,
1822 add_storage_args ,
1923)
20- from modelarrayio .storage import h5_storage , tiledb_storage
2124from modelarrayio .utils .cifti import (
2225 _build_scalar_sources ,
2326 _cohort_to_long_dataframe ,
@@ -82,6 +85,7 @@ def cifti_to_h5(
8285 """
8386 cohort_df = pd .read_csv (cohort_file )
8487 cohort_long = _cohort_to_long_dataframe (cohort_df , scalar_columns = scalar_columns )
88+ output_path = Path (output )
8589 if cohort_long .empty :
8690 raise ValueError ('Cohort file does not contain any scalar entries after normalization.' )
8791 scalar_sources = _build_scalar_sources (cohort_long )
@@ -90,101 +94,82 @@ def cifti_to_h5(
9094
9195 if backend == 'hdf5' :
9296 scalars , last_brain_names = _load_cohort_cifti (cohort_long , s3_workers )
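+        # scalars maps scalar_name -> list of per-subject arrays (one row per
+        # subject); last_brain_names carries the greyordinate axis of the last
+        # file read, used to label the greyordinates table below.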
-
-        f = h5py.File(output, 'w')
-
        greyordinate_table, structure_names = brain_names_to_dataframe(last_brain_names)
-        greyordinatesh5 = f.create_dataset(
-            name='greyordinates', data=greyordinate_table.to_numpy().T
-        )
-        greyordinatesh5.attrs['column_names'] = list(greyordinate_table.columns)
-        greyordinatesh5.attrs['structure_names'] = structure_names
-
-        for scalar_name in scalars.keys():
-            num_subjects = len(scalars[scalar_name])
-            num_items = scalars[scalar_name][0].shape[0] if num_subjects > 0 else 0
-            dset = h5_storage.create_empty_scalar_matrix_dataset(
-                f,
-                f'scalars/{scalar_name}/values',
-                num_subjects,
-                num_items,
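+        # prepare_output_parent is assumed to create the parent directory of
+        # output_path (and return the normalized path) before the file is opened.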
+        output_path = cli_utils.prepare_output_parent(output_path)
+        with h5py.File(output_path, 'w') as h5_file:
+            cli_utils.write_table_dataset(
+                h5_file,
+                'greyordinates',
+                greyordinate_table,
+                extra_attrs={'structure_names': structure_names},
+            )
+            cli_utils.write_hdf5_scalar_matrices(
+                h5_file,
+                scalars,
+                scalar_sources,
                storage_dtype=storage_dtype,
                compression=compression,
                compression_level=compression_level,
                shuffle=shuffle,
                chunk_voxels=chunk_voxels,
                target_chunk_mb=target_chunk_mb,
-                sources_list=scalar_sources[scalar_name],
            )
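+        # Layout sketch, assuming write_hdf5_scalar_matrices keeps the
+        # per-scalar dataset paths used by the old code above:
+        #   /greyordinates                  greyordinate table (+ structure_names attr)
+        #   /scalars/<scalar_name>/values   one subjects-by-greyordinates matrix each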
+        return int(not output_path.exists())

-            h5_storage.write_rows_in_column_stripes(dset, scalars[scalar_name])
-        f.close()
-        return int(not os.path.exists(output))
-    else:
-        os.makedirs(output, exist_ok=True)
-        if not scalar_sources:
-            return 0
-
-        # Establish a reference brain axis once to ensure consistent ordering across workers.
-        _first_scalar, first_sources = next(iter(scalar_sources.items()))
-        first_path = first_sources[0]
-        _, reference_brain_names = extract_cifti_scalar_data(first_path)
-
-        def _process_scalar_job(scalar_name, source_files):
-            dataset_path = f'scalars/{scalar_name}/values'
-            rows = []
-            for source_file in source_files:
-                cifti_data, _ = extract_cifti_scalar_data(
-                    source_file, reference_brain_names=reference_brain_names
-                )
-                rows.append(cifti_data)
-
-            num_subjects = len(rows)
-            if num_subjects == 0:
-                return scalar_name
-            num_items = rows[0].shape[0]
-            tiledb_storage.create_empty_scalar_matrix_array(
-                output,
-                dataset_path,
-                num_subjects,
-                num_items,
+    output_path.mkdir(parents=True, exist_ok=True)
+    if not scalar_sources:
+        return 0
+
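+    # Establish a reference brain axis once to ensure consistent ordering across workers.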
+    _first_scalar, first_sources = next(iter(scalar_sources.items()))
+    first_path = first_sources[0]
+    _, reference_brain_names = extract_cifti_scalar_data(first_path)
+
+    def _process_scalar_job(scalar_name, source_files):
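+        # Load every CIFTI for this scalar, re-ordered onto the reference
+        # brain axis, then write the stacked rows as one TileDB matrix.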
+        rows = []
+        for source_file in source_files:
+            cifti_data, _ = extract_cifti_scalar_data(
+                source_file, reference_brain_names=reference_brain_names
+            )
+            rows.append(cifti_data)
+
+        if rows:
+            cli_utils.write_tiledb_scalar_matrices(
+                output_path,
+                {scalar_name: rows},
+                {scalar_name: source_files},
                storage_dtype=storage_dtype,
                compression=compression,
                compression_level=compression_level,
                shuffle=shuffle,
-                tile_voxels=chunk_voxels,
-                target_tile_mb=target_chunk_mb,
-                sources_list=source_files,
+                chunk_voxels=chunk_voxels,
+                target_chunk_mb=target_chunk_mb,
+                write_column_name_arrays=True,
            )
-            # write column names array for ModelArray compatibility
-            tiledb_storage.write_column_names(output, scalar_name, source_files)
-            uri = os.path.join(output, dataset_path)
-            tiledb_storage.write_rows_in_column_stripes(uri, rows)
        return scalar_name

-        scalar_names = list(scalar_sources.keys())
-        worker_count = workers if isinstance(workers, int) and workers > 0 else None
-        if worker_count is None:
-            cpu_count = os.cpu_count() or 1
-            worker_count = min(len(scalar_names), max(1, cpu_count))
-        else:
-            worker_count = min(len(scalar_names), worker_count)
-
-        if worker_count <= 1:
-            for scalar_name in scalar_names:
-                _process_scalar_job(scalar_name, scalar_sources[scalar_name])
-        else:
-            desc = 'TileDB scalars'
-            with ThreadPoolExecutor(max_workers=worker_count) as executor:
-                futures = {
-                    executor.submit(
-                        _process_scalar_job, scalar_name, scalar_sources[scalar_name]
-                    ): scalar_name
-                    for scalar_name in scalar_names
-                }
-                for future in tqdm(as_completed(futures), total=len(futures), desc=desc):
-                    future.result()
-        return 0
+    scalar_names = list(scalar_sources.keys())
+    worker_count = workers if isinstance(workers, int) and workers > 0 else None
+    if worker_count is None:
+        cpu_count = os.cpu_count() or 1
+        worker_count = min(len(scalar_names), max(1, cpu_count))
+    else:
+        worker_count = min(len(scalar_names), worker_count)
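+    # Worked example: 10 scalars on a 4-CPU host with workers=None gives
+    # worker_count = min(10, max(1, 4)) = 4; an explicit workers=16 is clamped to 10.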
+
+    if worker_count <= 1:
+        for scalar_name in scalar_names:
+            _process_scalar_job(scalar_name, scalar_sources[scalar_name])
+    else:
+        desc = 'TileDB scalars'
+        with ThreadPoolExecutor(max_workers=worker_count) as executor:
+            futures = {
+                executor.submit(_process_scalar_job, scalar_name, scalar_sources[scalar_name]): (
+                    scalar_name
+                )
+                for scalar_name in scalar_names
+            }
+            for future in tqdm(as_completed(futures), total=len(futures), desc=desc):
+                future.result()
+    return 0


def cifti_to_h5_main(
@@ -203,10 +188,7 @@ def cifti_to_h5_main(
    log_level='INFO',
):
    """Entry point for the ``modelarrayio cifti-to-h5`` command."""
-    logging.basicConfig(
-        level=getattr(logging, str(log_level).upper(), logging.INFO),
-        format='[%(levelname)s] %(name)s: %(message)s',
-    )
+    cli_utils.configure_logging(log_level)
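+    # configure_logging is assumed to reproduce the old basicConfig setup
+    # (level parsed from log_level, '[%(levelname)s] %(name)s: %(message)s' format).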
    return cifti_to_h5(
        cohort_file=cohort_file,
        backend=backend,