|
| 1 | +#!/usr/bin/env python |
| 2 | +"""Plot a matrix showing which participants have data from each dataset. |
| 3 | +
|
| 4 | +Creates a visualization with participants as rows and datasets (hyperface, |
| 5 | +budapest, identity-decoding) as columns. |
| 6 | +
|
| 7 | +Examples: |
| 8 | + python scripts/qa/qa-plot-participant-datasets.py |
| 9 | +""" |
| 10 | + |
| 11 | +import matplotlib.pyplot as plt |
| 12 | +import numpy as np |
| 13 | +import pandas as pd |
| 14 | + |
| 15 | +from hyperface.qa import create_qa_argument_parser, get_config |
| 16 | + |
| 17 | + |
def main() -> None:
    """Plot and save the participant-by-dataset availability matrix.

    Reads ``participants.tsv`` from the configured data directory, builds a
    binary matrix with one row per participant and one column per dataset
    (hyperface, budapest, identity_decoding), and saves it as
    ``participant-dataset-matrix.png`` in the ``figures`` folder under the
    configured QA base directory.

    Raises:
        KeyError: If ``participants.tsv`` lacks a required column.
        ValueError: If ``participants.tsv`` contains no participants.
    """
    # Import from its documented home rather than via ``plt.cm.colors``,
    # which is only an incidental attribute of ``matplotlib.cm``.
    from matplotlib.colors import ListedColormap

    parser = create_qa_argument_parser(
        description="Plot participant dataset availability matrix.",
        include_subjects=False,
    )
    args = parser.parse_args()
    config = get_config(config_path=args.config, data_dir=args.data_dir)

    # Load participants file
    participants_file = config.paths.data_dir / "participants.tsv"
    df = pd.read_csv(participants_file, sep="\t")

    # Single source of truth for column order, matrix width and tick labels.
    # The first dataset has no Yes/No column: every participant has it.
    datasets = ["hyperface", "budapest", "identity_decoding"]
    yes_no_datasets = datasets[1:]

    # Fail early with a message naming the file instead of a bare KeyError.
    required_columns = ["participant_id", *yes_no_datasets]
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise KeyError(
            f"{participants_file} is missing required column(s): "
            f"{', '.join(missing)}"
        )

    # An empty table would give a zero-height figure; report it clearly.
    if df.empty:
        raise ValueError(f"{participants_file} lists no participants")

    # Create binary matrix (rows: participants, columns: datasets)
    n_participants = len(df)
    matrix = np.zeros((n_participants, len(datasets)), dtype=int)

    # All participants have hyperface
    matrix[:, 0] = 1

    # Convert Yes/No to 1/0; anything other than exactly "Yes" counts as 0
    for column, name in enumerate(yes_no_datasets, start=1):
        matrix[:, column] = (df[name] == "Yes").astype(int)

    # Height scales with the participant count so cells stay legible
    fig, ax = plt.subplots(figsize=(2, n_participants * 0.25))
    try:
        # Plot heatmap using pcolormesh for precise cell edges
        ax.pcolormesh(
            matrix,
            cmap=ListedColormap(["white", "tab:blue"]),
            vmin=0,
            vmax=1,
            edgecolors="lightgray",
            linewidth=0.3,
        )
        ax.set_aspect("equal")
        # Put the first participant at the top of the plot
        ax.invert_yaxis()

        # Labels: ticks sit at cell centres, hence the +0.5 offset
        ax.set_xticks(np.arange(len(datasets)) + 0.5)
        ax.set_xticklabels(
            # Break multi-word names onto two lines to keep columns narrow
            [name.replace("_", "\n") for name in datasets],
            rotation=45,
            ha="left",
        )
        ax.xaxis.tick_top()
        ax.set_yticks(np.arange(n_participants) + 0.5)
        # regex=False: "sub-" is a literal, and the default for ``regex``
        # differs between pandas versions.
        ax.set_yticklabels(
            df["participant_id"].str.replace("sub-", "", regex=False)
        )

        ax.set_ylabel("Participant")

        plt.tight_layout()

        # Save figure
        output_dir = config.paths.qa_base_dir / "figures"
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / "participant-dataset-matrix.png"
        fig.savefig(output_file, dpi=600, bbox_inches="tight")
        print(f"Saved figure to {output_file}")
    finally:
        # Release the figure even if plotting or saving fails
        plt.close(fig)


if __name__ == "__main__":
    main()
0 commit comments