Skip to content

Commit d2e786b

Browse files
committed
ruff formatting
1 parent 91bc9c0 commit d2e786b

1 file changed

Lines changed: 74 additions & 34 deletions

File tree

aviti_index_fixer.py

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,30 @@
33
import pandas as pd
44

55
# Watson-Crick base pairing used to complement index sequences.
# Only upper-case A/T/C/G are mapped; any other character (e.g. "N" or
# lower-case bases) is absent and will raise KeyError on lookup.
# NOTE: the constant name keeps its historical spelling ("COMPLIMENT")
# because other code in this file refers to it by that name.
NT_COMPLIMENT = {
    "A": "T",
    "T": "A",
    "C": "G",
    "G": "C",
}
1111

12+
1213
def load_manifest(path):
    """Load the manifest from the given path.

    The file is split on the literal ``[SAMPLES]`` marker. Everything before
    the marker is kept verbatim as the header; the text after it is stripped,
    split into lines and handed to ``load_sample_dataframe``.

    Args:
        path: Path to the manifest file to read.

    Returns:
        A tuple ``(header_section, samples_info, controls_info)`` where
        ``header_section`` is the raw text preceding ``[SAMPLES]``,
        ``samples_info`` holds the rows whose ``Project`` is not
        ``"Control"`` and ``controls_info`` holds the rows whose ``Project``
        is ``"Control"``. Both frames are copies and keep the row labels of
        the frame returned by ``load_sample_dataframe``.

    Raises:
        IndexError: If the file does not contain a ``[SAMPLES]`` marker.
    """
    with open(path, "r") as file:
        manifest_content = file.read()

    # Split once and reuse both halves.
    sections = manifest_content.split("[SAMPLES]")
    header_section = sections[0]
    samples_section = sections[1].strip().split("\n")

    all_samples = load_sample_dataframe(samples_section)

    # Controls are separated out so that no changes are applied to them.
    is_control = all_samples["Project"] == "Control"
    samples_info = all_samples[~is_control].copy()
    controls_info = all_samples[is_control].copy()

    return header_section, samples_info, controls_info
2528

29+
2630
def load_sample_dataframe(manifest_data):
2731
"""Load the sample data into a DataFrame."""
2832
header = manifest_data[0]
@@ -36,58 +40,94 @@ def load_sample_dataframe(manifest_data):
3640
samples_info = pd.DataFrame.from_dict(sample_dicts)
3741
return samples_info
3842

43+
3944
def reverse_complement_index(index):
    """Return the reverse complement of a given index.

    The sequence is walked from its last character to its first and each
    character is replaced through ``NT_COMPLIMENT``.

    Args:
        index: Index sequence as a string of characters that are keys of
            ``NT_COMPLIMENT``.

    Returns:
        The reverse-complemented sequence as a string. An empty input yields
        an empty string.

    Raises:
        KeyError: If ``index`` contains a character that is not a key of
            ``NT_COMPLIMENT``.
    """
    return "".join(NT_COMPLIMENT[nuc] for nuc in reversed(index))
4247

43-
def _project_mask(samples_info, project):
    """Return a boolean row mask aligned to ``samples_info.index``."""
    if project:
        # The project ID is the part of SampleName before the first "_".
        return (
            samples_info["SampleName"]
            .apply(lambda name: name.split("_")[0] in project)
            .astype(bool)
        )
    # Build the all-True mask on the frame's own index: samples_info is a
    # filtered copy whose row labels are not guaranteed to be 0..n-1, and
    # .loc aligns a boolean Series by label.
    return pd.Series(True, index=samples_info.index)


def _updated_manifest_path(manifest_path):
    """Return ``manifest_path`` with ``_updated`` inserted before its extension."""
    # splitext only touches the final extension, so a ".csv" elsewhere in the
    # path is left alone and the result never equals the input path.
    root, extension = os.path.splitext(manifest_path)
    return root + "_updated" + extension


# Command-line entry point. `cli` is imported outside the visible part of this
# file (presumably click, given the command/option decorators; confirm).
#
# Processing order:
#   1. Load the manifest and separate control rows from sample rows.
#   2. Select the rows to edit (all samples, or only the given projects).
#   3. Apply rc1, then rc2, then swap to the selected rows.
#   4. Rebuild lims_label as "<Index1>-<Index2>" if any of those ran.
#   5. Append any additional samples (inline CSV row or a headerless CSV file).
#   6. Sort by Lane and SampleName and write "<manifest>_updated<ext>".
@cli.command()
@cli.option(
    "--manifest_path",
    required=True,
    help="Path to the sample manifest. e.g. ~/fc/AVITI_run_manifest_2450545934_24-1214961_250722_154957_EunkyoungChoi_untrimmed.csv",
)
@cli.option(
    "--project",
    multiple=True,
    required=False,
    help="Project ID, e.g. P10001. Only the indexes of samples with this specific project ID will be changed. Use multiple times for multiple projects.",
)
@cli.option("--swap", is_flag=True, help="Swaps index 1 and 2.")
@cli.option("--rc1", is_flag=True, help="Exchanges index 1 for its reverse complement.")
@cli.option("--rc2", is_flag=True, help="Exchanges index 2 for its reverse complement.")
@cli.option(
    "--add_sample",
    multiple=True,
    help="Include additional sample(s). Use multiple times for multiple samples, or provide a file. Each new sample should have the same format as in the existing manifest. Example: --add_sample P12345,ATCG,CGTA,1,A__Project_25_16,301-10-10-301,ATCG-CGTA,",
)
def main(manifest_path, project, swap, rc1, rc2, add_sample):
    """Main function to fix the samplesheet indexes for AVITI runs."""
    manifest_header, samples_info, controls_info = load_manifest(manifest_path)
    mask = _project_mask(samples_info, project)

    if rc1:
        samples_info.loc[mask, "Index1"] = samples_info.loc[mask, "Index1"].apply(
            reverse_complement_index
        )
        print("Reverse complementing Index1")
    if rc2:
        samples_info.loc[mask, "Index2"] = samples_info.loc[mask, "Index2"].apply(
            reverse_complement_index
        )
        print("Reverse complementing Index2")
    if swap:
        # .values drops the column labels so the assignment is positional;
        # without it pandas would realign by column name and undo the swap.
        samples_info.loc[mask, ["Index1", "Index2"]] = samples_info.loc[
            mask, ["Index2", "Index1"]
        ].values
        print("Swapping Index1 and Index2")
    if rc1 or rc2 or swap:
        # Update lims_label if any changes were made
        samples_info.loc[mask, "lims_label"] = (
            samples_info.loc[mask, "Index1"] + "-" + samples_info.loc[mask, "Index2"]
        )

    for additional_sample in add_sample:
        if os.path.isfile(additional_sample):
            # A file argument is read as headerless CSV with the manifest's columns.
            additional_samples = pd.read_csv(additional_sample, header=None)
            additional_samples.columns = samples_info.columns
        else:
            # Otherwise the argument itself is one comma-separated sample row.
            additional_samples = pd.DataFrame(
                [additional_sample.split(",")], columns=samples_info.columns
            )
        samples_info = pd.concat([samples_info, additional_samples], ignore_index=True)
        if len(additional_samples) == 1:
            print(
                "Adding additional sample:",
                additional_samples["SampleName"].tolist()[0],
            )
        else:
            print(
                "Adding additional samples:",
                (", ").join(additional_samples["SampleName"].tolist()),
            )

    # Lane is cast to int so the sort is numeric rather than lexicographic.
    samples_info["Lane"] = samples_info["Lane"].astype(int)
    samples_info = samples_info.sort_values(by=["Lane", "SampleName"])

    # Samples are written with their column header; control rows follow
    # without one so they share the same table.
    updated_samplesheet = (
        manifest_header
        + "\n[SAMPLES]\n"
        + samples_info.to_csv(index=False, header=True)
        + controls_info.to_csv(index=False, header=False)
    )
    output_path = _updated_manifest_path(manifest_path)
    with open(output_path, "w") as output_file:
        output_file.write(updated_samplesheet)
91130

92-
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)