Skip to content

Commit 180127f

Browse files
authored
feat: add version to annotated vcf (#541)
Closes #517. Adds a small version tag to the description of the IDs field.
1 parent 066404c commit 180127f

5 files changed

Lines changed: 24 additions & 30 deletions

File tree

src/ga4gh/vrs/extras/annotator/vcf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
VrsObjectIdentifierIs,
1414
use_ga4gh_compute_identifier_when,
1515
)
16+
from ga4gh.vrs import __version__
1617
from ga4gh.vrs.dataproxy import _DataProxy
1718
from ga4gh.vrs.extras.translator import AlleleTranslator
1819
from ga4gh.vrs.models import Allele
@@ -137,7 +138,7 @@ def _update_vcf_header(
137138
"String",
138139
(
139140
"The computed identifiers for the GA4GH VRS Alleles corresponding to the "
140-
f"GT indexes of the {info_field_desc} alleles"
141+
f"GT indexes of the {info_field_desc} alleles [VRS-Python version {__version__}]"
141142
),
142143
)
143144
vcf.header.info.add(
-145 Bytes
Binary file not shown.
-145 Bytes
Binary file not shown.
-139 Bytes
Binary file not shown.

tests/extras/test_annotate_vcf.py

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import pytest
1010

11+
from ga4gh.vrs import __version__
1112
from ga4gh.vrs.dataproxy import DataProxyValidationError, SeqRepoRESTDataProxy
1213
from ga4gh.vrs.extras.annotator.vcf import VcfAnnotator, VcfAnnotatorError
1314

@@ -33,6 +34,23 @@ def input_vcf():
3334
return TEST_DATA_DIR / "test_vcf_input.vcf"
3435

3536

37+
def compare_vcfs(actual_vcf_path: Path, expected_vcf_path: Path):
38+
"""VRS-Python version annotation would be annoying to manually update. This helper
39+
method replaces a placeholder string with the real version, and otherwise performs
40+
a pairwise check for all lines in each VCF.
41+
"""
42+
with gzip.open(actual_vcf_path, "rt") as out_vcf:
43+
out_vcf_lines = out_vcf.readlines()
44+
with gzip.open(expected_vcf_path, "rt") as expected_output:
45+
expected_output_lines = expected_output.readlines()
46+
for actual_line, expected_line in zip(
47+
out_vcf_lines, expected_output_lines, strict=False
48+
):
49+
if actual_line.startswith("##INFO=<ID=VRS_Allele_IDs"):
50+
expected_line = expected_line.replace("9999", __version__)
51+
assert actual_line == expected_line
52+
53+
3654
@pytest.mark.vcr
3755
def test_annotate_vcf_grch38_noattrs(
3856
vcf_annotator: VcfAnnotator, input_vcf: Path, tmp_path: Path, vcr_cassette
@@ -46,14 +64,7 @@ def test_annotate_vcf_grch38_noattrs(
4664

4765
# Test GRCh38 assembly, which was used for input_vcf and no vrs attributes
4866
vcf_annotator.annotate(input_vcf, output_vcf, output_pkl_path=output_vrs_pkl)
49-
with gzip.open(output_vcf, "rt") as out_vcf:
50-
out_vcf_lines = out_vcf.readlines()
51-
with gzip.open(expected_vcf_no_vrs_attrs, "rt") as expected_output:
52-
expected_output_lines = expected_output.readlines()
53-
for actual_line, expected_line in zip(
54-
out_vcf_lines, expected_output_lines, strict=False
55-
):
56-
assert actual_line == expected_line
67+
compare_vcfs(output_vcf, expected_vcf_no_vrs_attrs)
5768
assert output_vrs_pkl.exists()
5869
assert vcr_cassette.all_played
5970

@@ -71,14 +82,7 @@ def test_annotate_vcf_grch38_attrs(
7182
vcf_annotator.annotate(
7283
input_vcf, output_vcf, vrs_attributes=True, output_pkl_path=output_vrs_pkl
7384
)
74-
with gzip.open(output_vcf, "rt") as out_vcf:
75-
out_vcf_lines = out_vcf.readlines()
76-
with gzip.open(expected_vcf, "rt") as expected_output:
77-
expected_output_lines = expected_output.readlines()
78-
for actual_line, expected_line in zip(
79-
out_vcf_lines, expected_output_lines, strict=False
80-
):
81-
assert actual_line == expected_line
85+
compare_vcfs(output_vcf, expected_vcf)
8286
assert output_vrs_pkl.exists()
8387
assert vcr_cassette.all_played
8488

@@ -100,14 +104,7 @@ def test_annotate_vcf_grch38_attrs_altsonly(
100104
compute_for_ref=False,
101105
output_pkl_path=output_vrs_pkl,
102106
)
103-
with gzip.open(output_vcf, "rt") as out_vcf:
104-
out_vcf_lines = out_vcf.readlines()
105-
with gzip.open(expected_altsonly_vcf, "rt") as expected_output:
106-
expected_output_lines = expected_output.readlines()
107-
for actual_line, expected_line in zip(
108-
out_vcf_lines, expected_output_lines, strict=False
109-
):
110-
assert actual_line == expected_line
107+
compare_vcfs(output_vcf, expected_altsonly_vcf)
111108
assert output_vrs_pkl.exists()
112109
assert vcr_cassette.all_played
113110

@@ -166,11 +163,7 @@ def test_annotate_vcf_vcf_only(
166163

167164
# Test only VCF output
168165
vcf_annotator.annotate(input_vcf, output_vcf_path=output_vcf, vrs_attributes=True)
169-
with gzip.open(output_vcf, "rt") as out_vcf:
170-
out_vcf_lines = out_vcf.readlines()
171-
with gzip.open(expected_vcf, "rt") as expected_output:
172-
expected_output_lines = expected_output.readlines()
173-
assert out_vcf_lines == expected_output_lines
166+
compare_vcfs(output_vcf, expected_vcf)
174167
assert vcr_cassette.all_played
175168
assert not Path(output_vrs_pkl).exists()
176169

0 commit comments

Comments
 (0)