88
99import pytest
1010
11+ from ga4gh .vrs import __version__
1112from ga4gh .vrs .dataproxy import DataProxyValidationError , SeqRepoRESTDataProxy
1213from ga4gh .vrs .extras .annotator .vcf import VcfAnnotator , VcfAnnotatorError
1314
@@ -33,6 +34,23 @@ def input_vcf():
3334 return TEST_DATA_DIR / "test_vcf_input.vcf"
3435
3536
37+ def compare_vcfs (actual_vcf_path : Path , expected_vcf_path : Path ):
38+ """VRS-Python version annotation would be annoying to manually update. This helper
39+ method replaces a placeholder string with the real version, and otherwise performs
40+ a pairwise check for all lines in each VCF.
41+ """
42+ with gzip .open (actual_vcf_path , "rt" ) as out_vcf :
43+ out_vcf_lines = out_vcf .readlines ()
44+ with gzip .open (expected_vcf_path , "rt" ) as expected_output :
45+ expected_output_lines = expected_output .readlines ()
46+ for actual_line , expected_line in zip (
47+ out_vcf_lines , expected_output_lines , strict = False
48+ ):
49+ if actual_line .startswith ("##INFO=<ID=VRS_Allele_IDs" ):
50+ expected_line = expected_line .replace ("9999" , __version__ )
51+ assert actual_line == expected_line
52+
53+
3654@pytest .mark .vcr
3755def test_annotate_vcf_grch38_noattrs (
3856 vcf_annotator : VcfAnnotator , input_vcf : Path , tmp_path : Path , vcr_cassette
@@ -46,14 +64,7 @@ def test_annotate_vcf_grch38_noattrs(
4664
4765 # Test GRCh38 assembly, which was used for input_vcf and no vrs attributes
4866 vcf_annotator .annotate (input_vcf , output_vcf , output_pkl_path = output_vrs_pkl )
49- with gzip .open (output_vcf , "rt" ) as out_vcf :
50- out_vcf_lines = out_vcf .readlines ()
51- with gzip .open (expected_vcf_no_vrs_attrs , "rt" ) as expected_output :
52- expected_output_lines = expected_output .readlines ()
53- for actual_line , expected_line in zip (
54- out_vcf_lines , expected_output_lines , strict = False
55- ):
56- assert actual_line == expected_line
67+ compare_vcfs (output_vcf , expected_vcf_no_vrs_attrs )
5768 assert output_vrs_pkl .exists ()
5869 assert vcr_cassette .all_played
5970
@@ -71,14 +82,7 @@ def test_annotate_vcf_grch38_attrs(
7182 vcf_annotator .annotate (
7283 input_vcf , output_vcf , vrs_attributes = True , output_pkl_path = output_vrs_pkl
7384 )
74- with gzip .open (output_vcf , "rt" ) as out_vcf :
75- out_vcf_lines = out_vcf .readlines ()
76- with gzip .open (expected_vcf , "rt" ) as expected_output :
77- expected_output_lines = expected_output .readlines ()
78- for actual_line , expected_line in zip (
79- out_vcf_lines , expected_output_lines , strict = False
80- ):
81- assert actual_line == expected_line
85+ compare_vcfs (output_vcf , expected_vcf )
8286 assert output_vrs_pkl .exists ()
8387 assert vcr_cassette .all_played
8488
@@ -100,14 +104,7 @@ def test_annotate_vcf_grch38_attrs_altsonly(
100104 compute_for_ref = False ,
101105 output_pkl_path = output_vrs_pkl ,
102106 )
103- with gzip .open (output_vcf , "rt" ) as out_vcf :
104- out_vcf_lines = out_vcf .readlines ()
105- with gzip .open (expected_altsonly_vcf , "rt" ) as expected_output :
106- expected_output_lines = expected_output .readlines ()
107- for actual_line , expected_line in zip (
108- out_vcf_lines , expected_output_lines , strict = False
109- ):
110- assert actual_line == expected_line
107+ compare_vcfs (output_vcf , expected_altsonly_vcf )
111108 assert output_vrs_pkl .exists ()
112109 assert vcr_cassette .all_played
113110
@@ -166,11 +163,7 @@ def test_annotate_vcf_vcf_only(
166163
167164 # Test only VCF output
168165 vcf_annotator .annotate (input_vcf , output_vcf_path = output_vcf , vrs_attributes = True )
169- with gzip .open (output_vcf , "rt" ) as out_vcf :
170- out_vcf_lines = out_vcf .readlines ()
171- with gzip .open (expected_vcf , "rt" ) as expected_output :
172- expected_output_lines = expected_output .readlines ()
173- assert out_vcf_lines == expected_output_lines
166+ compare_vcfs (output_vcf , expected_vcf )
174167 assert vcr_cassette .all_played
175168 assert not Path (output_vrs_pkl ).exists ()
176169
0 commit comments