Skip to content

Commit de2e192

Browse files
committed
feat!: remove beacon translator
close #622 * Translator functionality was deprecated in #578. This commit removes the Beacon translator completely.
1 parent 0c69104 commit de2e192

6 files changed

Lines changed: 6 additions & 209 deletions

File tree

CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ VRS-Python is the reference implementation of the GA4GH Variation Representation
3737
- **`src/ga4gh/vrs/enderef.py`** - Converting between inlined and referenced VRS object forms
3838

3939
### Key Modules
40-
- **`src/ga4gh/vrs/extras/translator.py`** - Translates between VRS and external formats (HGVS, SPDI, gnomAD, Beacon)
40+
- **`src/ga4gh/vrs/extras/translator.py`** - Translates between VRS and external formats (HGVS, SPDI, gnomAD)
4141
- **`src/ga4gh/vrs/extras/annotator/`** - VCF annotation tools with VRS identifiers
4242
- **`src/ga4gh/vrs/utils/hgvs_tools.py`** - HGVS parsing and validation utilities
4343
- **`src/ga4gh/core/`** - Core GA4GH models and identifier generation

notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb

Lines changed: 1 addition & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
},
1212
"source": [
1313
"# 4. Exploring the AlleleTranslator\n",
14-
"There are four variant nomenclatures available in the vrs-python *AlleleTranslator*: SPDI, gnomad/VCF, Beacon and HGVS. In this notebook we will perform a simple Allele translation for each. We will use each of the four nomenclatures for a single variant in translating variants to VRS. his variant can be viewed in \n",
14+
"There are three variant nomenclatures available in the vrs-python *AlleleTranslator*: SPDI, gnomad/VCF, and HGVS. In this notebook we will perform a simple Allele translation for each. We will use each of the three nomenclatures for a single variant in translating variants to VRS. This variant can be viewed in \n",
1515
"[ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/variation/652570) and in [gnomAD](https://gnomad.broadinstitute.org/variant/5-80656489-C-T)."
1616
]
1717
},
@@ -386,61 +386,6 @@
386386
"allele_translator.translate_to(allele, \"spdi\")"
387387
]
388388
},
389-
{
390-
"cell_type": "markdown",
391-
"id": "4e0911a7694a060",
392-
"metadata": {
393-
"collapsed": false,
394-
"jupyter": {
395-
"outputs_hidden": false
396-
}
397-
},
398-
"source": [
399-
"#### From Beacon (VCF-like)\n",
400-
"For variants represented in the Beacon nomenclature, the *AlleleTranslator* currently only supports *translate_from* to convert to VRS. *translate_to* is not yet supported."
401-
]
402-
},
403-
{
404-
"cell_type": "code",
405-
"execution_count": 10,
406-
"id": "57f54e6c3854a48f",
407-
"metadata": {
408-
"ExecuteTime": {
409-
"end_time": "2024-04-19T17:13:51.743020Z",
410-
"start_time": "2024-04-19T17:13:51.663593Z"
411-
},
412-
"collapsed": false,
413-
"jupyter": {
414-
"outputs_hidden": false
415-
}
416-
},
417-
"outputs": [
418-
{
419-
"data": {
420-
"text/plain": [
421-
"{'id': 'ga4gh:VA.ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n",
422-
" 'type': 'Allele',\n",
423-
" 'digest': 'ebezGL6HoAhtGJyVnB_mE5BH18ntKev4',\n",
424-
" 'location': {'id': 'ga4gh:SL.JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n",
425-
" 'type': 'SequenceLocation',\n",
426-
" 'digest': 'JiLRuuyS5wefF_6-Vw7m3Yoqqb2YFkss',\n",
427-
" 'sequenceReference': {'type': 'SequenceReference',\n",
428-
" 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n",
429-
" 'start': 80656488,\n",
430-
" 'end': 80656489},\n",
431-
" 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'T'}}"
432-
]
433-
},
434-
"execution_count": 10,
435-
"metadata": {},
436-
"output_type": "execute_result"
437-
}
438-
],
439-
"source": [
440-
"allele = allele_translator.translate_from(\"5 : 80656489 C > T\", \"beacon\")\n",
441-
"allele.model_dump(exclude_none=True)"
442-
]
443-
},
444389
{
445390
"cell_type": "markdown",
446391
"id": "c4a6abfd56b8fa1e",

notebooks/getting_started/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ from its component parts. The notebook details how to add VRS identifiers to the
140140
### 4 Exploring the Allele Translator
141141
142142
The current implementation of vrs-python facilitates transformation of variants
143-
in a number of different variant nomenclatures (SPDI, HGVS, gnomAD and Beacon) to VRS form. In the
143+
in a number of different variant nomenclatures (SPDI, HGVS, and gnomAD) to VRS form. In the
144144
[Exploring the Allele Translator](4_Exploring_the_AlleleTranslator.ipynb) notebook,
145145
we show how to transform basic variants to VRS, and in some cases, back to the original nomenclature of the variant.
146146

src/ga4gh/vrs/extras/translator.py

Lines changed: 2 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Translates various external formats into VRS models.
22
3-
Input formats: VRS (serialized), hgvs, spdi, gnomad (vcf), beacon
3+
Input formats: VRS (serialized), hgvs, spdi, gnomad (vcf)
44
Output formats: VRS (serialized), hgvs, spdi, gnomad (vcf)
55
66
"""
@@ -11,8 +11,6 @@
1111
from collections.abc import Mapping
1212
from typing import Protocol
1313

14-
from typing_extensions import deprecated
15-
1614
from ga4gh.core import ga4gh_identify
1715
from ga4gh.vrs import models, normalize
1816
from ga4gh.vrs.dataproxy import SequenceProxy, _DataProxy
@@ -58,9 +56,6 @@ class _Translator(ABC): # noqa: B024
5856
5957
"""
6058

61-
beacon_re = re.compile(
62-
r"(?P<chr>[^-]+)\s*:\s*(?P<pos>\d+)\s*(?P<ref>\w+)\s*>\s*(?P<alt>\w+)"
63-
)
6459
gnomad_re = re.compile(
6560
r"(?P<chr>[^-]+)-(?P<pos>\d+)-(?P<ref>[ACGTURYKMSWBDHVN]+)-(?P<alt>[ACGTURYKMSWBDHVN]+)",
6661
re.IGNORECASE,
@@ -101,7 +96,7 @@ def translate_from(
10196
EFO:0030067 for deletions and EFO:0030070 for duplications
10297
For AlleleTranslator
10398
assembly_name (str): Assembly used for `var`. Defaults to the
104-
`default_assembly_name`. Only used for beacon and gnomad.
99+
`default_assembly_name`. Only used for gnomad.
105100
require_validation (bool): If `True` then validation checks must pass in
106101
order to return a VRS object. A `DataProxyValidationError` will be
107102
raised if validation checks fail. If `False` then VRS object will be
@@ -180,7 +175,6 @@ def __init__(
180175
super().__init__(data_proxy, default_assembly_name, identify)
181176

182177
self.from_translators = {
183-
"beacon": self._from_beacon,
184178
"gnomad": self._from_gnomad,
185179
"hgvs": self._from_hgvs,
186180
"spdi": self._from_spdi,
@@ -215,68 +209,6 @@ def _create_allele(self, values: dict, **kwargs) -> models.Allele:
215209
allele = models.Allele(location=location, state=state)
216210
return self._post_process_imported_allele(allele, **kwargs)
217211

218-
@deprecated("This method does not match the Beacon spec and will be removed in v3.")
219-
def _from_beacon(self, beacon_expr: str, **kwargs) -> models.Allele | None:
220-
"""Parse beacon expression into VRS Allele
221-
222-
kwargs:
223-
assembly_name (str): Assembly used for `beacon_expr`.
224-
rle_seq_limit Optional(int): If RLE is set as the new state after
225-
normalization, this sets the limit for the length of the `sequence`.
226-
To exclude `sequence` from the response, set to 0.
227-
For no limit, set to `None`.
228-
Defaults value set in instance variable, `rle_seq_limit`.
229-
do_normalize (bool): `True` if fully justified normalization should be
230-
performed. `False` otherwise. Defaults to `True`
231-
232-
#>>> a = tlr.from_beacon("19 : 44908822 C > T")
233-
#>>> a.model_dump()
234-
{
235-
'location': {
236-
'end': 44908822,
237-
'start': 44908821,
238-
'sequenceReference': {
239-
'type': 'SequenceReference',
240-
'refgetAccession': 'SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl'
241-
},
242-
'type': 'SequenceLocation'
243-
},
244-
'state': {
245-
'sequence': 'C',
246-
'type': 'LiteralSequenceExpression'
247-
},
248-
'type': 'Allele'
249-
}
250-
251-
"""
252-
if not isinstance(beacon_expr, str):
253-
return None
254-
255-
m = self.beacon_re.match(beacon_expr.replace(" ", ""))
256-
if not m:
257-
return None
258-
259-
g = m.groupdict()
260-
assembly_name = kwargs.get("assembly_name", self.default_assembly_name)
261-
sequence = assembly_name + ":" + g["chr"]
262-
refget_accession = self.data_proxy.derive_refget_accession(sequence)
263-
if not refget_accession:
264-
return None
265-
266-
start = int(g["pos"]) - 1
267-
ref = g["ref"]
268-
alt = g["alt"]
269-
end = start + len(ref)
270-
ins_seq = alt
271-
272-
values = {
273-
"refget_accession": refget_accession,
274-
"start": start,
275-
"end": end,
276-
"literal_sequence": ins_seq,
277-
}
278-
return self._create_allele(values, **kwargs)
279-
280212
def _from_gnomad(self, gnomad_expr: str, **kwargs) -> models.Allele | None:
281213
"""Parse gnomAD-style VCF expression into VRS Allele
282214

tests/extras/cassettes/test_from_beacon.yaml

Lines changed: 0 additions & 55 deletions
This file was deleted.

tests/extras/test_allele_translator.py

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ def tlr(rest_dataproxy):
1717
# https://www.ncbi.nlm.nih.gov/clinvar/variation/17848/?new_evidence=true
1818
snv_inputs = {
1919
"hgvs": "NC_000019.10:g.44908822C>T",
20-
"beacon": "19 : 44908822 C > T",
2120
"spdi": "NC_000019.10:44908821:1:T",
2221
"gnomad": "19-44908822-C-T",
2322
}
@@ -39,7 +38,6 @@ def tlr(rest_dataproxy):
3938
# https://www.ncbi.nlm.nih.gov/clinvar/variation/693259/?new_evidence=true
4039
mito_inputs = {
4140
"hgvs": "NC_012920.1:m.10083A>G",
42-
"beacon": "MT : 10083 A > G",
4341
"spdi": "NC_012920.1:10082:A:G",
4442
"gnomad": "MT-10083-A-G",
4543
}
@@ -327,31 +325,11 @@ def test_rle_round_trip_gnomad_spdi(tlr):
327325

328326
def test_from_invalid(tlr):
329327
with pytest.raises(
330-
ValueError, match="Unable to parse data as beacon, gnomad, hgvs, spdi, vrs"
328+
ValueError, match="Unable to parse data as gnomad, hgvs, spdi, vrs"
331329
):
332330
tlr.translate_from("BRAF amplication")
333331

334332

335-
@pytest.mark.vcr
336-
def test_from_beacon(tlr):
337-
do_normalize = False
338-
with pytest.deprecated_call():
339-
assert (
340-
tlr._from_beacon(
341-
snv_inputs["beacon"], do_normalize=do_normalize
342-
).model_dump(exclude_none=True)
343-
== snv_output
344-
)
345-
346-
with pytest.deprecated_call():
347-
assert (
348-
tlr._from_beacon(
349-
mito_inputs["beacon"], do_normalize=do_normalize
350-
).model_dump(exclude_none=True)
351-
== mito_output
352-
)
353-
354-
355333
@pytest.mark.vcr
356334
def test_from_gnomad(tlr):
357335
do_normalize = False
@@ -977,9 +955,6 @@ def test_normalize_microsatellite_counts(tlr, case):
977955
# @pytest.mark.vcr
978956
# def test_errors(tlr):
979957
# with pytest.raises(ValueError):
980-
# tlr._from_beacon("bogus")
981-
#
982-
# with pytest.raises(ValueError):
983958
# tlr._from_gnomad("NM_182763.2:c.688+403C>T")
984959
#
985960
# with pytest.raises(ValueError):

0 commit comments

Comments
 (0)