@@ -188,6 +188,7 @@ def __init__(
188188 }
189189
190190 self .to_translators = {
191+ "gnomad" : self ._to_gnomad ,
191192 "hgvs" : self ._to_hgvs ,
192193 "spdi" : self ._to_spdi ,
193194 }
@@ -416,6 +417,57 @@ def _from_spdi(self, spdi_expr: str, **kwargs) -> models.Allele | None:
416417
417418 return self ._create_allele (values , ** kwargs )
418419
420+ def _to_gnomad (
421+ self , vo : models .Allele , namespace : str | None = "refseq" , ** kwargs
422+ ) -> list [str ]:
423+ """Generate a *list* of gnomAD-style identifiers for VRS Allele.
424+
425+ If no alias translations are available, an empty list is
426+ returned.
427+
428+ If the VRS object cannot be expressed in gnomAD-style, raises ValueError.
429+ """
430+ sequence = f"ga4gh:{ vo .location .get_refget_accession ()} "
431+ aliases = self .data_proxy .translate_sequence_identifier (sequence , namespace )
432+ aliases = [a .split (":" )[1 ] for a in aliases ]
433+ seq_proxies = {a : SequenceProxy (self .data_proxy , a ) for a in aliases }
434+ start , end = vo .location .start , vo .location .end
435+ spdi_exprs = []
436+
437+ for alias in aliases :
438+ # Get the reference sequence
439+ seq_proxy = seq_proxies [alias ]
440+ ref_seq = seq_proxy [start :end ]
441+
442+ if vo .state .type == models .VrsType .REF_LEN_EXPR .value :
443+ # Derived from reference. sequence included if under limit, but
444+ # we can derive it again from the reference.
445+ alt_seq = denormalize_reference_length_expression (
446+ ref_seq = ref_seq ,
447+ repeat_subunit_length = vo .state .repeatSubunitLength ,
448+ alt_length = vo .state .length ,
449+ )
450+ # Warn if the derived sequence is different from the one in the object
451+ if vo .state .sequence and vo .state .sequence .root != alt_seq :
452+ _logger .warning (
453+ "Derived sequence '%s' is different from provided state.sequence '%s'" ,
454+ alt_seq ,
455+ vo .state .sequence .root ,
456+ )
457+ else :
458+ alt_seq = vo .state .sequence .root
459+
460+ # Optionally allow using the length of the reference sequence
461+ # instead of the sequence itself.
462+ ref_seq_limit = kwargs .get ("ref_seq_limit" , 0 )
463+ if ref_seq_limit is not None and len (ref_seq ) > int (ref_seq_limit ):
464+ ref_seq = len (ref_seq )
465+
466+ spdi_expr = f"{ alias } :{ start } :{ ref_seq } :{ alt_seq } "
467+ spdi_exprs .append (spdi_expr )
468+
469+ return spdi_exprs
470+
419471 def _to_hgvs (
420472 self ,
421473 vo : models .Allele ,
@@ -424,6 +476,25 @@ def _to_hgvs(
424476 ) -> list [str ]:
425477 return self .hgvs_tools .from_allele (vo , namespace )
426478
479+ def _to_gnomad (
480+ self ,
481+ vo : models .Allele ,
482+ namespace : str | None = None ,
483+ ** kwargs , # noqa: ARG002
484+ ) -> list [str ]:
485+ """Generate a *list* of gnomAD-style identifiers for VRS Allele.
486+
487+ If no alias translations are available, an empty list is returned.
488+
489+ If the VRS object cannot be expressed in gnomAD-style, raises ValueError.
490+ """
491+ namespace = namespace or self .default_assembly_name
492+ if not namespace .startswith ('GRCh' ):
493+ raise ValueError (f"gnomAD-style identifiers require a GRCh reference sequence namespace, but got '{ namespace } '" )
494+ return self ._to_location_expression (
495+ "{alias}-{start}-{ref_seq}-{alt_seq}" , vo , namespace ,
496+ )
497+
427498 def _to_spdi (
428499 self , vo : models .Allele , namespace : str | None = "refseq" , ** kwargs
429500 ) -> list [str ]:
@@ -450,12 +521,20 @@ def _to_spdi(
450521 SPDI and VRS use identical normalization. The incoming Allele
451522 is expected to be normalized per VRS spec.
452523 """
524+ ref_seq_limit = kwargs .get ("ref_seq_limit" , 0 )
525+ return self ._to_location_expression (
526+ "{alias}:{start}:{ref_seq}:{alt_seq}" , vo , namespace , ref_seq_limi = ref_seq_limit ,
527+ )
528+
529+ def _to_location_expression (
530+ self , id_template : str , vo : models .Allele , namespace : str | None , ref_seq_limit : int | None = None ,
531+ ) -> list [str ]:
453532 sequence = f"ga4gh:{ vo .location .get_refget_accession ()} "
454533 aliases = self .data_proxy .translate_sequence_identifier (sequence , namespace )
455534 aliases = [a .split (":" )[1 ] for a in aliases ]
456535 seq_proxies = {a : SequenceProxy (self .data_proxy , a ) for a in aliases }
457536 start , end = vo .location .start , vo .location .end
458- spdi_exprs = []
537+ exprs = []
459538
460539 for alias in aliases :
461540 # Get the reference sequence
@@ -482,14 +561,12 @@ def _to_spdi(
482561
483562 # Optionally allow using the length of the reference sequence
484563 # instead of the sequence itself.
485- ref_seq_limit = kwargs .get ("ref_seq_limit" , 0 )
486564 if ref_seq_limit is not None and len (ref_seq ) > int (ref_seq_limit ):
487565 ref_seq = len (ref_seq )
488566
489- spdi_expr = f"{ alias } :{ start } :{ ref_seq } :{ alt_seq } "
490- spdi_exprs .append (spdi_expr )
567+ exprs .append (id_template .format (alias = alias , start = start , ref_seq = ref_seq , alt_seq = alt_seq ))
491568
492- return spdi_exprs
569+ return exprs
493570
494571 def _post_process_imported_allele (
495572 self , allele : models .Allele , ** kwargs
0 commit comments