Skip to content

Commit 48e88e7

Browse files
authored
Merge branch 'vrs/2.1.0-snapshot.2026-02' into issue-602
2 parents daa1c87 + a0f2234 commit 48e88e7

11 files changed

Lines changed: 76 additions & 46 deletions

File tree

pyproject.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ dependencies = [
4040
"bioutils",
4141
"requests",
4242
"canonicaljson",
43-
"setuptools>=78.1.0", # TODO remove this pin caused by HGVS issue
4443
"typing-extensions",
4544
]
4645

@@ -53,7 +52,6 @@ extras = [
5352
"dill~=0.3.7",
5453
"click",
5554
"pysam==0.23.0", # pinned pending https://github.com/ga4gh/vrs-python/issues/560
56-
"typing_extensions",
5755
]
5856
dev = [
5957
# tests
@@ -71,7 +69,6 @@ dev = [
7169
]
7270
notebooks = [
7371
"jupyter",
74-
"tabulate",
7572
"pyyaml"
7673
]
7774

src/ga4gh/core/enderef.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
import logging
1313

14+
from pydantic.main import BaseModel
15+
1416
from .identifiers import ga4gh_identify, is_ga4gh_identifier
1517
from .pydantic import (
1618
get_pydantic_root,
@@ -22,7 +24,12 @@
2224
_logger = logging.getLogger(__name__)
2325

2426

25-
def ga4gh_enref(o, cra_map, object_store=None, return_id_obj_tuple=False) -> tuple: # noqa: ANN001
27+
def ga4gh_enref(
28+
o, # noqa: ANN001
29+
cra_map, # noqa: ANN001
30+
object_store=None, # noqa: ANN001
31+
return_id_obj_tuple: bool = False,
32+
) -> tuple:
2633
"""Recursively convert "referable attributes" from inlined to
2734
referenced form. Returns a new object.
2835
@@ -35,13 +42,13 @@ def ga4gh_enref(o, cra_map, object_store=None, return_id_obj_tuple=False) -> tup
3542
:raise TypeError: if any object IDs are non-GA4GH CURIEs
3643
"""
3744

38-
def _id_and_store(o): # noqa: ANN202 ANN001
45+
def _id_and_store(o) -> str | None: # noqa: ANN001
3946
_id = ga4gh_identify(o)
4047
if _id and object_store is not None:
4148
object_store[_id] = o
4249
return _id
4350

44-
def _enref(o): # noqa: ANN202 ANN001
51+
def _enref(o: BaseModel) -> str | None:
4552
"""depth-first recursive, in-place enref of object; returns id of object"""
4653
ref_att_names = cra_map.get(o.type, [])
4754
for ran in ref_att_names:
@@ -76,7 +83,7 @@ def _enref(o): # noqa: ANN202 ANN001
7683
return (_id, o) if return_id_obj_tuple else o
7784

7885

79-
def ga4gh_deref(o, cra_map, object_store): # noqa: ANN201 ANN001
86+
def ga4gh_deref(o, cra_map, object_store) -> BaseModel: # noqa: ANN001
8087
"""Convert "referable attributes" in-place from referenced to inlined
8188
form.
8289
@@ -87,7 +94,7 @@ def ga4gh_deref(o, cra_map, object_store): # noqa: ANN201 ANN001
8794
8895
"""
8996

90-
def _deref(o): # noqa: ANN202 ANN001
97+
def _deref(o: BaseModel): # noqa: ANN202
9198
"""depth-first recursive, in-place deref of object; returns id of object"""
9299
if o.type not in cra_map:
93100
_logger.warning("%s not in cra_map %s", o.type, cra_map)

src/ga4gh/core/identifiers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,14 @@ class use_ga4gh_compute_identifier_when(ContextDecorator): # noqa: N801
8585
def my_method():
8686
"""
8787

88-
def __init__(self, when: VrsObjectIdentifierIs):
88+
def __init__(self, when: VrsObjectIdentifierIs) -> None:
8989
self.when = when
9090
self.token = None
9191

92-
def __enter__(self): # noqa: ANN204
92+
def __enter__(self) -> None:
9393
self.token = ga4gh_compute_identifier_when.set(self.when)
9494

95-
def __exit__(self, exc_type, exc, exc_tb): # noqa: ANN204 ANN001
95+
def __exit__(self, exc_type, exc, exc_tb) -> None: # noqa: ANN001
9696
ga4gh_compute_identifier_when.reset(self.token)
9797

9898

src/ga4gh/core/models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,16 @@ class Element(BaseModel, ABC):
144144
description="A list of extensions to the Entity, that allow for capture of information not directly supported by elements defined in the model.",
145145
)
146146

147+
def get_extensions_by_name(self, name: str) -> list[Extension]:
148+
"""Fetch all contained extensions exactly matching the provided name
149+
150+
:param name: name of extension to fetch
151+
:return: a list of all matching extensions, empty if no matches found (or if instance contains no extensions)
152+
"""
153+
if not self.extensions:
154+
return []
155+
return [e for e in self.extensions if e.name == name]
156+
147157

148158
#########################################
149159
# General-purpose data classes

src/ga4gh/vrs/dataproxy.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,11 @@ class SeqRepoRESTDataProxy(_SeqRepoDataProxyBase):
247247

248248
rest_version = "1"
249249

250-
def __init__(self, base_url: str, disable_healthcheck: bool = False):
250+
def __init__(self, base_url: str, disable_healthcheck: bool = False) -> None:
251251
"""Initialize REST-based dataproxy instance.
252252
253253
:param base_url: root URL to server
254+
:param disable_healthcheck: Whether healthcheck should be disabled
254255
"""
255256
super().__init__()
256257
self.base_url = f"{base_url}/{self.rest_version}/"
@@ -288,12 +289,12 @@ class SequenceProxy(Sequence):
288289
289290
"""
290291

291-
def __init__(self, dp: _DataProxy, alias: str): # noqa: D107
292+
def __init__(self, dp: _DataProxy, alias: str) -> None: # noqa: D107
292293
self.dp = dp
293294
self.alias = alias
294295
self._md = self.dp.get_metadata(self.alias)
295296

296-
def __str__(self): # noqa: D105 ANN204
297+
def __str__(self) -> str: # noqa: D105
297298
return self.dp.get_sequence(self.alias)
298299

299300
def __len__(self): # noqa: D105 ANN204
@@ -303,7 +304,7 @@ def __reversed__(self): # noqa: D105 ANN204
303304
msg = "Reversed iteration of a SequenceProxy is not implemented"
304305
raise NotImplementedError(msg)
305306

306-
def __getitem__(self, key): # noqa: ANN001 ANN204
307+
def __getitem__(self, key) -> str: # noqa: ANN001
307308
"""Return sequence for key (slice), fetching if necessary"""
308309
if isinstance(key, int):
309310
key = slice(key, key + 1)
@@ -344,16 +345,18 @@ def _isoformat(o: datetime.datetime) -> str:
344345
# self.base_url = base_url
345346

346347

347-
def create_dataproxy(uri: str | None = None) -> _DataProxy:
348+
def create_dataproxy(
349+
uri: str | None = None, disable_healthcheck: bool = False
350+
) -> _DataProxy:
348351
"""Create a dataproxy from uri or GA4GH_VRS_DATAPROXY_URI
349352
350-
Currently accepted URI schemes:
351-
352-
* seqrepo+file:///path/to/seqrepo/root
353-
* seqrepo+:../relative/path/to/seqrepo/root
354-
* seqrepo+http://localhost:5000/seqrepo
355-
* seqrepo+https://somewhere:5000/seqrepo
353+
:param uri: Dataproxy URI. Currently accepted URI schemes:
356354
355+
* seqrepo+file:///path/to/seqrepo/root
356+
* seqrepo+:../relative/path/to/seqrepo/root
357+
* seqrepo+http://localhost:5000/seqrepo
358+
* seqrepo+https://somewhere:5000/seqrepo
359+
:param disable_healthcheck: Whether healthcheck should be disabled in REST dataproxy
357360
:raise ValueError: if URI doesn't match recognized schemes, e.g. is missing provider
358361
prefix (`"seqrepo+"`)
359362
"""
@@ -379,7 +382,9 @@ def create_dataproxy(uri: str | None = None) -> _DataProxy:
379382
sr = SeqRepo(root_dir=parsed_uri.path)
380383
dp = SeqRepoDataProxy(sr)
381384
elif proto in ("http", "https"):
382-
dp = SeqRepoRESTDataProxy(uri[len(provider) + 1 :])
385+
dp = SeqRepoRESTDataProxy(
386+
uri[len(provider) + 1 :], disable_healthcheck=disable_healthcheck
387+
)
383388
else:
384389
msg = f"SeqRepo URI scheme {parsed_uri.scheme} not implemented"
385390
raise ValueError(msg)

src/ga4gh/vrs/enderef.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
from pydantic.main import BaseModel
2+
13
from ga4gh.core import ga4gh_deref, ga4gh_enref
24

35
from .models import class_refatt_map
46

57

6-
def vrs_enref(o, object_store=None, return_id_obj_tuple=False):
8+
def vrs_enref(o, object_store=None, return_id_obj_tuple: bool = False):
79
return ga4gh_enref(
810
o,
911
cra_map=class_refatt_map,
@@ -12,5 +14,5 @@ def vrs_enref(o, object_store=None, return_id_obj_tuple=False):
1214
)
1315

1416

15-
def vrs_deref(o, object_store):
17+
def vrs_deref(o, object_store) -> BaseModel:
1618
return ga4gh_deref(o, cra_map=class_refatt_map, object_store=object_store)

src/ga4gh/vrs/extras/object_store.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ class Sqlite3MutableMapping(MutableMapping):
1313
If not used as a contextmanager, user must call commit and/or close.
1414
"""
1515

16-
def __init__(self, sqlite3_db: str | sqlite3.Connection, autocommit: bool = True):
16+
def __init__(
17+
self, sqlite3_db: str | sqlite3.Connection, autocommit: bool = True
18+
) -> None:
1719
"""Connect to the sqlite3 database specified by an existing sqlite3.Connection
1820
or a connection string.
1921
@@ -96,10 +98,10 @@ def __len__(self):
9698
finally:
9799
cur.close()
98100

99-
def commit(self):
101+
def commit(self) -> None:
100102
self.db.commit()
101103

102-
def close(self):
104+
def close(self) -> None:
103105
with self._closed_lock:
104106
if not self._closed:
105107
self.commit()
@@ -110,5 +112,5 @@ def __enter__(self):
110112
self.db.__enter__()
111113
return self
112114

113-
def __exit__(self, exc_type, exc_value, traceback):
115+
def __exit__(self, exc_type, exc_value, traceback) -> None:
114116
self.db.__exit__(exc_type, exc_value, traceback)

src/ga4gh/vrs/extras/translator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def __init__(
7575
default_assembly_name: str = "GRCh38",
7676
identify: bool = True,
7777
rle_seq_limit: int | None = 50,
78-
):
78+
) -> None:
7979
self.default_assembly_name = default_assembly_name
8080
self.data_proxy = data_proxy
8181
self.identify = identify
@@ -175,7 +175,7 @@ def __init__(
175175
data_proxy: _DataProxy,
176176
default_assembly_name: str = "GRCh38",
177177
identify: bool = True,
178-
):
178+
) -> None:
179179
"""Initialize AlleleTranslator class"""
180180
super().__init__(data_proxy, default_assembly_name, identify)
181181

@@ -528,7 +528,7 @@ def __init__(
528528
data_proxy: _DataProxy,
529529
default_assembly_name: str = "GRCh38",
530530
identify: bool = True,
531-
):
531+
) -> None:
532532
"""Initialize CnvTranslator class"""
533533
super().__init__(data_proxy, default_assembly_name, identify)
534534
self.from_translators = {

src/ga4gh/vrs/models.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
def flatten(vals):
5151
"""Flatten vals recursively, lazily using yield"""
5252

53-
def is_coll(thing):
53+
def is_coll(thing) -> bool:
5454
"""Return True if the thing looks like a collection.
5555
5656
This is not exhaustive, do not use in general.
@@ -77,7 +77,7 @@ def flatten_type(t):
7777
return [t]
7878

7979

80-
def overlaps(a: list, b: list):
80+
def overlaps(a: list, b: list) -> bool:
8181
"""Return true if there are any elements in common between a and b"""
8282
return len(set(a).intersection(set(b))) > 0
8383

@@ -266,7 +266,7 @@ class _ValueObject(Entity, ABC):
266266
See https://en.wikipedia.org/wiki/Value_object for more on Value Objects.
267267
"""
268268

269-
def __hash__(self):
269+
def __hash__(self) -> int:
270270
return encode_canonical_json(self.ga4gh_serialize()).decode("utf-8").__hash__()
271271

272272
def ga4gh_serialize(self) -> dict:
@@ -298,14 +298,14 @@ class Ga4ghIdentifiableObject(_ValueObject, ABC):
298298
description="A sha512t24u digest created using the VRS Computed Identifier algorithm.",
299299
)
300300

301-
def __lt__(self, other):
301+
def __lt__(self, other) -> bool:
302302
return self.get_or_create_digest() < other.get_or_create_digest()
303303

304304
@staticmethod
305305
def is_ga4gh_identifiable() -> bool:
306306
return True
307307

308-
def has_valid_ga4gh_id(self):
308+
def has_valid_ga4gh_id(self) -> bool | str | None:
309309
return self.id and GA4GH_IR_REGEXP.match(self.id) is not None
310310

311311
def compute_digest(
@@ -372,7 +372,7 @@ def get_or_create_ga4gh_identifier(
372372
else:
373373
return self.compute_ga4gh_identifier(recompute)
374374

375-
def compute_ga4gh_identifier(self, recompute: bool = False, as_version=None):
375+
def compute_ga4gh_identifier(self, recompute: bool = False, as_version=None) -> str:
376376
"""Return a GA4GH Computed Identifier.
377377
378378
If ``as_version`` is provided, other parameters are ignored and a computed
@@ -629,7 +629,7 @@ def validate_start_end(
629629
raise ValueError(err_msg)
630630
return v
631631

632-
def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion):
632+
def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion) -> str:
633633
"""Return a serialized string following the conventions for SequenceLocation
634634
serialization as defined in the VRS version specified by ``as_version``.
635635
@@ -661,7 +661,7 @@ def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion):
661661
msg = f"Received an unexpected value for `as_version`: {as_version}. MUST be an instance of `PrevVrsVersion`."
662662
raise TypeError(msg)
663663

664-
def get_refget_accession(self):
664+
def get_refget_accession(self) -> str | None:
665665
if isinstance(self.sequenceReference, SequenceReference):
666666
return self.sequenceReference.refgetAccession
667667
if isinstance(self.sequenceReference, iriReference):
@@ -769,7 +769,7 @@ class Allele(_VariationBase, BaseModelForbidExtra):
769769
Field(..., description="An expression of the sequence state")
770770
)
771771

772-
def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion):
772+
def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion) -> str:
773773
"""Return a serialized string following the conventions for
774774
Allele serialization as defined in the VRS version specified by ``as_version``.
775775

src/ga4gh/vrs/normalize.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from bioutils.normalize import NormalizationMode
1313
from bioutils.normalize import normalize as _normalize
14+
from pydantic.main import BaseModel
1415

1516
from ga4gh.core import ga4gh_digest, is_pydantic_instance, pydantic_copy
1617
from ga4gh.vrs import models
@@ -83,7 +84,9 @@ def _get_new_allele_location_pos(
8384
return val
8485

8586

86-
def _normalize_allele(input_allele: models.Allele, data_proxy, rle_seq_limit=50):
87+
def _normalize_allele(
88+
input_allele: models.Allele, data_proxy: _DataProxy, rle_seq_limit: int = 50
89+
):
8790
"""Normalize Allele using "fully-justified" normalization adapted from NCBI's
8891
VOCA. Fully-justified normalization expands such ambiguous representation over the
8992
entire region of ambiguity, resulting in an unambiguous representation that may be
@@ -305,7 +308,7 @@ def denormalize_reference_length_expression(
305308
return alt
306309

307310

308-
def _factor_gen(n):
311+
def _factor_gen(n: int):
309312
"""Yield all factors of an integer `n`, in descending order"""
310313
lower_factors = []
311314
i = 1
@@ -319,7 +322,11 @@ def _factor_gen(n):
319322

320323

321324
def _define_rle_allele(
322-
allele, length, repeat_subunit_length, rle_seq_limit, extended_alt_seq
325+
allele: BaseModel,
326+
length: int,
327+
repeat_subunit_length: int,
328+
rle_seq_limit,
329+
extended_alt_seq,
323330
):
324331
# Otherwise, create the Allele as an RLE
325332
allele.state = models.ReferenceLengthExpression(
@@ -332,7 +339,7 @@ def _define_rle_allele(
332339
return allele
333340

334341

335-
def _is_valid_cycle(template_start, template, target):
342+
def _is_valid_cycle(template_start, template: str, target) -> bool:
336343
cycle = itertools.cycle(template[template_start:])
337344
for char in target[len(template) :]: # noqa: SIM110
338345
if char != next(cycle):

0 commit comments

Comments
 (0)