Skip to content

Commit 0daaf79

Browse files
committed
Replace regexes with central version
1 parent 56eb634 commit 0daaf79

5 files changed

Lines changed: 57 additions & 16 deletions

File tree

dataflow_transfer/run_classes/element_runs.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
class ElementRun(Run):
77
"""Defines an Element sequencing run"""
88

9+
run_family = "Element"
10+
911
def __init__(self, run_dir, configuration):
1012
super().__init__(run_dir, configuration)
1113
self.final_file = "RunUploaded.json"
@@ -18,9 +20,6 @@ class AVITIRun(ElementRun):
1820
run_type = "AVITI"
1921

2022
def __init__(self, run_dir, configuration):
21-
self.run_id_format = (
22-
r"^\d{8}_AV\d{6}_(A|B)\d{10}$" # 20251007_AV242106_A2507535225
23-
)
2423
super().__init__(run_dir, configuration)
2524
self.flowcell_id = self.run_id.split("_")[-1][1:] # 2507535225
2625

dataflow_transfer/run_classes/generic_runs.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
class Run:
1313
"""Defines a generic sequencing run"""
1414

15+
run_type = None
16+
run_family = None
17+
default_run_id_format = None
18+
1519
def __init__(self, run_dir, configuration):
1620
self.run_dir = run_dir
1721
self.run_id = os.path.basename(run_dir)
@@ -33,6 +37,24 @@ def __init__(self, run_dir, configuration):
3337
)
3438
self.remote_destination = self.sequencer_config.get("remote_destination")
3539
self.db = StatusdbSession(self.configuration.get("statusdb"))
40+
self.run_id_format = self._resolve_run_id_format()
41+
42+
def _resolve_run_id_format(self):
43+
"""Resolve the run ID regex from central config."""
44+
run_id_format = None
45+
if self.run_family and self.run_type:
46+
try:
47+
run_id_format = self.db.get_regex_pattern(
48+
self.run_family, self.run_type
49+
)
50+
except Exception as exc:
51+
logger.warning(
52+
"Unable to load run_id_format for %s from regex config: %s",
53+
self.run_type,
54+
exc,
55+
)
56+
57+
return run_id_format
3658

3759
def confirm_run_type(self):
3860
"""Compare run ID with expected format for the run type."""

dataflow_transfer/run_classes/illumina_runs.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
class IlluminaRun(Run):
77
"""Defines an Illumina sequencing run"""
88

9+
run_family = "Illumina"
10+
911
def __init__(self, run_dir, configuration):
1012
super().__init__(run_dir, configuration)
1113
self.final_file = "CopyComplete.txt"
@@ -19,9 +21,6 @@ class NovaSeqXPlusRun(IlluminaRun):
1921
run_type = "NovaSeqXPlus"
2022

2123
def __init__(self, run_dir, configuration):
22-
self.run_id_format = (
23-
r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]+$" # 20251010_LH00202_0284_B22CVHTLT1
24-
)
2524
super().__init__(run_dir, configuration)
2625
self.flowcell_id = self.run_id.split("_")[-1][1:] # 22CVHTLT1
2726

@@ -33,9 +32,6 @@ class NextSeqRun(IlluminaRun):
3332
run_type = "NextSeq"
3433

3534
def __init__(self, run_dir, configuration):
36-
self.run_id_format = (
37-
r"^\d{6}_[A-Z0-9]+_\d{3}_[A-Z0-9]+$" # 251015_VH00203_572_AAHFHCCM5
38-
)
3935
super().__init__(run_dir, configuration)
4036

4137

@@ -46,9 +42,6 @@ class MiSeqRun(IlluminaRun):
4642
run_type = "MiSeq"
4743

4844
def __init__(self, run_dir, configuration):
49-
self.run_id_format = (
50-
r"^\d{6}_[A-Z0-9]+_\d{4}_[A-Z0-9\-]+$" # 251015_M01548_0646_000000000-M6D7K
51-
)
5245
super().__init__(run_dir, configuration)
5346

5447

@@ -59,6 +52,5 @@ class MiSeqi100Run(IlluminaRun):
5952
run_type = "MiSeqi100"
6053

6154
def __init__(self, run_dir, configuration):
62-
self.run_id_format = r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]{10}-SC3$" # 20260128_SH01140_0002_ASC2150561-SC3
6355
super().__init__(run_dir, configuration)
6456
self.flowcell_id = self.run_id.split("_")[-1][1:] # SC2150561-SC3

dataflow_transfer/run_classes/ont_runs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
class ONTRun(Run):
77
"""Defines an ONT sequencing run"""
88

9+
run_family = "ONT"
10+
911
def __init__(self, run_dir, configuration):
1012
super().__init__(run_dir, configuration)
1113
self.final_file = "final_summary.txt"
@@ -19,7 +21,6 @@ class PromethIONRun(ONTRun):
1921
run_type = "PromethION"
2022

2123
def __init__(self, run_dir, configuration):
22-
self.run_id_format = r"^\d{8}_\d{4}_[A-Z0-9]{2}_P[A-Z0-9]+_[a-f0-9]{8}$" # 20251015_1051_3B_PBG60686_0af3a2e0
2324
super().__init__(run_dir, configuration)
2425

2526

@@ -30,5 +31,4 @@ class MinIONRun(ONTRun):
3031
run_type = "MinION"
3132

3233
def __init__(self, run_dir, configuration):
33-
self.run_id_format = r"^\d{8}_\d{4}_MN[A-Z0-9]+_[A-Z0-9]+_[a-f0-9]{8}$" # 20240229_1404_MN19414_ASH657_7a74bf8f
3434
super().__init__(run_dir, configuration)

dataflow_transfer/utils/statusdb.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ def _retry_call(self, func):
5757

5858
def get_db_doc(self, ddoc, view, run_id):
5959
"""Retrieve a document from the database via retried call."""
60-
doc_id = self.get_doc_id(ddoc, view, run_id)
60+
doc_id = self.get_doc_id(
61+
ddoc, view, run_id
62+
) # TODO: refactor to use get_document
6163
if doc_id:
6264
return self._retry_call(
6365
lambda: self.connection.get_document(
@@ -66,6 +68,32 @@ def get_db_doc(self, ddoc, view, run_id):
6668
)
6769
return None
6870

71+
def get_document(self, db, doc_id):
    """Fetch the document ``doc_id`` from database ``db`` through ``_retry_call``.

    Returns whatever ``_retry_call`` returns for the fetch, i.e. the
    ``get_result()`` of the connection's ``get_document`` response.
    """

    def _fetch():
        # One attempt: request the document and unwrap the response.
        response = self.connection.get_document(db=db, doc_id=doc_id)
        return response.get_result()

    return self._retry_call(_fetch)
76+
77+
def get_regex_pattern(
78+
self,
79+
run_family,
80+
run_type,
81+
regex_db="gs_configs",
82+
regex_doc_id="regex_patterns",
83+
):
84+
"""Lookup the python regex pattern for a run type from the central regex config document."""
85+
regex_doc = self.get_document(db=regex_db, doc_id=regex_doc_id)
86+
if not regex_doc:
87+
return None
88+
89+
flowcell_patterns = regex_doc.get("flowcell_patterns", {})
90+
family_patterns = flowcell_patterns.get(run_family, {})
91+
if not family_patterns:
92+
return None
93+
94+
pattern = family_patterns.get(run_type)
95+
return pattern
96+
6997
def get_doc_id(self, ddoc, view, run_id):
7098
"""Retrieve a document ID from the database via retried call."""
7199
result = self._retry_call(

0 commit comments

Comments
 (0)