Skip to content

Commit 3f899ec

Browse files
committed
refactor genomic table initializer + make it optional
1 parent 5272243 commit 3f899ec

1 file changed

Lines changed: 55 additions & 54 deletions

File tree

src/cool_seq_tool/sources/uta_database.py

Lines changed: 55 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -134,52 +134,50 @@ async def execute_query(
134134
raise
135135

136136
async def create_genomic_table(self) -> None:
137-
"""Create table containing genomic accession information."""
138-
check_table_exists = """
139-
SELECT EXISTS (
140-
SELECT FROM information_schema.tables
141-
WHERE table_name = 'genomic'
142-
);
137+
"""Create the derived ``genomic`` table and its indexes in the current schema if needed."""
138+
create_genomic_table = """
139+
CREATE TABLE IF NOT EXISTS genomic AS
140+
SELECT
141+
t.hgnc,
142+
aes.alt_ac,
143+
aes.alt_aln_method,
144+
aes.alt_strand,
145+
ae.start_i AS alt_start_i,
146+
ae.end_i AS alt_end_i
147+
FROM transcript t
148+
JOIN exon_set tes
149+
ON t.ac = tes.tx_ac
150+
AND tes.alt_aln_method = 'transcript'
151+
JOIN exon_set aes
152+
ON t.ac = aes.tx_ac
153+
AND aes.alt_aln_method <> 'transcript'
154+
JOIN exon te
155+
ON tes.exon_set_id = te.exon_set_id
156+
JOIN exon ae
157+
ON aes.exon_set_id = ae.exon_set_id
158+
AND te.ord = ae.ord
159+
LEFT JOIN exon_aln ea
160+
ON te.exon_id = ea.tx_exon_id
161+
AND ae.exon_id = ea.alt_exon_id;
162+
"""
163+
await self.execute_query(create_genomic_table)
164+
165+
indexes = [
143166
"""
144-
genomic_table_exists = await (
145-
await self.execute_query(check_table_exists)
146-
).fetchone()
147-
if genomic_table_exists is None:
148-
_logger.critical(
149-
"SELECT EXISTS query in UtaDatabase._create_genomic_table "
150-
"returned invalid response"
151-
)
152-
msg = "SELECT EXISTS query returned invalid response"
153-
raise ValueError(msg)
154-
if not genomic_table_exists[0]:
155-
create_genomic_table = """
156-
CREATE TABLE genomic AS
157-
SELECT t.hgnc, aes.alt_ac, aes.alt_aln_method,
158-
aes.alt_strand, ae.start_i AS alt_start_i,
159-
ae.end_i AS alt_end_i
160-
FROM (((((transcript t
161-
JOIN exon_set tes ON (((t.ac = tes.tx_ac)
162-
AND (tes.alt_aln_method = 'transcript'::text))))
163-
JOIN exon_set aes ON (((t.ac = aes.tx_ac)
164-
AND (aes.alt_aln_method <> 'transcript'::text))))
165-
JOIN exon te ON
166-
((tes.exon_set_id = te.exon_set_id)))
167-
JOIN exon ae ON
168-
(((aes.exon_set_id = ae.exon_set_id)
169-
AND (te.ord = ae.ord))))
170-
LEFT JOIN exon_aln ea ON
171-
(((te.exon_id = ea.tx_exon_id) AND
172-
(ae.exon_id = ea.alt_exon_id))));
173-
"""
174-
await self.execute_query(create_genomic_table)
175-
176-
indexes = [
177-
"CREATE INDEX alt_pos_index ON genomic (alt_ac, alt_start_i, alt_end_i);",
178-
"CREATE INDEX gene_alt_index ON genomic (hgnc, alt_ac);",
179-
"CREATE INDEX alt_ac_index ON genomic (alt_ac);",
180-
]
181-
for create_index in indexes:
182-
await self.execute_query(create_index)
167+
CREATE INDEX IF NOT EXISTS alt_pos_index
168+
ON genomic (alt_ac, alt_start_i, alt_end_i);
169+
""",
170+
"""
171+
CREATE INDEX IF NOT EXISTS gene_alt_index
172+
ON genomic (hgnc, alt_ac);
173+
""",
174+
"""
175+
CREATE INDEX IF NOT EXISTS alt_ac_index
176+
ON genomic (alt_ac);
177+
""",
178+
]
179+
for create_index in indexes:
180+
await self.execute_query(create_index)
183181

184182
async def get_alt_ac_start_or_end(
185183
self, tx_ac: str, tx_exon_start: int, tx_exon_end: int, gene: str | None
@@ -967,7 +965,7 @@ def _get_secret_args() -> str:
967965

968966

969967
async def create_uta_connection_pool(
970-
db_url: str | None = None,
968+
db_url: str | None = None, initialize_genomic_table: bool = True
971969
) -> AsyncConnectionPool:
972970
"""Create and initialize a UTA connection pool.
973971
@@ -984,6 +982,8 @@ async def create_uta_connection_pool(
984982
985983
:param db_url: PostgreSQL connection URI (e.g., ``postgresql://user@host:port/db?options=-csearch_path%3Duta_schema,public``).
986984
If not provided, resolved from environment or defaults.
985+
:param initialize_genomic_table: whether to attempt initialization of the ``genomic``
986+
table, which is used and managed by ``cool_seq_tool``.
987987
:return: An open ``AsyncConnectionPool`` configured for the UTA database
988988
"""
989989
if "UTA_DB_PROD" in os.environ:
@@ -996,14 +996,15 @@ async def create_uta_connection_pool(
996996
)
997997
pool = AsyncConnectionPool(conninfo=db_url, open=False)
998998
await pool.open()
999-
try:
1000-
async with pool.connection() as conn:
1001-
await UtaRepository(conn).create_genomic_table()
1002-
# catch all exceptions -- this is probably a critical error, it's good to
1003-
# close the pool first
1004-
except:
1005-
await pool.close()
1006-
raise
999+
if initialize_genomic_table:
1000+
try:
1001+
async with pool.connection() as conn:
1002+
await UtaRepository(conn).create_genomic_table()
1003+
# catch all exceptions -- this is probably a critical error, it's good to
1004+
# close the pool first
1005+
except:
1006+
await pool.close()
1007+
raise
10071008
return pool
10081009

10091010

0 commit comments

Comments
 (0)