Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 40 additions & 22 deletions api/Archive/analyze_efp_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,21 @@

# Extra columns that some databases have (we want to know which ones)
EXTRA_COLUMNS = {
"channel", "data_call", "data_num", "data_p_val", "data_p_value",
"genome", "genome_id", "log", "orthogroup", "p_val", "project_id",
"qvalue", "sample_file_name", "sample_tissue", "version",
"channel",
"data_call",
"data_num",
"data_p_val",
"data_p_value",
"genome",
"genome_id",
"log",
"orthogroup",
"p_val",
"project_id",
"qvalue",
"sample_file_name",
"sample_tissue",
"version",
}


Expand Down Expand Up @@ -94,7 +106,9 @@ def main():

# ---- 2. Group databases by their 3-column signature ----
print("\n" + "=" * 80)
print("GROUPING BY SIGNATURE (probeset_type, probeset_nullable, signal_nullable, signal_default, bot_type, bot_nullable)")
print(
"GROUPING BY SIGNATURE (probeset_type, probeset_nullable, signal_nullable, signal_default, bot_type, bot_nullable)"
)
print("=" * 80)

sig_groups = defaultdict(list)
Expand All @@ -103,7 +117,9 @@ def main():
sig_groups[sig].append(db)

for sig, dbs in sorted(sig_groups.items(), key=lambda x: -len(x[1])):
print(f"\n Signature: probeset={sig[0]}(nullable={sig[1]}) signal(nullable={sig[2]}, default={sig[3]}) bot={sig[4]}(nullable={sig[5]})")
print(
f"\n Signature: probeset={sig[0]}(nullable={sig[1]}) signal(nullable={sig[2]}, default={sig[3]}) bot={sig[4]}(nullable={sig[5]})"
)
print(f" Count: {len(dbs)}")
print(f" DBs: {', '.join(dbs[:10])}{'...' if len(dbs) > 10 else ''}")

Expand Down Expand Up @@ -135,15 +151,17 @@ def main():
# Determine extra columns this DB needs
extras = set(cols.keys()) - NEEDED_COLUMNS - {"proj_id", "sample_id"}

compact_entries.append({
"db": db,
"probeset_len": probeset_len, # None = tinytext
"probeset_type": probeset_type,
"bot_len": bot_len, # None = tinytext
"bot_type": bot_type,
"signal_nullable": signal_nullable,
"extras": extras,
})
compact_entries.append(
{
"db": db,
"probeset_len": probeset_len, # None = tinytext
"probeset_type": probeset_type,
"bot_len": bot_len, # None = tinytext
"bot_type": bot_type,
"signal_nullable": signal_nullable,
"extras": extras,
}
)

# ---- 4. Show the most compact table-driven representation ----
print("\n" + "=" * 80)
Expand Down Expand Up @@ -180,9 +198,7 @@ def main():
for e in compact_entries:
# Filter out databases that ONLY have unneeded extras
# (sample_file_name, data_call, data_p_val etc. are not needed)
has_important_extras = e["extras"] - {
"sample_file_name", "data_call", "data_p_val", "data_p_value", "data_num"
}
has_important_extras = e["extras"] - {"sample_file_name", "data_call", "data_p_val", "data_p_value", "data_num"}
if has_important_extras:
complex_dbs.append(e)
else:
Expand Down Expand Up @@ -217,11 +233,13 @@ def main():
with open(SAMPLE_DATA_CSV, newline="") as f:
reader = csv.DictReader(f)
for row in reader:
db_samples[row["source_database"]].append({
"data_bot_id": row["data_bot_id"],
"data_probeset_id": row["data_probeset_id"],
"data_signal": row["data_signal"],
})
db_samples[row["source_database"]].append(
{
"data_bot_id": row["data_bot_id"],
"data_probeset_id": row["data_probeset_id"],
"data_signal": row["data_signal"],
}
)

print(f"Total databases with sample data: {len(db_samples)}")
print(f"Total sample rows: {sum(len(v) for v in db_samples.values())}")
Expand Down
7 changes: 4 additions & 3 deletions api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def create_app():
mysql_efp_base = bar_app.config.get("MYSQL_EFP_BASE_URI")
if mysql_efp_base:
from api.models.efp_schemas import SIMPLE_EFP_DATABASE_SCHEMAS

binds = bar_app.config.get("SQLALCHEMY_BINDS") or {}
base = mysql_efp_base.rstrip("/")
for db_name in SIMPLE_EFP_DATABASE_SCHEMAS:
Expand Down Expand Up @@ -70,9 +71,9 @@ def create_app():
# On BAR, MySQL binds come from the server config — never build SQLite mirrors there.
# For CI and local dev, determine whether to build SQLite mirrors.
needs_sqlite_mirrors = (
is_ci # always build on CI
or bar_app.config.get("TESTING") # config requests test mode
or "pytest" in os.sys.modules # running under pytest
is_ci # always build on CI
or bar_app.config.get("TESTING") # config requests test mode
or "pytest" in os.sys.modules # running under pytest
or os.environ.get("BAR_API_AUTO_SQLITE_MIRRORS") == "1" # explicit override
)

Expand Down
2 changes: 1 addition & 1 deletion api/models/bar_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Bridge file to maintain backward compatibility with imports
from api.utils.bar_utils import BARUtils

__all__ = ['BARUtils']
__all__ = ["BARUtils"]
31 changes: 16 additions & 15 deletions api/resources/fastpheno.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from api.utils.bar_utils import BARUtils
from markupsafe import escape


fastpheno = Namespace("FastPheno", description="FastPheno API service", path="/fastpheno")


Expand Down Expand Up @@ -240,9 +239,7 @@ def get(self, tree_site_id, band):
class FastPhenoSites(Resource):
def get(self):
"""Returns all sites with coordinates, for initializing the map view."""
rows = db.session.execute(
db.select(Sites).order_by(Sites.site_name)
).scalars().all()
rows = db.session.execute(db.select(Sites).order_by(Sites.site_name)).scalars().all()

res = [
{
Expand All @@ -266,11 +263,11 @@ def get(self, sites_pk):
if not BARUtils.is_integer(str(sites_pk)):
return BARUtils.error_exit("Invalid sites_pk"), 400

rows = db.session.execute(
db.select(Flights)
.where(Flights.sites_pk == sites_pk)
.order_by(Flights.flight_date)
).scalars().all()
rows = (
db.session.execute(db.select(Flights).where(Flights.sites_pk == sites_pk).order_by(Flights.flight_date))
.scalars()
.all()
)

if len(rows) == 0:
return BARUtils.error_exit("No flights found for the given site"), 400
Expand All @@ -297,12 +294,16 @@ def get(self, flights_pk):
if not BARUtils.is_integer(str(flights_pk)):
return BARUtils.error_exit("Invalid flights_pk"), 400

rows = db.session.execute(
db.select(Bands.band)
.where(Bands.flights_pk == flights_pk)
.distinct()
.order_by(db.func.cast(db.func.regexp_replace(Bands.band, "[^0-9]", ""), db.Integer))
).scalars().all()
rows = (
db.session.execute(
db.select(Bands.band)
.where(Bands.flights_pk == flights_pk)
.distinct()
.order_by(db.func.cast(db.func.regexp_replace(Bands.band, "[^0-9]", ""), db.Integer))
)
.scalars()
.all()
)

if len(rows) == 0:
return BARUtils.error_exit("No bands found for the given flight"), 400
Expand Down
12 changes: 5 additions & 7 deletions api/resources/gene_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@
)

gene_expression = Namespace(
'Gene Expression',
description='Gene expression data from BAR eFP databases',
path='/gene_expression',
"Gene Expression",
description="Gene expression data from BAR eFP databases",
path="/gene_expression",
)


@gene_expression.route("/expression/<string:database>/<string:gene_id>")
@gene_expression.doc(
description="Retrieve gene expression values from a specified eFP database."
)
@gene_expression.doc(description="Retrieve gene expression values from a specified eFP database.")
@gene_expression.param(
"gene_id",
"Gene ID (e.g. AT1G01010 for Arabidopsis, or a probeset like 261585_at)",
Expand Down Expand Up @@ -76,4 +74,4 @@ def get(self, database, gene_id):
return BARUtils.error_exit(result["error"]), result.get("error_code", 500)


gene_expression.add_resource(GeneExpression, '/expression/<string:database>/<string:gene_id>')
gene_expression.add_resource(GeneExpression, "/expression/<string:database>/<string:gene_id>")
13 changes: 6 additions & 7 deletions api/resources/gene_information.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from api import db
from sqlalchemy import func


gene_information = Namespace("Gene Information", description="Information about Genes", path="/gene_information")

parser = gene_information.parser()
Expand Down Expand Up @@ -91,9 +90,9 @@ def post(self):

# Query must be run individually for each species
lowered_genes = [gene.lower() for gene in genes]
rows = db.session.execute(
db.select(database).where(func.lower(database.agi).in_(lowered_genes))
).scalars().all()
rows = (
db.session.execute(db.select(database).where(func.lower(database.agi).in_(lowered_genes))).scalars().all()
)

# If there are any isoforms found, return data
data = []
Expand Down Expand Up @@ -286,9 +285,9 @@ def post(self):
gene_ids = []
gene_fail = []
for one_term in terms:
query = db.select(alias_database.agi).where(
func.lower(alias_database.agi).contains(one_term.lower())
).limit(1)
query = (
db.select(alias_database.agi).where(func.lower(alias_database.agi).contains(one_term.lower())).limit(1)
)
result = db.session.execute(query).fetchone()
if result is not None:
gene_ids.append(result[0])
Expand Down
23 changes: 7 additions & 16 deletions api/resources/interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def get(self, tag=""):
"image_url": ex.image_url,
"grn_title": ex.grn_title,
"cyjs_layout": _normalize_cyjs_layout(ex.cyjs_layout),
"tag": "|".join(_sort_tag_strings(src_tag_match[ex.source_id]))
"tag": "|".join(_sort_tag_strings(src_tag_match[ex.source_id])),
}
result.append(one_source[source_id])

Expand Down Expand Up @@ -544,7 +544,7 @@ def get(self, number=""):
"url": row.url,
"image_url": row.image_url,
"grn_title": row.grn_title,
"cyjs_layout": _normalize_cyjs_layout(row.cyjs_layout)
"cyjs_layout": _normalize_cyjs_layout(row.cyjs_layout),
}
)

Expand Down Expand Up @@ -607,20 +607,14 @@ def get(self, stringAGI=""):
"source_name": row.source_name,
"comments": row.comments,
"cyjs_layout": _normalize_cyjs_layout(row.cyjs_layout),
"tags": []
"tags": [],
}

tag_entry = f"{row.tag_name}:{row.tag_group}"
if tag_entry not in result_dict[source_id]["tags"]: # DISTINCT
result_dict[source_id]["tags"].append(tag_entry)

result = [
{
**data,
"tags": "|".join(_sort_tag_strings(data["tags"]))
}
for data in result_dict.values()
]
result = [{**data, "tags": "|".join(_sort_tag_strings(data["tags"]))} for data in result_dict.values()]
result.sort(key=lambda item: (item["grn_title"] or ""))

if len(result) == 0:
Expand Down Expand Up @@ -681,18 +675,15 @@ def get(self, AGI_1="", AGI_2=""):
"source_name": row.source_name,
"comments": row.comments,
"cyjs_layout": _normalize_cyjs_layout(row.cyjs_layout),
"tags": []
"tags": [],
}

tag_entry = f"{row.tag_name}:{row.tag_group}"
if tag_entry not in result_dict[source_id]["tags"]:
result_dict[source_id]["tags"].append(tag_entry)

result = [
{
**data,
"tags": "|".join(_sort_tag_strings_natural_case_sensitive(data["tags"]))
}
{**data, "tags": "|".join(_sort_tag_strings_natural_case_sensitive(data["tags"]))}
for data in result_dict.values()
]
result.sort(key=lambda item: item["source_id"])
Expand Down Expand Up @@ -746,7 +737,7 @@ def get(self):
"image_url": ex.image_url,
"grn_title": ex.grn_title,
"cyjs_layout": _normalize_cyjs_layout(ex.cyjs_layout),
"tag": "|".join(_sort_tag_strings(src_tag_match[ex.source_id]))
"tag": "|".join(_sort_tag_strings(src_tag_match[ex.source_id])),
}
result.append(one_source[source_id])

Expand Down
1 change: 0 additions & 1 deletion api/resources/llama3.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from api import db
from api.models.llama3 import Summaries


llama3 = Namespace("LLaMA", description="Endpoint for retreiving LLaMA3 results", path="/LLaMA")


Expand Down
Loading
Loading