Skip to content

Commit e5ace17

Browse files
committed
removed POST from efp proxy and simplified endpoints to a single one
1 parent 2271479 commit e5ace17

1 file changed

Lines changed: 2 additions & 306 deletions

File tree

api/resources/efp_proxy.py

Lines changed: 2 additions & 306 deletions
Original file line numberDiff line numberDiff line change
@@ -1,243 +1,18 @@
1-
import requests
2-
import json
3-
import os
4-
from typing import List, Optional
5-
from collections import OrderedDict
61
from flask_restx import Namespace, Resource
7-
from flask import request, current_app
82
from markupsafe import escape
9-
from sqlalchemy.engine.url import make_url
10-
from sqlalchemy.exc import SQLAlchemyError
113

12-
from api.models.efp_dynamic import SIMPLE_EFP_SAMPLE_MODELS
13-
from api.models.efp_schemas import SIMPLE_EFP_DATABASE_SCHEMAS
14-
from api.services.efp_bootstrap import bootstrap_simple_efp_databases
154
from api.services.efp_data import query_efp_database_dynamic
165

17-
# efp proxy namespace provides two endpoints for gene expression data
18-
# 1. /values talks to the live bar eplant cgi
19-
# 2. /expression reads from our local or remote databases using one shared query
206
efp_proxy_ns = Namespace(
217
'efp Proxy',
22-
description='Expression data retrieveal service from BAR eplant databse.',
8+
description='Gene expression data from BAR eFP databases',
239
path='/efp_proxy',
2410
)
2511

2612

27-
# rest endpoint that proxies requests to the external bar eplant api
28-
# supports urls like /efp_proxy/values/atgenexp_stress/AT1G01010
29-
@efp_proxy_ns.route("/values/<string:database>/<string:gene_id>")
@efp_proxy_ns.doc(
    description="Proxies requests to BAR ePlant API: /efp_proxy/values/{database}/{gene_id}",
    params=OrderedDict([
        (
            "database",
            {
                "description": "Database/datasource for arabidopsis view (e.g., atgenexp_stress)",
                "in": "path",
                "default": "atgenexp_stress",
            },
        ),
        (
            "gene_id",
            {
                "description": "Gene ID to query (e.g., AT1G01010)",
                "in": "path",
                "default": "AT1G01010",
            },
        ),
        (
            "samples",
            {
                "description": "Optional list of sample IDs (repeat ?samples=SampleA&samples=SampleB); omit to fetch all samples. Legacy JSON arrays are still accepted.",
                "in": "query",
                "default": "",
            },
        ),
    ]),
)
class EFPValues(Resource):
    """Proxy a gene-expression lookup to the external BAR ePlant CGI.

    The resource normalizes the optional ``samples`` query argument, fills in
    a default sample list when none is supplied, performs the upstream GET
    and wraps the result in a small JSON envelope.
    """

    # Upper bound (seconds) for each upstream HTTP call so a stalled CGI
    # cannot block the worker indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30

    @staticmethod
    def parse_samples_query_values(raw_values: Optional[List[str]]) -> Optional[List[str]]:
        """Normalize the ``samples`` query values into a list of sample ids.

        Accepted shapes: repeated query values, a single JSON array string
        (legacy clients), a single comma-separated string, or a single id.

        Args:
            raw_values: Values as returned by ``request.args.getlist``.

        Returns:
            A non-empty list of stripped sample ids, or ``None`` when no
            usable sample id was supplied.
        """
        if not raw_values:
            return None

        # strip first so whitespace-only entries are dropped in every shape
        filtered = [
            value.strip()
            for value in raw_values
            if isinstance(value, str) and value.strip()
        ]
        if not filtered:
            return None

        if len(filtered) > 1:
            return filtered

        candidate = filtered[0]

        # interpret json array strings because legacy clients sent one json string value
        if candidate.startswith("[") and candidate.endswith("]"):
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, list):
                names = [
                    item.strip()
                    for item in parsed
                    if isinstance(item, str) and item.strip()
                ]
                # an array without usable names means "no samples given"
                return names or None

        if "," in candidate:
            # support comma-separated lists by splitting manually
            split_values = [item.strip() for item in candidate.split(",") if item.strip()]
            if split_values:
                return split_values

        return [candidate]

    @staticmethod
    def get_all_samples_for_view(datasource: str):
        """Return every known sample id for ``datasource``.

        Two datasources use hardcoded lists; all others are looked up in the
        scraped metadata json, resolved relative to the current working
        directory.

        Args:
            datasource: Database name to match against each view's
                ``database`` field in the metadata.

        Returns:
            A list of sample ids (sorted and de-duplicated when read from the
            metadata), or an empty list when the file is missing or
            unreadable, or when the datasource is not present in it.
        """
        # check for datasources that need hardcoded samples
        if datasource == "root_Schaefer_lab":
            # this dataset is missing from the scraped metadata so we pin a curated set
            print("[info] using hardcoded fallback samples for root_Schaefer_lab")
            return [
                "WTCHG_203594_01", "WTCHG_203594_05", "WTCHG_203839_04",
                "WTCHG_203594_03", "WTCHG_203594_07", "WTCHG_203839_06",
                "WTCHG_203839_01", "WTCHG_203594_10", "WTCHG_203839_08",
                "WTCHG_129187_01", "WTCHG_129189_01", "WTCHG_129190_01",
                "WTCHG_129187_03", "WTCHG_129189_03", "WTCHG_129190_03",
                "WTCHG_129187_05", "WTCHG_129189_05", "WTCHG_129187_07",
                "WTCHG_131167_01", "WTCHG_125416_01", "WTCHG_129190_05",
                "WTCHG_131167_03", "WTCHG_125416_03", "WTCHG_129190_07",
                "WTCHG_131167_05", "WTCHG_125416_05", "WTCHG_129189_07",
            ]

        if datasource == "atgenexp_stress":
            # atgenexp stress views still rely on the json metadata so we keep a minimal fallback
            print("[info] using fallback arabidopsis samples from json spec")
            return [
                "AtGen_6_0011", "AtGen_6_0012", "AtGen_6_0021", "AtGen_6_0022",
                "AtGen_6_0711", "AtGen_6_0712", "AtGen_6_0721", "AtGen_6_0722",
            ]

        # point at the scraped metadata json so tests resolve it from the repo
        path = os.path.join(os.getcwd(), "data/efp_info/efp_species_view_info.json")

        # open directly instead of exists()+open() to avoid the check/use race
        try:
            with open(path, "r", encoding="utf-8") as f:
                metadata = json.load(f)
        except FileNotFoundError:
            # repo clones without fixtures can still run, just without auto-sample loading
            print(f"[warn] metadata json not found at {path}")
            return []
        except (OSError, ValueError) as e:
            # ValueError covers both malformed json and undecodable bytes
            print(f"[error] unable to read json: {e}")
            return []

        # search through all species and views to find a matching datasource
        for species_entry in metadata.values():
            views = species_entry.get("data", {}).get("views", {})
            for view_info in views.values():
                if view_info.get("database") != datasource:
                    continue
                # each group stores multiple treatment buckets; flatten all of them
                samples = [
                    sample
                    for group in view_info.get("groups", {}).values()
                    for treatment_samples in group.get("treatments", {}).values()
                    for sample in treatment_samples
                ]
                print(f"[info] found {len(samples)} samples in json for {datasource}")
                return sorted(set(samples))

        print(f"[warn] datasource {datasource} not found in json")
        return []

    @staticmethod
    def _extract_payload(response):
        """Return the decoded json body of ``response``, unwrapping a ``data`` key.

        A body that is not valid json yields an empty list, because the
        remote endpoint occasionally emits html error pages.
        """
        try:
            payload = response.json()
        except ValueError:
            return []
        if isinstance(payload, dict) and "data" in payload:
            return payload["data"]
        return payload

    @staticmethod
    def fetch_efp_data(datasource, gene_id, samples=None):
        """Fetch gene expression data from the external bar eplant api.

        Uses the supplied samples when at least one usable id is present,
        otherwise auto-fills the list via ``get_all_samples_for_view``. When a
        sample-filtered request yields no data, one unfiltered retry is made.

        Args:
            datasource: Database name forwarded as the ``datasource`` parameter.
            gene_id: Gene identifier forwarded as the ``id`` parameter.
            samples: Optional list of sample ids.

        Returns:
            On success, a dict with ``success``, ``url_called``,
            ``record_count`` and ``data`` (plus ``note`` after a retry).
            On failure, a ``(dict, status_code)`` tuple: the upstream status
            for http errors, 502 when the upstream could not be reached.
        """
        # NOTE(review): the double slash after the host looks unintentional; left as-is
        # because it is echoed back to clients through url_called.
        base_url = "https://bar.utoronto.ca//eplant/cgi-bin/plantefp.cgi"
        base_params = [
            ("datasource", datasource),
            ("id", gene_id),
            ("format", "json"),
        ]
        timeout = EFPValues.REQUEST_TIMEOUT_SECONDS

        # keep only usable ids; blank-only input is treated like "no samples given"
        chosen_samples = [
            sample.strip()
            for sample in (samples or [])
            if isinstance(sample, str) and sample.strip()
        ]
        if not chosen_samples:
            chosen_samples = EFPValues.get_all_samples_for_view(datasource)
            if chosen_samples:
                print(f"[info] auto-loaded {len(chosen_samples)} samples for datasource {datasource}")
            else:
                # no metadata entry means the cgi decides which default samples to use
                print(f"[warn] no samples found for datasource {datasource}")

        # track whether we hinted the cgi with explicit samples
        samples_applied = bool(chosen_samples)
        query_params = list(base_params)
        if samples_applied:
            query_params.append(("samples", json.dumps(chosen_samples)))

        # make exactly one http get request to the bar eplant cgi with every sample packed in
        try:
            response = requests.get(base_url, params=query_params, timeout=timeout)
        except requests.RequestException as exc:
            # network failure or timeout: report a gateway error instead of crashing
            return {"success": False, "error": f"request to bar failed: {exc}"}, 502
        url_called = response.url

        if not response.ok:
            # propagate error status so clients see the same http code the cgi returned
            return {"success": False, "error": f"bar returned {response.status_code} for url {url_called}"}, response.status_code

        data = EFPValues._extract_payload(response)

        # if no results returned with samples, retry once without sample filtering
        if not data and samples_applied:
            # even if this second call fails, we still return an empty array to the caller
            try:
                retry_resp = requests.get(base_url, params=base_params, timeout=timeout)
            except requests.RequestException:
                retry_data = []
            else:
                retry_data = EFPValues._extract_payload(retry_resp) if retry_resp.ok else []

            return {
                "success": True,
                "url_called": url_called,
                # guard len() the same way as the primary path; the payload may not be a list
                "record_count": len(retry_data) if isinstance(retry_data, list) else 0,
                "data": retry_data,
                "note": "no data returned with samples; fetched full view instead."
            }

        return {
            "success": True,
            "url_called": url_called,
            "record_count": len(data) if isinstance(data, list) else 0,
            "data": data  # payload mirrors what the real cgi would have returned
        }

    def get(self, database, gene_id):
        """Handle GET /values/<database>/<gene_id> by delegating to ``fetch_efp_data``."""
        # html-escape the path parameters before they are forwarded and echoed back
        database = escape(database)
        gene_id = escape(gene_id)

        # parse ?samples= query args once so downstream logic gets a normalized list
        samples_arg = self.parse_samples_query_values(request.args.getlist("samples"))

        # delegate to fetch_efp_data which auto-loads samples when none provided
        return self.fetch_efp_data(database, gene_id, samples=samples_arg)
234-
235-
236-
# rest endpoint that uses the static schema catalog to query local sqlite databases
237-
# supports urls like /efp_proxy/expression/sample_data/261585_at
23813
@efp_proxy_ns.route("/expression/<string:database>/<string:gene_id>")
23914
@efp_proxy_ns.doc(
240-
description="Static eFP endpoint: /efp_proxy/expression/{database}/{gene_id}"
15+
description="Retrieve gene expression values from a specified eFP database."
24116
)
24217
@efp_proxy_ns.param(
24318
"gene_id",
@@ -268,83 +43,4 @@ def get(self, database, gene_id):
26843
return result, result.get("error_code", 500)
26944

27045

271-
@efp_proxy_ns.route("/bootstrap/simple")
@efp_proxy_ns.doc(
    description="Create or update the simple eFP MySQL databases using the in-memory schema definitions.",
    params={
        "host": "Optional MySQL hostname override. Defaults to the host defined in SQLALCHEMY_BINDS.",
        "port": "Optional MySQL port override. Defaults to the port defined in SQLALCHEMY_BINDS.",
        "user": "Optional MySQL username override. Defaults to the username defined in SQLALCHEMY_BINDS.",
        "password": "Optional MySQL password override. Defaults to the password defined in SQLALCHEMY_BINDS.",
        "databases": "Optional list of database names to bootstrap. Defaults to every simple database.",
    },
)
class EFPSimpleBootstrap(Resource):
    """Create or update the simple eFP MySQL databases on request.

    NOTE(review): no authentication check is visible in this block, yet the
    request body can point the bootstrap at an arbitrary host and credentials.
    Confirm that access is restricted elsewhere.
    """

    @staticmethod
    def _infer_default_db_credentials():
        """Derive MySQL connection info for the simple eFP datasets from the configured binds.

        Returns:
            A dict with ``host``, ``port``, ``user`` and ``password`` taken
            from the first simple eFP database that has a bind configured.

        Raises:
            ValueError: If none of the simple eFP databases has a bind.
        """
        binds = current_app.config.get("SQLALCHEMY_BINDS") or {}
        for db_name in SIMPLE_EFP_DATABASE_SCHEMAS:
            uri = binds.get(db_name)
            if not uri:
                continue
            url = make_url(uri)
            return {
                "host": url.host or "localhost",
                "port": url.port or 3306,
                "user": url.username or "root",
                "password": url.password or "",
            }
        raise ValueError("No SQLAlchemy bind configured for the simple eFP databases.")

    def post(self):
        """Bootstrap the requested simple eFP databases.

        The optional json body may override ``host``, ``port``, ``user`` and
        ``password``, and may restrict the run with a ``databases`` list.

        Returns:
            A ``(dict, status)`` tuple: 200 with the bootstrap results and
            model names, 400 for invalid input, 500 when no bind is
            configured or the database layer fails.
        """
        payload = request.get_json(silent=True) or {}
        if not isinstance(payload, dict):
            # a json list/string/number has no .get(); reject it instead of crashing
            return {"success": False, "error": "request body must be a JSON object."}, 400

        try:
            defaults = self._infer_default_db_credentials()
        except ValueError as exc:
            return {"success": False, "error": str(exc)}, 500

        host = payload.get("host") or defaults["host"]

        port_value = payload.get("port")
        if isinstance(port_value, bool):
            # bool is an int subclass; int(True) would silently become port 1
            return {"success": False, "error": "port must be an integer"}, 400
        try:
            port = int(port_value) if port_value is not None else int(defaults["port"])
        except (TypeError, ValueError):
            return {"success": False, "error": "port must be an integer"}, 400
        if not 1 <= port <= 65535:
            return {"success": False, "error": "port must be between 1 and 65535"}, 400

        user = payload.get("user") or defaults["user"]
        password = payload.get("password") or defaults["password"]

        databases = payload.get("databases")
        if databases is not None:
            if not isinstance(databases, list) or not all(isinstance(item, str) for item in databases):
                return {"success": False, "error": "databases must be a list of names."}, 400

        try:
            results = bootstrap_simple_efp_databases(
                host=host,
                port=port,
                user=user,
                password=password,
                databases=databases,
            )
        except ValueError as exc:
            return {"success": False, "error": str(exc)}, 400
        except SQLAlchemyError as exc:
            return {"success": False, "error": str(exc)}, 500

        # report the dynamic model that backs each bootstrapped database
        model_info = [
            {"database": name, "model": model.__name__}
            for name, model in SIMPLE_EFP_SAMPLE_MODELS.items()
            if databases is None or name in databases
        ]

        return {
            "success": True,
            "databases": results,
            "models": model_info,
            "note": "Simple eFP databases are materialized in MySQL while SQLAlchemy models remain dynamic.",
        }, 200
346-
347-
348-
efp_proxy_ns.add_resource(EFPValues, '/values/<string:database>/<string:gene_id>')
34946
efp_proxy_ns.add_resource(EFPExpression, '/expression/<string:database>/<string:gene_id>')
350-
efp_proxy_ns.add_resource(EFPSimpleBootstrap, '/bootstrap/simple')

0 commit comments

Comments
 (0)