Skip to content

Commit b4bb9f3

Browse files
committed
Merge remote-tracking branch 'upstream/dev' into dev
2 parents 3806507 + c0e8772 commit b4bb9f3

18 files changed

Lines changed: 746 additions & 91 deletions

api/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def create_app():
5353
# Now add routes
5454
from api.resources.gene_information import gene_information
5555
from api.resources.rnaseq_gene_expression import rnaseq_gene_expression
56+
from api.resources.microarray_gene_expression import microarray_gene_expression
5657
from api.resources.proxy import bar_proxy
5758
from api.resources.thalemine import thalemine
5859
from api.resources.snps import snps
@@ -65,6 +66,7 @@ def create_app():
6566

6667
bar_api.add_namespace(gene_information)
6768
bar_api.add_namespace(rnaseq_gene_expression)
69+
bar_api.add_namespace(microarray_gene_expression)
6870
bar_api.add_namespace(bar_proxy)
6971
bar_api.add_namespace(thalemine)
7072
bar_api.add_namespace(snps)

api/models/annotations_lookup.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,13 @@ class AgiAlias(db.Model):
1010
agi: db.Mapped[str] = db.mapped_column(db.String(30), primary_key=True, nullable=False)
1111
alias: db.Mapped[str] = db.mapped_column(db.String(30), primary_key=True, nullable=False)
1212
date: db.Mapped[datetime] = db.mapped_column(db.Date, primary_key=True, nullable=False)
13+
14+
15+
class AtAgiLookup(db.Model):
    """ORM mapping for the ``at_agi_lookup`` table in the ``annotations_lookup`` bind.

    Each row associates a probeset with an AGI identifier on a given date.
    """

    __bind_key__ = "annotations_lookup"
    __tablename__ = "at_agi_lookup"
    # The index is named "probeset_date_agi" but its columns are declared
    # in the order probeset, agi, date.
    __table_args__ = (db.Index("probeset_date_agi", "probeset", "agi", "date"),)

    # All three columns together form the composite primary key.
    probeset: db.Mapped[str] = db.mapped_column(
        db.String(60),
        primary_key=True,
        nullable=False,
    )
    agi: db.Mapped[str] = db.mapped_column(
        db.String(30),
        primary_key=True,
        nullable=False,
    )
    date: db.Mapped[datetime] = db.mapped_column(
        db.Date,
        primary_key=True,
        nullable=False,
    )

api/models/arabidopsis_ecotypes.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from api import db
2+
3+
4+
class SampleData(db.Model):
    """ORM mapping for the ``sample_data`` table in the ``arabidopsis_ecotypes`` bind."""

    __bind_key__ = "arabidopsis_ecotypes"
    __tablename__ = "sample_data"

    # NOTE(review): data_probeset_id is the only column flagged as primary
    # key. If several rows can share a probeset id, loading full SampleData
    # entities would collapse them in the session identity map; selecting
    # individual columns is unaffected. Confirm against the real schema.
    sample_id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False)
    proj_id: db.Mapped[str] = db.mapped_column(db.String(15), nullable=False)
    sample_file_name: db.Mapped[str] = db.mapped_column(db.String)
    data_probeset_id: db.Mapped[str] = db.mapped_column(
        db.String(30),
        primary_key=True,
        nullable=False,
    )
    data_signal: db.Mapped[float] = db.mapped_column(db.Float)
    data_call: db.Mapped[str] = db.mapped_column(db.String)
    data_p_val: db.Mapped[float] = db.mapped_column(db.Float)
    data_bot_id: db.Mapped[str] = db.mapped_column(db.String(16), nullable=False)

api/resources/gene_information.py

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@
99
from api.models.eplant_soybean import Isoforms as EPlantSoybeanIsoforms
1010
from api.utils.bar_utils import BARUtils
1111
from marshmallow import Schema, ValidationError, fields as marshmallow_fields
12-
from api import cache, db
12+
from api import db
1313

1414

1515
gene_information = Namespace("Gene Information", description="Information about Genes", path="/gene_information")
1616

1717
# I think this is only needed for Swagger UI POST
18-
gene_isoforms_request_fields = gene_information.model(
19-
"GeneIsoforms",
18+
gene_information_request_fields = gene_information.model(
19+
"GeneInformation",
2020
{
2121
"species": fields.String(required=True, example="arabidopsis"),
2222
"genes": fields.List(
@@ -29,46 +29,62 @@
2929

3030

3131
# Validation is done in a different way to keep things simple
32-
class GeneIsoformsSchema(Schema):
32+
class GeneInformationSchema(Schema):
    """Marshmallow schema for POST bodies carrying a species and a list of genes."""

    # Mandatory species name.
    species = marshmallow_fields.String(required=True)
    # List of gene id strings; not marked required, so it may be absent
    # from a payload that still passes validation.
    genes = marshmallow_fields.List(marshmallow_fields.String)
3535

3636

37-
@gene_information.route("/gene_alias")
38-
class GeneAliasList(Resource):
39-
def get(self):
40-
"""This end point returns the list of species available"""
41-
species = ["arabidopsis"] # This are the only species available so far
42-
return BARUtils.success_exit(species)
43-
37+
@gene_information.route("/gene_aliases")
class GeneAliases(Resource):
    # Bulk alias lookup: accepts a species plus a list of gene ids in the
    # POST body and returns one {"gene": ..., "aliases": [...]} entry per
    # gene that has at least one alias row.

    @gene_information.expect(gene_information_request_fields)
    def post(self):
        """This end point retrieves gene aliases for a large dataset"""
        json_data = request.get_json()

        # Validate json
        try:
            json_data = GeneInformationSchema().load(json_data)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        # "genes" is not declared as required by the schema, so fall back to
        # an empty list instead of raising KeyError on a payload without it.
        genes = json_data.get("genes", [])
        species = json_data["species"]

        # Set species and check gene ID format
        if species != "arabidopsis":
            return BARUtils.error_exit("Invalid species"), 400
        database = AgiAlias

        # Reject the whole request as soon as one gene id is malformed
        for gene in genes:
            if not BARUtils.is_arabidopsis_gene_valid(gene):
                return BARUtils.error_exit("Invalid gene id"), 400

        # Query must be run individually for each species
        rows = db.session.execute(db.select(database).where(database.agi.in_(genes))).scalars().all()

        # Group every alias under its gene id. Always collect row.alias:
        # appending row.agi here would return the gene id in place of the
        # second and later aliases of a gene.
        aliases_by_gene = {}
        for row in rows:
            aliases_by_gene.setdefault(row.agi, []).append(row.alias)

        if not aliases_by_gene:
            return BARUtils.error_exit("No data for the given species/genes"), 400

        # If there are any aliases found, return data
        data = [{"gene": gene, "aliases": aliases} for gene, aliases in aliases_by_gene.items()]
        return BARUtils.success_exit(data)
7288

7389

7490
@gene_information.route("/gene_publications/<string:species>/<string:gene_id>")
@@ -166,7 +182,7 @@ def get(self, species="", gene_id=""):
166182

167183
@gene_information.route("/gene_isoforms/")
168184
class PostGeneIsoforms(Resource):
169-
@gene_information.expect(gene_isoforms_request_fields)
185+
@gene_information.expect(gene_information_request_fields)
170186
def post(self):
171187
"""This end point returns gene isoforms data for a multiple genes for a species.
172188
Only genes/isoforms with pdb structures are returned"""
@@ -176,7 +192,7 @@ def post(self):
176192

177193
# Validate json
178194
try:
179-
json_data = GeneIsoformsSchema().load(json_data)
195+
json_data = GeneInformationSchema().load(json_data)
180196
except ValidationError as err:
181197
return BARUtils.error_exit(err.messages), 400
182198

api/resources/microarray_gene_expression.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from flask_restx import Namespace, Resource
2+
from markupsafe import escape
3+
from api import db
4+
from api.models.annotations_lookup import AtAgiLookup
5+
from api.models.arabidopsis_ecotypes import SampleData as EcotypesSampleData
6+
from api.utils.bar_utils import BARUtils
7+
from api.utils.world_efp_utils import WorldeFPUtils
8+
9+
10+
# flask-restx namespace grouping the microarray expression endpoints,
# served under the /microarray_gene_expression path.
microarray_gene_expression = Namespace(
    "Microarray Gene Expression",
    path="/microarray_gene_expression",
    description="Microarray (probe-based) Gene Expression data from the BAR Databases",
)
15+
16+
17+
@microarray_gene_expression.route("/world_efp/<string:species>/<string:gene_id>")
class GetWorldeFPExpression(Resource):
    # Resolves a gene id to its most recently dated probeset in AtAgiLookup,
    # then returns the signal values stored for that probeset in the
    # arabidopsis_ecotypes sample data, grouped by a slice of data_bot_id.

    @microarray_gene_expression.param("species", _in="path", default="arabidopsis")
    @microarray_gene_expression.param("gene_id", _in="path", default="At1g01010")
    def get(self, species="", gene_id=""):
        """This end point returns World Efp gene expression data"""
        species = escape(species)
        gene_id = escape(gene_id)

        # Input validation failures are client errors, so answer with 400
        # like the other endpoints that perform these same checks.
        if species != "arabidopsis":
            return BARUtils.error_exit("Invalid species"), 400
        if not BARUtils.is_arabidopsis_gene_valid(gene_id):
            return BARUtils.error_exit("Invalid gene id"), 400

        # Most recent probeset for this gene, fetched with a single query;
        # scalar() yields None when no lookup row exists.
        probeset = db.session.execute(
            db.select(AtAgiLookup.probeset)
            .where(AtAgiLookup.agi == gene_id)
            .order_by(AtAgiLookup.date.desc())
            .limit(1)
        ).scalar()
        if probeset is None:
            return BARUtils.error_exit("There are no data found for the given gene")

        rows = db.session.execute(
            db.select(
                EcotypesSampleData.data_probeset_id,
                EcotypesSampleData.data_signal,
                EcotypesSampleData.data_bot_id,
            ).where(EcotypesSampleData.data_probeset_id == probeset)
        ).all()
        if not rows:
            return BARUtils.error_exit("There are no data found for the given gene")

        final_json = {}
        for probeset_id, signal, bot_id in rows:
            # Characters 5..7 of data_bot_id are the grouping key.
            # NOTE(review): presumably an ecotype/sample-group code -
            # confirm against the data_bot_id naming scheme.
            group = bot_id[5:8]
            if group not in final_json:
                # First row of a group builds its entry via the project helper
                final_json[group] = WorldeFPUtils.wrap_json(group, bot_id, signal, probeset_id)
            else:
                # Later rows only add their signal to the existing entry
                final_json[group]["values"][bot_id] = signal

        return BARUtils.success_exit(final_json)

0 commit comments

Comments
 (0)