Skip to content

Commit 16ce80c

Browse files
authored
Merge pull request #227 from BioAnalyticResource/dev
Update from dev branch
2 parents a995369 + b09b18a commit 16ce80c

6 files changed

Lines changed: 137 additions & 92 deletions

File tree

api/resources/gene_information.py

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
from api.models.eplant_soybean import Isoforms as EPlantSoybeanIsoforms
99
from api.utils.bar_utils import BARUtils
1010
from marshmallow import Schema, ValidationError, fields as marshmallow_fields
11-
from api import cache, db
11+
from api import db
1212

1313

1414
gene_information = Namespace("Gene Information", description="Information about Genes", path="/gene_information")
1515

1616
# I think this is only needed for Swagger UI POST
17-
gene_isoforms_request_fields = gene_information.model(
18-
"GeneIsoforms",
17+
gene_information_request_fields = gene_information.model(
18+
"GeneInformation",
1919
{
2020
"species": fields.String(required=True, example="arabidopsis"),
2121
"genes": fields.List(
@@ -28,46 +28,62 @@
2828

2929

3030
# Validation is done in a different way to keep things simple
31-
class GeneIsoformsSchema(Schema):
31+
class GeneInformationSchema(Schema):
3232
species = marshmallow_fields.String(required=True)
3333
genes = marshmallow_fields.List(cls_or_instance=marshmallow_fields.String)
3434

3535

36-
@gene_information.route("/gene_alias")
37-
class GeneAliasList(Resource):
38-
def get(self):
39-
"""This end point returns the list of species available"""
40-
species = ["arabidopsis"] # This are the only species available so far
41-
return BARUtils.success_exit(species)
42-
36+
@gene_information.route("/gene_aliases")
37+
class GeneAliases(Resource):
38+
@gene_information.expect(gene_information_request_fields)
39+
def post(self):
40+
"""This end point retrieves gene aliases for a large dataset"""
41+
json_data = request.get_json()
42+
data = {}
4343

44-
@gene_information.route("/gene_alias/<string:species>/<string:gene_id>")
45-
class GeneAlias(Resource):
46-
@gene_information.param("species", _in="path", default="arabidopsis")
47-
@gene_information.param("gene_id", _in="path", default="At3g24650")
48-
@cache.cached()
49-
def get(self, species="", gene_id=""):
50-
"""This end point provides gene alias given a gene ID."""
51-
aliases = []
44+
# Validate json
45+
try:
46+
json_data = GeneInformationSchema().load(json_data)
47+
except ValidationError as err:
48+
return BARUtils.error_exit(err.messages), 400
5249

53-
# Escape input
54-
species = escape(species)
55-
gene_id = escape(gene_id)
50+
genes = json_data["genes"]
51+
species = json_data["species"]
5652

53+
# Set species and check gene ID format
5754
if species == "arabidopsis":
58-
if BARUtils.is_arabidopsis_gene_valid(gene_id):
59-
rows = db.session.execute(db.select(AgiAlias).where(AgiAlias.agi == gene_id)).scalars().all()
60-
[aliases.append(row.alias) for row in rows]
61-
else:
62-
return BARUtils.error_exit("Invalid gene id"), 400
55+
database = AgiAlias
56+
57+
# Check if gene is valid
58+
for gene in genes:
59+
if not BARUtils.is_arabidopsis_gene_valid(gene):
60+
return BARUtils.error_exit("Invalid gene id"), 400
61+
6362
else:
64-
return BARUtils.error_exit("No data for the given species")
63+
return BARUtils.error_exit("Invalid species"), 400
64+
65+
# Query must be run individually for each species
66+
rows = db.session.execute(db.select(database).where(database.agi.in_(genes))).scalars().all()
67+
68+
# If there are any aliases found, return data
69+
data = []
70+
data_items = {}
71+
72+
if len(rows) > 0:
73+
for row in rows:
74+
if row.agi in data_items.keys():
75+
# Gene already seen: store this row's alias (not the gene ID again) so every alias is returned
data_items[row.agi].append(row.alias)
76+
else:
77+
data_items[row.agi] = []
78+
data_items[row.agi].append(row.alias)
79+
80+
for gene in data_items.keys():
81+
data.append({"gene": gene, "aliases": data_items[gene]})
82+
83+
return BARUtils.success_exit(data)
6584

66-
# Return results if there are data
67-
if len(aliases) > 0:
68-
return BARUtils.success_exit(aliases)
6985
else:
70-
return BARUtils.error_exit("There are no data found for the given gene")
86+
return BARUtils.error_exit("No data for the given species/genes"), 400
7187

7288

7389
@gene_information.route("/gene_isoforms/<string:species>/<string:gene_id>")
@@ -126,7 +142,7 @@ def get(self, species="", gene_id=""):
126142

127143
@gene_information.route("/gene_isoforms/")
128144
class PostGeneIsoforms(Resource):
129-
@gene_information.expect(gene_isoforms_request_fields)
145+
@gene_information.expect(gene_information_request_fields)
130146
def post(self):
131147
"""This end point returns gene isoforms data for a multiple genes for a species.
132148
Only genes/isoforms with pdb structures are returned"""
@@ -136,7 +152,7 @@ def post(self):
136152

137153
# Validate json
138154
try:
139-
json_data = GeneIsoformsSchema().load(json_data)
155+
json_data = GeneInformationSchema().load(json_data)
140156
except ValidationError as err:
141157
return BARUtils.error_exit(err.messages), 400
142158

config/databases/annotations_lookup.sql

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
-- MariaDB dump 10.17 Distrib 10.4.12-MariaDB, for OpenBSD (amd64)
1+
-- MySQL dump 10.13 Distrib 8.3.0, for Linux (x86_64)
22
--
33
-- Host: localhost Database: annotations_lookup
44
-- ------------------------------------------------------
5-
-- Server version 10.4.12-MariaDB
5+
-- Server version 8.3.0
66

77
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
88
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
99
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
10-
/*!40101 SET NAMES utf8mb4 */;
10+
/*!50503 SET NAMES utf8mb4 */;
1111
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
1212
/*!40103 SET TIME_ZONE='+00:00' */;
1313
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
@@ -19,7 +19,7 @@
1919
-- Current Database: `annotations_lookup`
2020
--
2121

22-
CREATE DATABASE /*!32312 IF NOT EXISTS*/ `annotations_lookup` /*!40100 DEFAULT CHARACTER SET latin1 */;
22+
CREATE DATABASE /*!32312 IF NOT EXISTS*/ `annotations_lookup` /*!40100 DEFAULT CHARACTER SET latin1 */ /*!80016 DEFAULT ENCRYPTION='N' */;
2323

2424
USE `annotations_lookup`;
2525

@@ -29,7 +29,7 @@ USE `annotations_lookup`;
2929

3030
DROP TABLE IF EXISTS `agi_alias`;
3131
/*!40101 SET @saved_cs_client = @@character_set_client */;
32-
/*!40101 SET character_set_client = utf8 */;
32+
/*!50503 SET character_set_client = utf8mb4 */;
3333
CREATE TABLE `agi_alias` (
3434
`agi` varchar(30) NOT NULL,
3535
`alias` varchar(30) NOT NULL,
@@ -45,7 +45,7 @@ CREATE TABLE `agi_alias` (
4545

4646
LOCK TABLES `agi_alias` WRITE;
4747
/*!40000 ALTER TABLE `agi_alias` DISABLE KEYS */;
48-
INSERT INTO `agi_alias` VALUES ('At3g24650','ABI3','2019-04-02'),('At3g24650','AtABI3','2019-04-02'),('At3g24650','SIS10','2019-04-02');
48+
INSERT INTO `agi_alias` VALUES ('At1g01010','ANAC001','2022-06-30'),('At1g01010','NAC001','2022-06-30'),('At1g01010','NTL10','2022-06-30'),('At1g01020','ARV1','2022-06-30');
4949
/*!40000 ALTER TABLE `agi_alias` ENABLE KEYS */;
5050
UNLOCK TABLES;
5151
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
@@ -58,4 +58,4 @@ UNLOCK TABLES;
5858
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
5959
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
6060

61-
-- Dump completed on 2020-06-26 15:59:53
61+
-- Dump completed on 2024-05-02 19:19:37

docs/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ beautifulsoup4==4.12.3
44
certifi==2024.2.2
55
charset-normalizer==3.3.2
66
docutils==0.21.2
7-
furo==2024.1.29
7+
furo==2024.4.27
88
idna==3.7
99
imagesize==1.4.1
1010
Jinja2==3.1.3

docs/source/user_guide.rst

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,29 @@
11
User Guide
22
==========
33

4-
Coming soon ...
4+
Gene Information
5+
----------------
6+
7+
These endpoints can be used to retrieve gene annotations, aliases, probesets, etc. For large input datasets, please use
8+
POST requests, because POST requests do not have a URL length limit. Currently, the following species are available.
9+
10+
1. Arabidopsis
11+
12+
**POST /gene_information/gene_aliases/**
13+
14+
This end point provides gene aliases given a list of gene IDs for a species.
15+
16+
**GET /gene_information/gene_isoforms/{species}/{gene_id}**
17+
18+
This end point provides gene isoforms given a gene ID.
19+
20+
**POST /gene_information/gene_isoforms/**
21+
22+
This end point provides gene isoforms given a list of gene IDs for a species. This is the POST request that takes a JSON object of species and
23+
genes.
24+
25+
26+
27+
28+
29+

requirements.txt

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
aniso8601==9.0.1
22
async-timeout==4.0.3
33
attrs==23.2.0
4-
black==24.4.1
5-
blinker==1.7.0
4+
black==24.4.2
5+
blinker==1.8.2
66
cachelib==0.9.0
7-
certifi==2024.2.2
7+
certifi==2024.7.4
88
charset-normalizer==3.3.2
99
click==8.1.7
10-
coverage==7.5.0
10+
coverage==7.5.4
1111
Deprecated==1.2.14
12-
flake8==7.0.0
12+
flake8==7.1.0
1313
Flask==3.0.3
14-
Flask-Caching==2.1.0
15-
Flask-Cors==4.0.0
16-
Flask-Limiter==3.6.0
14+
Flask-Caching==2.3.0
15+
Flask-Cors==4.0.1
16+
Flask-Limiter==3.7.0
1717
flask-marshmallow==1.2.1
1818
flask-restx==1.3.0
1919
Flask-SQLAlchemy==3.1.1
@@ -22,37 +22,39 @@ idna==3.7
2222
importlib_resources==6.4.0
2323
iniconfig==2.0.0
2424
itsdangerous==2.2.0
25-
Jinja2==3.1.3
26-
jsonschema==4.21.1
25+
Jinja2==3.1.4
26+
jsonschema==4.23.0
2727
jsonschema-specifications==2023.12.1
28-
limits==3.11.0
28+
limits==3.13.0
2929
markdown-it-py==3.0.0
3030
MarkupSafe==2.1.5
31-
marshmallow==3.21.1
31+
marshmallow==3.21.3
3232
mccabe==0.7.0
3333
mdurl==0.1.2
3434
mypy-extensions==1.0.0
3535
mysqlclient==2.2.4
3636
ordered-set==4.1.0
37-
packaging==24.0
37+
packaging==24.1
3838
pathspec==0.12.1
39-
platformdirs==4.2.1
39+
platformdirs==4.2.2
4040
pluggy==1.5.0
41-
pycodestyle==2.11.1
41+
pycodestyle==2.12.0
4242
pyflakes==3.2.0
43-
Pygments==2.17.2
43+
Pygments==2.18.0
4444
pyrsistent==0.20.0
45-
pytest==8.1.1
45+
pytest==8.2.2
4646
python-dateutil==2.9.0.post0
4747
pytz==2024.1
48-
redis==5.0.4
49-
referencing==0.35.0
50-
requests==2.31.0
48+
redis==5.0.7
49+
referencing==0.35.1
50+
requests==2.32.3
5151
rich==13.7.1
52-
rpds-py==0.18.0
52+
rpds-py==0.19.0
53+
setuptools==70.2.0
5354
six==1.16.0
54-
SQLAlchemy==2.0.29
55-
typing_extensions==4.11.0
56-
urllib3==2.2.1
57-
Werkzeug==3.0.2
55+
SQLAlchemy==2.0.31
56+
typing_extensions==4.12.2
57+
urllib3==2.2.2
58+
Werkzeug==3.0.3
59+
wheel==0.43.0
5860
wrapt==1.16.0

tests/resources/test_gene_information.py

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,39 +6,41 @@ class TestIntegrations(TestCase):
66
def setUp(self):
77
self.app_client = app.test_client()
88

9-
def test_get_gene_alias_list(self):
10-
"""This function tests the gene alias list get function
9+
def test_post_gene_aliases(self):
10+
"""This tests the POST request to /gene_information/gene_aliases/
1111
:return:
1212
"""
13-
response = self.app_client.get("/gene_information/gene_alias")
14-
expected = {"wasSuccessful": True, "data": ["arabidopsis"]}
15-
self.assertEqual(response.json, expected)
16-
17-
def test_get_arabidopsis_gene_alias(self):
18-
"""This tests checks GET request for gene alias Arabidopsis
19-
:return:
20-
"""
21-
# Valid data
22-
response = self.app_client.get("/gene_information/gene_alias/arabidopsis/At3g24650")
23-
expected = {"wasSuccessful": True, "data": ["ABI3", "AtABI3", "SIS10"]}
24-
self.assertEqual(response.json, expected)
25-
26-
# Data not found, but gene is valid
27-
response = self.app_client.get("/gene_information/gene_alias/arabidopsis/At3g24651")
13+
data = {"species": "arabidopsis", "genes": ["AT1G01010", "AT1G01020"]}
2814
expected = {
29-
"wasSuccessful": False,
30-
"error": "There are no data found for the given gene",
15+
"wasSuccessful": True,
16+
"data": [
17+
{"gene": "At1g01010", "aliases": ["ANAC001", "At1g01010", "At1g01010"]},
18+
{"gene": "At1g01020", "aliases": ["ARV1"]},
19+
],
3120
}
21+
response = self.app_client.post("/gene_information/gene_aliases", json=data)
22+
self.assertEqual(response.status_code, 200)
3223
self.assertEqual(response.json, expected)
3324

34-
# Invalid Gene
35-
response = self.app_client.get("/gene_information/gene_alias/arabidopsis/At3g2465x")
25+
# Invalid species
26+
data = {"species": "abc", "genes": ["AT1G01010", "AT1G01020"]}
27+
expected = {"wasSuccessful": False, "error": "Invalid species"}
28+
response = self.app_client.post("/gene_information/gene_aliases", json=data)
29+
self.assertEqual(response.status_code, 400)
30+
self.assertEqual(response.json, expected)
31+
32+
# Invalid gene
33+
data = {"species": "arabidopsis", "genes": ["abc", "AT1G01020"]}
3634
expected = {"wasSuccessful": False, "error": "Invalid gene id"}
35+
response = self.app_client.post("/gene_information/gene_aliases", json=data)
36+
self.assertEqual(response.status_code, 400)
3737
self.assertEqual(response.json, expected)
3838

39-
# Invalid Species
40-
response = self.app_client.get("/gene_information/gene_alias/x/At3g24650")
41-
expected = {"wasSuccessful": False, "error": "No data for the given species"}
39+
# No data
40+
data = {"species": "arabidopsis", "genes": ["AT1G01011", "AT1G01021"]}
41+
expected = {"wasSuccessful": False, "error": "No data for the given species/genes"}
42+
response = self.app_client.post("/gene_information/gene_aliases", json=data)
43+
self.assertEqual(response.status_code, 400)
4244
self.assertEqual(response.json, expected)
4345

4446
def test_get_arabidopsis_gene_isoform(self):

0 commit comments

Comments
 (0)