Skip to content

Commit 607b367

Browse files
authored
Merge pull request #239 from asherpasha/dev
Refactoring code.
2 parents f15d597 + f94efe4 commit 607b367

8 files changed

Lines changed: 343 additions & 720 deletions

File tree

api/resources/gene_information.py

Lines changed: 214 additions & 226 deletions
Large diffs are not rendered by default.

api/resources/interactions.py

Lines changed: 3 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,14 @@
55
"""
66

77
from flask_restx import Namespace, Resource, fields
8-
from flask import request, jsonify
8+
from flask import request
99
from markupsafe import escape
1010
from api.utils.bar_utils import BARUtils
1111
from marshmallow import Schema, ValidationError, fields as marshmallow_fields
1212
from api import db
1313
from api.models.rice_interactions import Interactions as RiceInteractions
1414
from sqlalchemy import or_
1515

16-
import tempfile
17-
import os
18-
import subprocess
19-
from collections import defaultdict
20-
2116
itrns = Namespace(
2217
"Interactions",
2318
description="Interactions (protein-protein, protein-DNA, etc) endpoint",
@@ -41,8 +36,7 @@
4136
{
4237
"data": fields.List(
4338
required=True,
44-
example=[["AT5G67420", "AT1G12110"],
45-
["AT5G67420", "AT1G08090"]],
39+
example=[["AT5G67420", "AT1G12110"], ["AT5G67420", "AT1G08090"]],
4640
cls_or_instance=fields.List(fields.String),
4741
),
4842
},
@@ -55,9 +49,7 @@ class GeneIntrnsSchema(Schema):
5549

5650

5751
class MFinderDataSchema(Schema):
58-
data = marshmallow_fields.List(
59-
marshmallow_fields.List(marshmallow_fields.String())
60-
)
52+
data = marshmallow_fields.List(marshmallow_fields.List(marshmallow_fields.String()))
6153

6254

6355
@itrns.route("/<species>/<query_gene>")
@@ -164,200 +156,3 @@ def post(self):
164156
return BARUtils.success_exit(res)
165157
else:
166158
return BARUtils.error_exit("No data for the given species/genes"), 400
167-
168-
169-
@itrns.route("/mfinder")
170-
class MFinder(Resource):
171-
@itrns.expect(post_int_data)
172-
def post(self):
173-
"""This endpoint was originally written by Vincent Lau to return mFinder
174-
results to AGENT in his express node.JS app. However Tianhui Zhao refactored
175-
to the BAR_API
176-
"""
177-
data = request.get_json()
178-
# Validate json
179-
try:
180-
data = MFinderDataSchema().load(data)
181-
except ValidationError as err:
182-
return BARUtils.error_exit(err.messages), 400
183-
184-
filtered_valid_arr = self.input_validation(data['data'])
185-
if isinstance(filtered_valid_arr, str):
186-
return BARUtils.error_exit(filtered_valid_arr), 400
187-
settings = self.settings_validation(data.get('options', {}))
188-
ret_json = self.create_files_and_mfinder(filtered_valid_arr, settings)
189-
return jsonify(self.beautify_results(ret_json))
190-
191-
# Eliminates same pairs
192-
def uniq_with(self, arr, comp_func):
193-
unique_arr = []
194-
for item in arr:
195-
if not any(comp_func(item, unique_item) for unique_item in unique_arr):
196-
unique_arr.append(item)
197-
return unique_arr
198-
199-
def is_equal(self, a, b):
200-
return a == b
201-
202-
def find_key(self, d, value):
203-
return next(key for key, val in d.items() if val == value)
204-
205-
# Check if JSON body data obj is an array of arrays (2d arr)
206-
# ex [ [ "AT1G010100", "AT5G01010" ], ["AT3G10000", "AT2G03240"]]
207-
# {Array<Array<string>>} input: the above arr
208-
def input_validation(self, input):
209-
if not isinstance(input, list):
210-
return "invalid JSON, not an arr"
211-
212-
if len(input) == 0:
213-
return "arr length 0!"
214-
215-
if any(len(i) != 2 for i in input):
216-
return "inner arr length is not of length 2!"
217-
218-
if not all(isinstance(i, list) for i in input):
219-
return "invalid JSON, check arr members are arrs!"
220-
221-
if not all(isinstance(j, str) for i in input for j in i):
222-
return "invalid JSON, check if inside arr members are strings!"
223-
224-
if not all(BARUtils.is_arabidopsis_gene_valid(j) for i in input for j in i):
225-
return "Invalid gene ID contained!"
226-
227-
# filter self-edges and duplicate edges (mFinder does not accept)
228-
return self.uniq_with([i for i in input if i[0] != i[1]], self.is_equal)
229-
230-
# Some mFinder params are allowed, within reasonable server load. Namely, mFinder takes 5 basic params: nd (non-directed network),
231-
# r (# of rand networks to gen), s (motif size), u (unique min), z (z-score min). The defaults are directed, 100, 3, 4, & 2
232-
# respectively. HOWEVER, this endpoint defaults r to 50 for speed
233-
# Do a validation check on each value too!
234-
# opts: the JSON settings object, can be empty in which we provide the default
235-
def settings_validation(self, opts):
236-
opts = opts or {}
237-
self.injection_check(opts)
238-
settings_obj = opts.copy()
239-
if 'nd' not in opts:
240-
settings_obj['nd'] = False
241-
elif not isinstance(opts['nd'], bool):
242-
return "incorrect nd setting - is it boolean?", 400
243-
244-
if 'r' not in opts:
245-
settings_obj['r'] = 50
246-
elif not isinstance(opts['r'], int) or opts['r'] > 150:
247-
return "incorrect r setting - is it a number under 151?", 400
248-
249-
if 's' not in opts:
250-
settings_obj['s'] = 3
251-
elif not isinstance(opts['s'], int) or opts['s'] < 2 or opts['s'] > 4:
252-
return "incorrect s setting - is it a number between 2 and 4?", 400
253-
254-
if 'u' not in opts:
255-
settings_obj['u'] = 4
256-
elif not isinstance(opts['u'], int) or opts['u'] > 999:
257-
return "incorrect u setting - is it a number or below 1000?", 400
258-
259-
if 'z' not in opts:
260-
settings_obj['z'] = 2
261-
elif not isinstance(opts['z'], int) or opts['z'] > 99:
262-
return "incorrect z setting - is it a number or below 100?", 400
263-
264-
return settings_obj
265-
266-
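# Check for injection: returns an error message and 400 status if any settings value is longer than 10 characters (note: the caller in settings_validation discards this result).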
267-
# object: to validate for injection
268-
def injection_check(self, obj):
269-
for key, value in obj.items():
270-
if len(str(value)) > 10:
271-
return f"{key} settings param is too long", 400
272-
273-
# Take in the filtered array of gene-id pairs (edges) and perform
274-
# mFinder analysis on them (create temp text files to do so)
275-
# Performed SYNCHRONOUSLY !!!
276-
def create_files_and_mfinder(self, input, opts_obj):
277-
278-
# give read/write permissions to user but nada to anybody else
279-
tmpfile = tempfile.NamedTemporaryFile(mode='w+', suffix='.txt', delete=False)
280-
os.chmod(tmpfile.name, 0o600)
281-
282-
# get a hash of IDs -> numbers for later lookup and writable string
283-
hash_of_ids, return_str = self.get_gene_id_hash_map(input)
284-
285-
# write to temp file which mFinder will run/read on
286-
tmpfile.write(return_str)
287-
tmpfile.flush()
288-
289-
command = (
290-
f"/bartmp/mfinder {tmpfile.name} "
291-
f"-s {opts_obj['s']} "
292-
f"-r {opts_obj['r']} "
293-
f"-u {opts_obj['u']} "
294-
f"-z {opts_obj['z']} "
295-
f"{'-nd ' if opts_obj.get('nd') else ''}"
296-
"-omem"
297-
)
298-
subprocess.run(command, shell=True, check=True)
299-
300-
with open(tmpfile.name[:-4] + "_OUT.txt", 'r') as stats_file:
301-
mfinder_stats = stats_file.read()
302-
303-
with open(tmpfile.name[:-4] + "_MEMBERS.txt", 'r') as members_file:
304-
mfinder_members = members_file.read()
305-
306-
tmpfile.close()
307-
os.remove(tmpfile.name)
308-
309-
return {'hashOfIds': hash_of_ids, 'mFinderStats': mfinder_stats, 'mFinderMembers': mfinder_members}
310-
311-
# Take an input of array of array of strings which represent edges and transform those gene IDs (unique!) to a hash table and
312-
# coinciding edges i.e. [["PHE", "PAT"], ["PAT", "PAN"]] to "232 210 1 \n 210 100 1\n"
313-
def get_gene_id_hash_map(self, input):
314-
hash_of_ids = defaultdict(lambda: None)
315-
iter = 1
316-
return_str = ""
317-
for item in input:
318-
if item[0] not in hash_of_ids.values():
319-
hash_of_ids[iter] = item[0]
320-
iter += 1
321-
if item[1] not in hash_of_ids.values():
322-
hash_of_ids[iter] = item[1]
323-
iter += 1
324-
return_str += f"{self.find_key(hash_of_ids, item[0])} {self.find_key(hash_of_ids, item[1])} 1\n"
325-
326-
return hash_of_ids, return_str
327-
328-
# Beautify the output file string and members file string
329-
def beautify_results(self, mfinder_res_obj):
330-
stats = mfinder_res_obj['mFinderStats']
331-
mems = mfinder_res_obj['mFinderMembers']
332-
id_map = mfinder_res_obj['hashOfIds']
333-
ret_obj = {'sigMotifs': {}, 'motifList': {}}
334-
335-
try:
336-
sig_motifs_str = stats.split('[MILI]\t\n\n')[1].split('Full')[0].split('\n\n')
337-
# In case splitting stats on '[MILI]\t\n\n' yields fewer than 2 parts (i.e. the delimiter is missing), so index [1] does not exist
338-
except IndexError:
339-
raise ValueError("Expected delimiter '[MILI]\t\n\n' or 'Full' not found in the stats string.")
340-
sig_motifs_str = sig_motifs_str[:len(sig_motifs_str) - 2:2]
341-
for item in sig_motifs_str:
342-
split_stats_for_motif_id = item.split('\t')
343-
ret_obj['sigMotifs'][split_stats_for_motif_id[0]] = {
344-
'numAppearances': split_stats_for_motif_id[1],
345-
'numAppearancesRand': split_stats_for_motif_id[2],
346-
'appearancesZScore': split_stats_for_motif_id[3],
347-
'pValue': split_stats_for_motif_id[4],
348-
'uniq': split_stats_for_motif_id[5],
349-
'conc': split_stats_for_motif_id[6],
350-
}
351-
352-
subgraphs_list_str = mems.split('subgraph id = ')[1:]
353-
for subgraph_str in subgraphs_list_str:
354-
member_list_split = subgraph_str.split('\n')
355-
motif_mem_list = [i.rstrip('\t') for i in member_list_split[5:-2]]
356-
motif_mem_results = []
357-
for i in motif_mem_list:
358-
three_genes = i.split('\t')
359-
formatted_str = f"{id_map[int(three_genes[0])]}\t{id_map[int(three_genes[1])]}\t{id_map[int(three_genes[2])]}" # i.e. PAT\tPAN\tEGFR
360-
motif_mem_results.append(formatted_str)
361-
ret_obj['motifList'][member_list_split[0]] = motif_mem_results
362-
363-
return BARUtils.success_exit(ret_obj)

api/resources/llama3.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,21 @@ def get(self, gene_id=""):
2626
gene_id = escape(gene_id.upper())
2727

2828
if BARUtils.is_arabidopsis_gene_valid(gene_id):
29-
rows = (
30-
db.session.execute(db.select(Summaries).where(Summaries.gene_id == gene_id))
31-
.first()
32-
)
29+
rows = db.session.execute(db.select(Summaries).where(Summaries.gene_id == gene_id)).first()
3330

34-
if len(rows) == 0:
31+
if rows and len(rows) > 0:
32+
res = {
33+
"summary": rows[0].summary,
34+
"gene_id": rows[0].gene_id,
35+
"bert_score": rows[0].bert_score,
36+
}
37+
38+
return BARUtils.success_exit(res)
39+
else:
3540
return (
3641
BARUtils.error_exit("There are no data found for the given gene"),
3742
400,
3843
)
39-
else:
40-
res = {
41-
"summary": rows[0].summary,
42-
"gene_id": rows[0].gene_id,
43-
"bert_score": rows[0].bert_score,
44-
}
45-
return BARUtils.success_exit(res)
44+
4645
else:
4746
return BARUtils.error_exit("Invalid gene id"), 400

requirements.txt

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,22 @@ attrs==24.2.0
44
black==24.8.0
55
blinker==1.8.2
66
cachelib==0.9.0
7-
certifi==2024.7.4
7+
certifi==2024.8.30
88
charset-normalizer==3.3.2
99
click==8.1.7
1010
coverage==7.6.1
1111
Deprecated==1.2.14
1212
flake8==7.1.1
1313
Flask==3.0.3
1414
Flask-Caching==2.3.0
15-
Flask-Cors==4.0.1
15+
Flask-Cors==5.0.0
1616
Flask-Limiter==3.8.0
1717
flask-marshmallow==1.2.1
1818
flask-restx==1.3.0
1919
Flask-SQLAlchemy==3.1.1
20-
greenlet==3.0.3
21-
idna==3.7
22-
importlib_resources==6.4.3
20+
greenlet==3.1.1
21+
idna==3.10
22+
importlib_resources==6.4.5
2323
iniconfig==2.0.0
2424
itsdangerous==2.2.0
2525
Jinja2==3.1.4
@@ -36,25 +36,25 @@ mysqlclient==2.2.4
3636
ordered-set==4.1.0
3737
packaging==24.1
3838
pathspec==0.12.1
39-
platformdirs==4.2.2
39+
platformdirs==4.3.6
4040
pluggy==1.5.0
4141
pycodestyle==2.12.1
4242
pyflakes==3.2.0
4343
Pygments==2.18.0
4444
pyrsistent==0.20.0
45-
pytest==8.3.2
45+
pytest==8.3.3
4646
python-dateutil==2.9.0.post0
47-
pytz==2024.1
48-
redis==5.0.8
47+
pytz==2024.2
48+
redis==5.1.0
4949
referencing==0.35.1
5050
requests==2.32.3
51-
rich==13.7.1
51+
rich==13.8.1
5252
rpds-py==0.20.0
53-
setuptools==73.0.1
53+
setuptools==75.1.0
5454
six==1.16.0
55-
SQLAlchemy==2.0.32
55+
SQLAlchemy==2.0.35
5656
typing_extensions==4.12.2
57-
urllib3==2.2.2
58-
Werkzeug==3.0.3
57+
urllib3==2.2.3
58+
Werkzeug==3.0.4
5959
wheel==0.44.0
6060
wrapt==1.16.0

0 commit comments

Comments
 (0)