55"""
66
77from flask_restx import Namespace , Resource , fields
8- from flask import request , jsonify
8+ from flask import request
99from markupsafe import escape
1010from api .utils .bar_utils import BARUtils
1111from marshmallow import Schema , ValidationError , fields as marshmallow_fields
1212from api import db
1313from api .models .rice_interactions import Interactions as RiceInteractions
1414from sqlalchemy import or_
1515
16- import tempfile
17- import os
18- import subprocess
19- from collections import defaultdict
20-
2116itrns = Namespace (
2217 "Interactions" ,
2318 description = "Interactions (protein-protein, protein-DNA, etc) endpoint" ,
4136 {
4237 "data" : fields .List (
4338 required = True ,
44- example = [["AT5G67420" , "AT1G12110" ],
45- ["AT5G67420" , "AT1G08090" ]],
39+ example = [["AT5G67420" , "AT1G12110" ], ["AT5G67420" , "AT1G08090" ]],
4640 cls_or_instance = fields .List (fields .String ),
4741 ),
4842 },
@@ -55,9 +49,7 @@ class GeneIntrnsSchema(Schema):
5549
5650
class MFinderDataSchema(Schema):
    """Marshmallow schema for the mFinder request body.

    Declares a single field, ``data``, typed as a list of lists of strings.
    """

    # Only one class-level name is declared here on purpose: every Field
    # instance bound at class level is registered as a schema field.
    data = marshmallow_fields.List(
        cls_or_instance=marshmallow_fields.List(
            cls_or_instance=marshmallow_fields.String(),
        ),
    )
6153
6254
6355@itrns .route ("/<species>/<query_gene>" )
@@ -164,200 +156,3 @@ def post(self):
164156 return BARUtils .success_exit (res )
165157 else :
166158 return BARUtils .error_exit ("No data for the given species/genes" ), 400
167-
168-
@itrns.route("/mfinder")
class MFinder(Resource):
    """Run the mFinder binary on a list of gene-pair edges and return parsed results."""

    # (option name, default, range check, error message) for the integer
    # options forwarded to mFinder. Defaults used by this endpoint:
    # r = 50, s = 3, u = 4, z = 2.
    _INT_SETTINGS = (
        ('r', 50, lambda v: v <= 150, "incorrect r setting - is it a number under 151?"),
        ('s', 3, lambda v: 2 <= v <= 4, "incorrect s setting - is it a number between 2 and 4?"),
        ('u', 4, lambda v: v <= 999, "incorrect u setting - is it a number or below 1000?"),
        ('z', 2, lambda v: v <= 99, "incorrect z setting - is it a number or below 100?"),
    )

    @itrns.expect(post_int_data)
    def post(self):
        """This endpoint was originally written by Vincent Lau to return mFinder
        results to AGENT in his express node.JS app. However Tianhui Zhao refactored
        to the BAR_API
        """
        data = request.get_json()
        # Validate json
        try:
            data = MFinderDataSchema().load(data)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        # .get(): a payload without "data" becomes a 400 instead of a KeyError.
        filtered_valid_arr = self.input_validation(data.get('data'))
        if isinstance(filtered_valid_arr, str):
            return BARUtils.error_exit(filtered_valid_arr), 400
        if not filtered_valid_arr:
            # Every edge was a self-edge, so there is nothing to analyse.
            return BARUtils.error_exit("No edges left after removing self-edges"), 400

        settings = self.settings_validation(data.get('options', {}))
        if not isinstance(settings, dict):
            # settings_validation reports failures as a (message, status) tuple.
            message, status = settings
            return BARUtils.error_exit(message), status

        ret_json = self.create_files_and_mfinder(filtered_valid_arr, settings)
        return jsonify(self.beautify_results(ret_json))

    def uniq_with(self, arr, comp_func):
        """Return the items of ``arr`` in order, dropping any item for which
        ``comp_func(item, kept_item)`` is true for an already-kept item.
        """
        unique_arr = []
        for item in arr:
            if not any(comp_func(item, unique_item) for unique_item in unique_arr):
                unique_arr.append(item)
        return unique_arr

    def is_equal(self, a, b):
        """Return ``a == b``; the comparison function used with ``uniq_with``."""
        return a == b

    def find_key(self, d, value):
        """Return the first key of ``d`` whose value equals ``value``.

        Raises ``StopIteration`` when no value matches.
        """
        return next(key for key, val in d.items() if val == value)

    def input_validation(self, input):
        """Check that ``input`` is a 2-D array of gene-id pairs and de-duplicate it.

        Example input: ``[["AT1G010100", "AT5G01010"], ["AT3G10000", "AT2G03240"]]``

        Returns an error message (``str``) when validation fails; otherwise
        the list of edges with self-edges and exact duplicate edges removed,
        in first-seen order (mFinder does not accept either).
        """
        if not isinstance(input, list):
            return "invalid JSON, not an arr"

        if not input:
            return "arr length 0!"

        # Type check comes before the length check so that len() is never
        # called on a member that is not a list.
        if not all(isinstance(i, list) for i in input):
            return "invalid JSON, check arr members are arrs!"

        if any(len(i) != 2 for i in input):
            return "inner arr length is not of length 2!"

        if not all(isinstance(j, str) for i in input for j in i):
            return "invalid JSON, check if inside arr members are strings!"

        if not all(BARUtils.is_arabidopsis_gene_valid(j) for i in input for j in i):
            return "Invalid gene ID contained!"

        # All members are strings here, so a set of tuples gives the same
        # result as pairwise comparison without the quadratic cost.
        seen = set()
        edges = []
        for edge in input:
            if edge[0] == edge[1]:
                continue
            key = (edge[0], edge[1])
            if key not in seen:
                seen.add(key)
                edges.append(edge)
        return edges

    def settings_validation(self, opts):
        """Validate the mFinder options and fill in defaults.

        Recognised options: ``nd`` (non-directed network, bool), ``r`` (number
        of random networks to generate), ``s`` (motif size), ``u`` (unique
        min) and ``z`` (z-score min). Upper bounds are enforced to keep the
        server load reasonable.

        opts: the JSON settings object; may be empty or ``None``, in which
        case every default is used.

        Returns the completed settings ``dict`` on success, or a
        ``(message, 400)`` tuple describing the first invalid option.
        """
        opts = opts or {}
        if not isinstance(opts, dict):
            return "options must be a JSON object", 400

        too_long = self.injection_check(opts)
        if too_long is not None:
            return too_long

        settings_obj = opts.copy()

        if 'nd' not in opts:
            settings_obj['nd'] = False
        elif not isinstance(opts['nd'], bool):
            return "incorrect nd setting - is it boolean?", 400

        for name, default, in_range, message in self._INT_SETTINGS:
            if name not in opts:
                settings_obj[name] = default
                continue
            value = opts[name]
            # bool is a subclass of int; reject it so True/False never reach
            # the command line as a number.
            if isinstance(value, bool) or not isinstance(value, int) or not in_range(value):
                return message, 400

        return settings_obj

    def injection_check(self, obj):
        """Reject suspiciously long option values.

        obj: the options mapping to inspect.

        Returns a ``(message, 400)`` tuple for the first value whose string
        form is longer than 10 characters, or ``None`` when all are short.
        """
        for key, value in obj.items():
            if len(str(value)) > 10:
                return f"{key} settings param is too long", 400
        return None

    def create_files_and_mfinder(self, input, opts_obj):
        """Write the edges to a temp file, run mFinder on it and read its output.

        Runs synchronously. ``input`` is the filtered list of gene-id pairs
        and ``opts_obj`` the validated settings dict.

        Returns a dict with keys ``hashOfIds``, ``mFinderStats`` and
        ``mFinderMembers``. Raises ``subprocess.CalledProcessError`` when
        mFinder exits with a non-zero status.
        """
        # get a hash of IDs -> numbers for later lookup and writable string
        hash_of_ids, return_str = self.get_gene_id_hash_map(input)

        # delete=False because mFinder reopens the file by name; it is
        # removed explicitly in the finally block below.
        tmpfile = tempfile.NamedTemporaryFile(mode='w+', suffix='.txt', delete=False)
        network_path = tmpfile.name
        stats_path = network_path[:-4] + "_OUT.txt"
        members_path = network_path[:-4] + "_MEMBERS.txt"

        # Argument list with the default shell=False: no value is ever
        # interpreted by a shell.
        command = [
            "/bartmp/mfinder",
            network_path,
            "-s", str(opts_obj['s']),
            "-r", str(opts_obj['r']),
            "-u", str(opts_obj['u']),
            "-z", str(opts_obj['z']),
        ]
        if opts_obj.get('nd'):
            command.append("-nd")
        command.append("-omem")

        try:
            with tmpfile:
                # give read/write permissions to user but nada to anybody else
                os.chmod(network_path, 0o600)
                tmpfile.write(return_str)
            # The file is closed (and therefore flushed) before mFinder reads it.
            subprocess.run(command, check=True)

            with open(stats_path, 'r') as stats_file:
                mfinder_stats = stats_file.read()

            with open(members_path, 'r') as members_file:
                mfinder_members = members_file.read()
        finally:
            # Remove the input and both output files on success and failure.
            for path in (network_path, stats_path, members_path):
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass

        return {'hashOfIds': hash_of_ids, 'mFinderStats': mfinder_stats, 'mFinderMembers': mfinder_members}

    def get_gene_id_hash_map(self, input):
        """Number the distinct gene IDs and render the edges with those numbers.

        IDs are numbered from 1 in order of first appearance. For example
        ``[["PHE", "PAT"], ["PAT", "PAN"]]`` yields the map
        ``{1: "PHE", 2: "PAT", 3: "PAN"}`` and the text ``"1 2 1\\n2 3 1\\n"``.

        Returns ``(hash_of_ids, return_str)`` where ``hash_of_ids`` maps
        number -> gene ID (missing numbers look up as ``None``).
        """
        hash_of_ids = defaultdict(lambda: None)
        number_of = {}  # reverse map, gene ID -> number, for O(1) lookups
        lines = []
        for item in input:
            for gene in (item[0], item[1]):
                if gene not in number_of:
                    number_of[gene] = len(number_of) + 1
                    hash_of_ids[number_of[gene]] = gene
            lines.append(f"{number_of[item[0]]} {number_of[item[1]]} 1\n")

        return hash_of_ids, "".join(lines)

    def beautify_results(self, mfinder_res_obj):
        """Parse the mFinder stats and members text into a structured result.

        ``mfinder_res_obj`` is the dict returned by ``create_files_and_mfinder``.

        Returns ``BARUtils.success_exit`` applied to a dict with ``sigMotifs``
        (per-motif statistics keyed by motif id) and ``motifList`` (per
        subgraph id, tab-joined gene IDs of each member row).

        Raises ``ValueError`` when the stats text lacks the expected
        ``[MILI]`` delimiter.
        """
        stats = mfinder_res_obj['mFinderStats']
        mems = mfinder_res_obj['mFinderMembers']
        id_map = mfinder_res_obj['hashOfIds']
        ret_obj = {'sigMotifs': {}, 'motifList': {}}

        try:
            sig_motifs_str = stats.split('[MILI]\t\n\n')[1].split('Full')[0].split('\n\n')
        except IndexError as err:
            # stats has fewer than 2 parts after split('[MILI]\t\n\n')
            raise ValueError(
                "Expected delimiter '[MILI]\t\n\n' or 'Full' not found in the stats string."
            ) from err

        # Keep every second chunk, excluding the last two.
        sig_motifs_str = sig_motifs_str[:len(sig_motifs_str) - 2:2]
        for item in sig_motifs_str:
            split_stats_for_motif_id = item.split('\t')
            ret_obj['sigMotifs'][split_stats_for_motif_id[0]] = {
                'numAppearances': split_stats_for_motif_id[1],
                'numAppearancesRand': split_stats_for_motif_id[2],
                'appearancesZScore': split_stats_for_motif_id[3],
                'pValue': split_stats_for_motif_id[4],
                'uniq': split_stats_for_motif_id[5],
                'conc': split_stats_for_motif_id[6],
            }

        for subgraph_str in mems.split('subgraph id = ')[1:]:
            member_list_split = subgraph_str.split('\n')
            motif_mem_results = []
            for row in member_list_split[5:-2]:
                gene_numbers = row.rstrip('\t').split('\t')
                # Map every column back to its gene ID (i.e. PAT\tPAN\tEGFR),
                # so rows stay complete for any allowed motif size.
                motif_mem_results.append('\t'.join(str(id_map[int(num)]) for num in gene_numbers))
            ret_obj['motifList'][member_list_split[0]] = motif_mem_results

        return BARUtils.success_exit(ret_obj)
0 commit comments