|
4 | 4 | from api.models.annotations_lookup import AgiAlias |
5 | 5 | from api.models.eplant2 import Isoforms as EPlant2Isoforms |
6 | 6 | from api.models.eplant2 import Publications as EPlant2Publications |
| 7 | +from api.models.eplant2 import TAIR10GFF3 as EPlant2TAIR10_GFF3 |
| 8 | +from api.models.eplant2 import AgiAlias as EPlant2AgiAlias |
| 9 | +from api.models.eplant2 import AgiAnnotation as EPlant2AgiAnnotation |
7 | 10 | from api.models.eplant_poplar import Isoforms as EPlantPoplarIsoforms |
8 | 11 | from api.models.eplant_tomato import Isoforms as EPlantTomatoIsoforms |
9 | 12 | from api.models.eplant_soybean import Isoforms as EPlantSoybeanIsoforms |
|
14 | 17 |
|
15 | 18 | gene_information = Namespace("Gene Information", description="Information about Genes", path="/gene_information") |
16 | 19 |
|
# Request parser for endpoints that accept a list of gene IDs.
# Each value is parsed as a plain string and action="append" collects the
# values into a list. (type=list would call list() on every value and split
# each gene ID into single characters.)
parser = gene_information.parser()
parser.add_argument(
    "terms",
    type=str,
    action="append",
    required=True,
    help="Gene IDs, format example: AT1G01010",
    default=["AT1G01020", "AT1G01030"],
)
| 29 | + |
17 | 30 | # I think this is only needed for Swagger UI POST |
18 | 31 | gene_information_request_fields = gene_information.model( |
19 | 32 | "GeneInformation", |
|
27 | 40 | }, |
28 | 41 | ) |
29 | 42 |
|
# Swagger UI request model for the POST /gene_query endpoint.
# The model is registered under its own name: "GeneInformation" is already
# used by gene_information_request_fields, and registering two different
# models under one name makes one schema replace the other in the docs.
query_genes_request_fields = gene_information.model(
    "GeneQuery",
    {
        "species": fields.String(required=True, example="Arabidopsis_thaliana"),
        "terms": fields.List(
            required=True,
            example=["AT1G01010", "AT1G01020"],
            cls_or_instance=fields.String,
        ),
    },
)
| 54 | + |
30 | 55 |
|
31 | 56 | # Validation is done in a different way to keep things simple |
32 | 57 | class GeneInformationSchema(Schema): |
@@ -135,6 +160,286 @@ def get(self, species="", gene_id=""): |
135 | 160 | return BARUtils.error_exit("There are no data found for the given gene") |
136 | 161 |
|
137 | 162 |
|
@gene_information.route("/genes_by_position/<string:species>/<string:chromosome>/<string:startParam>/<string:endParam>")
class GeneTAIR10_GFF3(Resource):
    # Chromosome names accepted in the URL mapped to the character that
    # follows "AT" in the gene identifiers of that chromosome.
    _CHROMOSOME_IDS = {
        "Chr1": "1",
        "Chr2": "2",
        "Chr3": "3",
        "Chr4": "4",
        "Chr5": "5",
        "ChrC": "C",
        "ChrM": "M",
    }

    @gene_information.param("species", _in="path", default="arabidopsis")
    @gene_information.param("chromosome", _in="path", default="Chr1")
    @gene_information.param("startParam", _in="path", default=3000)
    @gene_information.param("endParam", _in="path", default=6000)
    def get(self, species="", chromosome="", startParam="", endParam=""):
        """This end point provides genes given position.

        Looks up every row of type "gene" on the requested chromosome whose
        [Start, End] span overlaps the requested [startParam, endParam] range
        and attaches the aliases and annotation stored for each gene.

        :param species: only "arabidopsis" is supported
        :param chromosome: one of Chr1-Chr5, ChrC or ChrM
        :param startParam: start of the range, an integer given as a string
        :param endParam: end of the range, an integer given as a string
        :return: success payload holding a list of gene dicts with the keys
            id, start, end, strand, aliases and annotation; or an error
            payload with HTTP status 400
        """

        # Check if all parameters are provided
        if not chromosome or not startParam or not endParam:
            return BARUtils.error_exit("Missing parameters"), 400

        # Check if both parameters are valid figures. This has to happen
        # before the range check so the comparison is numeric.
        if not BARUtils.is_integer(startParam) or not BARUtils.is_integer(endParam):
            return BARUtils.error_exit("At lease one parameter is not valid"), 400

        try:
            start = int(startParam)
            end = int(endParam)
        except ValueError:
            return BARUtils.error_exit("At lease one parameter is not valid"), 400

        # Check if the start param is smaller than end param. Comparing the
        # raw strings would order "3000" after "10000".
        if start >= end:
            return BARUtils.error_exit("Start location should be smaller than the end location"), 400

        # Escape input
        species = escape(species)
        chromosome = escape(chromosome)

        # Set database
        if species != "arabidopsis":
            return BARUtils.error_exit("No data for the given species"), 400
        database = EPlant2TAIR10_GFF3

        # Map chromosome to its ID
        chromosome_id = self._CHROMOSOME_IDS.get(str(chromosome))
        if chromosome_id is None:
            return BARUtils.error_exit("Invalid chromosome"), 400

        try:
            # Construct the query: a gene overlaps the range when it starts
            # inside it, ends inside it, or spans it completely.
            atnumg = "AT" + chromosome_id + "G"
            gene_query = db.select(database.geneId, database.Start, database.End, database.Strand).where(
                database.Type == "gene",
                database.geneId.startswith(atnumg),
                (
                    database.Start.between(start, end)
                    | database.End.between(start, end)
                    | ((database.Start < start) & (database.End > end))
                ),
            )
            gene_rows = db.session.execute(gene_query).all()
            gene_ids = [row[0] for row in gene_rows]

            # Get aliases, grouped per gene
            alias_query = db.select(EPlant2AgiAlias.agi, EPlant2AgiAlias.alias).where(
                EPlant2AgiAlias.agi.in_(gene_ids)
            )
            all_aliases = {}
            for agi, alias in db.session.execute(alias_query).all():
                all_aliases.setdefault(agi, []).append(alias)

            # Get annotation; when the stored text contains "__", the part
            # after the first separator is the one reported.
            annotation_query = db.select(EPlant2AgiAnnotation.agi, EPlant2AgiAnnotation.annotation).where(
                EPlant2AgiAnnotation.agi.in_(gene_ids)
            )
            all_annotations = {}
            for agi, annotation in db.session.execute(annotation_query).all():
                parts = annotation.split("__")
                all_annotations[agi.upper()] = parts[1] if len(parts) > 1 else parts[0]

            genes = [
                {
                    "id": row[0],
                    "start": row[1],
                    "end": row[2],
                    "strand": row[3],
                    "aliases": all_aliases.get(row[0], []),
                    # Annotations are stored under the upper-cased gene ID
                    "annotation": all_annotations.get(row[0].upper(), None),
                }
                for row in gene_rows
            ]
            return BARUtils.success_exit(genes)

        except Exception as e:
            return BARUtils.error_exit(str(e)), 400
| 266 | + |
| 267 | + |
@gene_information.route("/gene_query")
class GeneQueryGene(Resource):
    @gene_information.expect(query_genes_request_fields)
    def post(self):
        """This end point provides gene information for multiple genes given multiple terms.

        Expects a JSON body with the keys "species" and "terms" (a list of
        gene ID strings). Each term is resolved to a gene ID through the alias
        table first and the TAIR10 GFF3 table second; position, strand,
        aliases and annotation are then collected for every resolved gene.

        :return: success payload holding a dict keyed by gene ID, each value
            having the keys id, chromosome, start, end, strand, aliases and
            annotation; or an error payload with HTTP status 400
        """

        # Validate the request body before touching it, so a missing key or
        # a non-list "terms" is reported as a 400 instead of raising.
        data = request.get_json()
        if not isinstance(data, dict) or "species" not in data or "terms" not in data:
            return BARUtils.error_exit("Missing parameters"), 400

        species = data["species"]
        terms = data["terms"]
        if not isinstance(terms, list) or not all(isinstance(one_term, str) for one_term in terms):
            return BARUtils.error_exit("Input list contains invalid term"), 400

        # str.upper() returns a new string, so the list has to be rebuilt
        terms = [one_term.upper() for one_term in terms]

        try:
            # Species check
            if species != "Arabidopsis_thaliana":
                return BARUtils.error_exit("No data for the given species"), 400

            # Term check
            for one_term in terms:
                if not BARUtils.is_arabidopsis_gene_valid(one_term):
                    return BARUtils.error_exit("Input list contains invalid term"), 400

            # Resolve each term through the alias table
            database = EPlant2AgiAlias
            gene_ids = []
            agi_fail = []
            for one_term in terms:
                query = db.select(database.agi).where(database.agi.contains(one_term)).limit(1)
                result = db.session.execute(query).fetchone()
                if result:
                    gene_ids.append(result[0])
                else:
                    agi_fail.append(one_term)

            # For terms that do not have results, fall back to the GFF3 table
            database = EPlant2TAIR10_GFF3
            is_gene = (database.Type == "gene") | (database.Type == "transposable_element_gene")
            for fail_term in agi_fail:
                query = db.select(database.geneId).where(is_gene, database.geneId.contains(fail_term)).limit(1)
                result = db.session.execute(query).fetchone()
                if result:
                    gene_ids.append(result[0])

            # Find information for each term; the first row seen per gene wins
            query = db.select(database.geneId, database.Start, database.End, database.Strand).where(
                is_gene,
                database.Source == "TAIR10",
                database.geneId.in_(gene_ids),
            )
            genes_info = {}
            for row in db.session.execute(query).all():
                if row[0] not in genes_info:
                    genes_info[row[0]] = {
                        "id": row[0],
                        # The third character of the gene ID names the chromosome
                        "chromosome": "Chr" + row[0][2:3],
                        "start": row[1],
                        "end": row[2],
                        "strand": row[3],
                        "aliases": [],
                        "annotation": None,
                    }

            # Get aliases
            database = EPlant2AgiAlias
            query = db.select(database.agi, database.alias).where(database.agi.in_(gene_ids))
            for row in db.session.execute(query).all():
                if row[0] in genes_info:
                    genes_info[row[0]]["aliases"].append(row[1])

            # Get annotations; when the stored text contains "__", the part
            # after the first separator is the one reported.
            database = EPlant2AgiAnnotation
            query = db.select(database.agi, database.annotation).where(database.agi.in_(gene_ids))
            for row in db.session.execute(query):
                gene_id = row[0].upper()
                if gene_id in genes_info:
                    parts = row[1].split("__")
                    genes_info[gene_id]["annotation"] = parts[1] if len(parts) > 1 else parts[0]

            return BARUtils.success_exit(genes_info)

        except Exception as e:
            return BARUtils.error_exit(str(e)), 400
| 360 | + |
| 361 | + |
@gene_information.route("/single_gene_query/<string:species>/<string:term>")
class SingleGeneQueryGene(Resource):
    @gene_information.param("species", _in="path", default="Arabidopsis_thaliana")
    @gene_information.param("term", _in="path", default="AT1G01010")
    def get(self, species="", term=""):
        """This end point provides gene information for a single gene given one term.

        The term is matched exactly against the alias table first and the
        TAIR10 GFF3 table second. When the gene is known, its position,
        strand, aliases and annotation are returned.

        :param species: only "Arabidopsis_thaliana" is supported
        :param term: gene ID, upper-cased before use
        :return: success payload holding a dict keyed by gene ID (empty when
            nothing is found); or an error payload with HTTP status 400
        """

        # Escape input
        species = escape(species)
        term = escape(term).upper()

        try:
            # Species check
            if species != "Arabidopsis_thaliana":
                return BARUtils.error_exit("No data for the given species"), 400

            # Term check
            if not BARUtils.is_arabidopsis_gene_valid(term):
                return BARUtils.error_exit("Input term invalid"), 400

            # Is the gene known to the alias table?
            database = EPlant2AgiAlias
            query = db.select(database.agi).where(database.agi == term).limit(1)
            found = db.session.execute(query).fetchone()

            gff3 = EPlant2TAIR10_GFF3
            is_gene = (gff3.Type == "gene") | (gff3.Type == "transposable_element_gene")

            if not found:
                # Otherwise, is it known to the GFF3 table?
                query = db.select(gff3.geneId).where(is_gene, gff3.geneId == term).limit(1)
                found = db.session.execute(query).fetchone()

            genes_info = {}
            if not found:
                return BARUtils.success_exit(genes_info)

            # Find information for the term
            query = db.select(gff3.geneId, gff3.Start, gff3.End, gff3.Strand).where(
                is_gene,
                gff3.Source == "TAIR10",
                gff3.geneId == term,
            )
            gene_row = db.session.execute(query).fetchone()

            # This query is stricter than the existence checks above (it also
            # filters on Source), so it can come back empty. Report that as
            # "nothing found" rather than failing on a None row.
            if gene_row is None:
                return BARUtils.success_exit(genes_info)

            gene = {
                "id": gene_row[0],
                # The third character of the gene ID names the chromosome
                "chromosome": "Chr" + gene_row[0][2:3],
                "start": gene_row[1],
                "end": gene_row[2],
                "strand": gene_row[3],
                "aliases": [],
                "annotation": None,
            }
            genes_info[gene_row[0]] = gene

            # Get aliases, skipping duplicates
            database = EPlant2AgiAlias
            query = db.select(database.agi, database.alias).where(database.agi == term)
            for row in db.session.execute(query).all():
                if row[1] not in gene["aliases"]:
                    gene["aliases"].append(row[1])

            # Get annotations; when the stored text contains "__", the part
            # after the first separator is the one reported.
            database = EPlant2AgiAnnotation
            query = db.select(database.agi, database.annotation).where(database.agi == term)
            for row in db.session.execute(query).all():
                parts = row[1].split("__")
                gene["annotation"] = parts[1] if len(parts) > 1 else parts[0]

            return BARUtils.success_exit(genes_info)

        except Exception as e:
            return BARUtils.error_exit(str(e)), 400
| 441 | + |
| 442 | + |
138 | 443 | @gene_information.route("/gene_isoforms/<string:species>/<string:gene_id>") |
139 | 444 | class GeneIsoforms(Resource): |
140 | 445 | @gene_information.param("species", _in="path", default="arabidopsis") |
|
0 commit comments