|
| 1 | +from os import listdir |
| 2 | +from os.path import isfile, join |
| 3 | +import sys |
| 4 | +import re |
| 5 | +import argparse |
| 6 | +import matplotlib.pyplot as plt |
| 7 | +import numpy as np |
| 8 | + |
| 9 | +# Python 3.6+ |
| 10 | +# relies on dict insertion order |
| 11 | + |
| 12 | +# Check Matplotlib colors when building your config files: https://matplotlib.org/stable/gallery/color/named_colors.html |
| 13 | + |
# Roman-numeral chromosome names (chrI..chrXVI) mapped to their
# Arabic-numeral equivalents (chr1..chr16), in ascending order.
_ROMAN_NUMERALS = (
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII",
    "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI",
)
roman2arabic = {
    "chr" + numeral: "chr" + str(number)
    for number, numeral in enumerate(_ROMAN_NUMERALS, start=1)
}
| 18 | + |
def getParams():
    '''Parse parameters from the command line.

    Returns:
        argparse.Namespace with the attributes ``title``, ``config_fn``,
        ``features_gff`` (all required strings) and ``header`` (bool).
    '''
    def parse_flag(value):
        # An explicit value after -d/--header is still accepted so existing
        # invocations keep working; only the usual "off" words mean False.
        # Without this, "-d False" stored the truthy string "False".
        return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')

    parser = argparse.ArgumentParser(
        description='Plot bedgraph signal around selected loci as a grid of subplots (samples by loci)')

    parser.add_argument('-t','--title', metavar='figure_title', dest='title', required=True,
                        help='the figure title, also used to name the output SVG file')
    parser.add_argument('-c','--config', metavar='config_fn', dest='config_fn', required=True,
                        help='the config file for a grid-organized subplot')
    parser.add_argument('-g','--features-gff', metavar='features_gff', dest='features_gff', required=True,
                        help='the feature GFF file from SGD to get the gene coordinates')
    # nargs='?' lets the option act as a bare switch (const=True) while still
    # tolerating an optional explicit value.
    parser.add_argument('-d','--header', dest='header', nargs='?', const=True, default=False,
                        type=parse_flag, required=False,
                        help='skip first line as column header')

    args = parser.parse_args()
    return args
| 30 | + |
def get_bedgraph_info(bedgraph_file, locus_coord, flanking):
    '''Collect per-base bedgraph values for a locus plus flanking sequence.

    Args:
        bedgraph_file: path to a tab-delimited file whose first four columns
            are chromosome, start, end and value.
        locus_coord: (chromosome, start, end) tuple; the chromosome must be a
            key of ``roman2arabic``.
        flanking: number of bases added on each side of the locus.

    Returns:
        dict with "X" (list of positions from start-flanking up to, but not
        including, end+flanking), "Y" (value at each position, 0 where no
        interval covers it) and "chrom" (the chromosome name as passed in).

    Raises:
        KeyError: if the chromosome is not in ``roman2arabic``.
    '''
    chrom = roman2arabic[locus_coord[0]]
    window_start = locus_coord[1] - flanking
    window_end = locus_coord[2] + flanking
    x_vector = list(range(window_start, window_end))
    y_vector = [0] * len(x_vector)
    with open(bedgraph_file, 'r') as reader:
        for line in reader:
            tokens = line.strip().split('\t')
            # Skip if chromosome doesn't match
            if tokens[0] != chrom:
                continue
            # Clip the interval to the window; nothing left means no overlap
            start = max(int(tokens[1]), window_start)
            end = min(int(tokens[2]), window_end)
            if start >= end:
                continue
            # Counts stay ints; fractional scores fall back to float
            try:
                value = int(tokens[3])
            except ValueError:
                value = float(tokens[3])
            # Positions map to indices by a fixed offset, so the covered run
            # is filled in one slice assignment instead of a per-base search.
            y_vector[start - window_start:end - window_start] = [value] * (end - start)
    plot_info = {"X": x_vector, "Y": y_vector, "chrom": locus_coord[0]}
    return plot_info
| 54 | + |
def parse_configs(configs_fn):
    '''Read the tab-delimited subplot config file into a dict.

    Each line is ``KEY<TAB>value[<TAB>value...]``. "FLANKING" is stored as a
    single int, "S_MAX_Y" as a list of ints, and every other key as a list
    of strings. Blank lines are ignored. "N_SAMPLES" and "N_LOCI" are added
    from the lengths of "SAMPLES" and "LOCI".

    Args:
        configs_fn: path to the config file.

    Returns:
        dict of parsed settings.

    Exits with status 1 (after writing a message to stderr) when a required
    field is missing or when "S_LABEL", "S_COLOR" or "S_MAX_Y" does not have
    one entry per sample.
    '''
    subplot_configs = {}
    with open(configs_fn, 'r') as reader:
        for line in reader:
            tokens = line.strip().split('\t')
            key = tokens[0]
            if not key:
                continue
            if key == "FLANKING":
                subplot_configs[key] = int(tokens[1])
            elif key == "S_MAX_Y":
                subplot_configs[key] = [int(i) for i in tokens[1:]]
            else:
                subplot_configs[key] = tokens[1:]
    # Fail with a readable message rather than a bare KeyError below
    per_sample_keys = ["S_LABEL", "S_COLOR", "S_MAX_Y"]
    missing = [key for key in ["SAMPLES", "LOCI"] + per_sample_keys
               if key not in subplot_configs]
    if missing:
        sys.stderr.write("Missing required config field(s): %s. Exiting...\n" % ", ".join(missing))
        sys.exit(1)
    # Count subplot dimensions
    subplot_configs["N_SAMPLES"] = len(subplot_configs["SAMPLES"])
    subplot_configs["N_LOCI"] = len(subplot_configs["LOCI"])
    # Validate configs
    for key in per_sample_keys:
        if len(subplot_configs[key]) != subplot_configs["N_SAMPLES"]:
            # key is a string, so it must be formatted with %s (not %i)
            sys.stderr.write("Mismatch in number of samples with %s field. Exiting...\n" % key)
            sys.exit(1)
    return subplot_configs
| 77 | + |
def parse_gff(gff_fn, loci_list):
    '''Look up coordinates of the requested genes in a GFF file.

    Lines starting with "#" are skipped and reading stops at the first line
    starting with ">". Rows with fewer than nine tab-separated columns
    (including blank lines) are ignored. A row's gene name is taken from
    the first ``gene=`` entry in the ninth column.

    Args:
        gff_fn: path to the GFF file.
        loci_list: gene names to look for.

    Returns:
        dict mapping each gene name found to a
        (chromosome, start - 1, end) tuple built from columns 1, 4 and 5.
        When several rows carry the same gene name the last one wins.
        Names that never appear are absent from the result.
    '''
    wanted = set(loci_list)
    locus2coord = {}
    with open(gff_fn, 'r') as reader:
        for line in reader:
            if line.startswith("#"):
                continue
            if line.startswith(">"):
                break
            tokens = line.strip().split('\t')
            # Not a complete feature row; the attribute column is missing
            if len(tokens) < 9:
                continue
            gene_name = ""
            for feature in tokens[8].split(';'):
                if feature.startswith("gene="):
                    gene_name = feature.split('=')[1]
                    break
            if gene_name in wanted:
                # The start column is shifted down by one; the end is kept
                locus2coord[gene_name] = (tokens[0], int(tokens[3]) - 1, int(tokens[4]))
    return locus2coord
| 97 | + |
if __name__ == "__main__":
    # Draw one filled signal track per (sample, locus) pair on a grid with
    # samples as rows and loci as columns, then save the figure as an SVG
    # named after the title.
    args = getParams()

    CONFIGS = parse_configs(args.config_fn)
    LOCUS2COORD = parse_gff(args.features_gff, CONFIGS["LOCI"])

    # Report loci that were not found up front instead of a KeyError mid-plot
    missing_loci = [locus for locus in CONFIGS["LOCI"] if locus not in LOCUS2COORD]
    if missing_loci:
        sys.stderr.write("Loci not found in %s: %s. Exiting...\n" % (args.features_gff, ", ".join(missing_loci)))
        sys.exit(1)

    flanking = CONFIGS["FLANKING"]
    # squeeze=False keeps asx two-dimensional even with a single sample or a
    # single locus, so asx[s,l] indexing always works.
    fig, asx = plt.subplots(CONFIGS["N_SAMPLES"], CONFIGS["N_LOCI"], squeeze=False)
    fig.suptitle(args.title)
    fig.set_size_inches(14,8)
    for s, sample in enumerate(CONFIGS["SAMPLES"]):
        bedgraph_fn = "results/BedGraphs/%s.raw.bedgraph" % sample
        for l, locus in enumerate(CONFIGS["LOCI"]):
            sys.stderr.write("Processing sample %s by locus %s...\n" % (bedgraph_fn, locus))
            data = get_bedgraph_info(bedgraph_fn, LOCUS2COORD[locus], flanking)
            # Plot data
            ax = asx[s,l]
            ax.fill_between(data["X"], data["Y"], color=CONFIGS["S_COLOR"][s])
            ax.set_ylim(bottom=0,top=CONFIGS["S_MAX_Y"][s])
            ax.label_outer()
            x0 = data["X"][0]
            xend = data["X"][-1]
            xstart = x0 + flanking
            xstop = xend - flanking
            ax.set_xlim([x0,xend])
            # Outer tick labels follow the configured flanking size
            ax.set_xticks([x0,xstart,xstop,xend])
            ax.set_xticklabels(["-%i" % flanking,"start","stop","+%i" % flanking])
        # Label Samples
        asx[s,0].set_ylabel(CONFIGS["S_LABEL"][s])

    # Label the bottom row with each locus' coordinates
    for l, locus in enumerate(CONFIGS["LOCI"]):
        coord = LOCUS2COORD[locus]
        asx[CONFIGS["N_SAMPLES"]-1,l].set_xlabel("%s:%i-%i" % (coord[0],coord[1],coord[2]))
    # Lay out only once the axes are populated and the final size is set
    plt.tight_layout()
    out_pic_fn = args.title.replace(" ","_")+".svg"
    plt.savefig(out_pic_fn)
    print(out_pic_fn)
0 commit comments