Skip to content

Commit 83c7abb

Browse files
committed
scripts to generate jitter plot of simulation scores
This commit adds a Python script and updates the tally job to generate jitter/strip plots of the simulation StrainID scores. scripts/make_jitter.py: seaborn-based plot showing the spread of scores assigned to each strain for each simulation "experiment" (synthetic_strain x depth). job/tally_results.sh: PBS script updated with calls to the Python script to generate the figures.
1 parent af12be4 commit 83c7abb

2 files changed

Lines changed: 86 additions & 0 deletions

File tree

paper/SyntheticStrain/job/tally_results.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,8 @@ do
3636
python $RUNTIME -i <(grep 'real' $DIR/ID/*.time) > $DIR\_runtimes.txt
3737
done
3838
done
39+
40+
# Make figs: run the jitter-plot script once per genome. The score-file paths are
# built from columns 1 and 2 of depth_simulations.txt and filtered by genome name
# (sacCer3 -> Fig4B, hg19 -> Fig4C).
41+
JITTER=scripts/make_jitter.py
42+
python $JITTER -i <(awk '{print "results/"$1"_"$2"_scores.txt"}' depth_simulations.txt |grep 'sacCer3') -t Fig4B
43+
python $JITTER -i <(awk '{print "results/"$1"_"$2"_scores.txt"}' depth_simulations.txt |grep 'hg19') -t Fig4C
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
from os import listdir
2+
from os.path import isfile, join
3+
import sys
4+
import re
5+
import random
6+
import argparse
7+
import matplotlib.pyplot as plt
8+
import numpy as np
9+
import seaborn as sns
10+
11+
# Python 3.6+
12+
# relies on dict insertion order
13+
14+
# Check Seaborn documentation: https://seaborn.pydata.org/generated/seaborn.stripplot.html (see also https://seaborn.pydata.org/generated/seaborn.swarmplot.html)
15+
16+
def getParams():
    """Build the command-line interface and return the parsed arguments.

    Returns:
        The argparse namespace with two required attributes: ``file_list``
        (from ``-i/--input-files``) and ``title`` (from ``-t/--title``).
    """
    cli = argparse.ArgumentParser(description='')

    # Path to the text file that lists one result file per line
    cli.add_argument(
        '-i', '--input-files',
        metavar='file_list',
        dest='file_list',
        required=True,
        help='Script takes text file with a list of file paths to the parsed results of each simulation experiment "<genome>_<strain>\t<depth>\t..." (i.e. "depth_simulations.txt")',
    )
    # Figure title, reused as the stem of the output image name
    cli.add_argument(
        '-t', '--title',
        metavar='figure_title',
        dest='title',
        required=True,
        help='Title to add to figure which is also used to name output image file',
    )

    return cli.parse_args()
25+
26+
def parse_data(data_file):
    """Parse one simulation result table (<genome>_<strain>_<depth>_scores.txt).

    The file is read as tab-delimited text. A line whose first field starts
    with "#" is a header: its remaining fields are taken as strain names.
    Every other non-blank line is a data row: the first field is skipped and
    the following fields are read as one score per strain, in header order.
    Rows that appear before any header contribute nothing.

    Args:
        data_file: Path to the score table.

    Returns:
        A dict holding two parallel lists: "value" (scores as floats) and
        "strain" (the strain name belonging to each score). For backward
        compatibility every strain name from the header is also present as
        a key mapped to an empty list.

    Raises:
        IndexError: A data row has fewer score fields than the header has
            strain names.
        ValueError: A score is neither "Inf", "NaN" nor a valid float literal.
    """
    # Finite stand-ins so that "Inf" and "NaN" scores can still be drawn
    placeholders = {"Inf": 20.0, "NaN": -10.0}

    data = {"value": [], "strain": []}
    index_keys = []
    # Context manager closes the file even if a row fails to parse
    with open(data_file, 'r') as reader:
        for line in reader:
            stripped = line.strip()
            if not stripped:
                # Blank (e.g. trailing) lines carry no scores
                continue
            tokens = stripped.split("\t")
            if tokens[0].startswith("#"):
                # Header row: initialize strain keys
                index_keys = tokens[1:]
                for strain in index_keys:
                    # setdefault never overwrites the "value"/"strain" lists
                    data.setdefault(strain, [])
                continue
            for i, strain in enumerate(index_keys):
                raw = tokens[i + 1]
                data["strain"].append(strain)
                if raw in placeholders:
                    data["value"].append(placeholders[raw])
                else:
                    data["value"].append(float(raw))

    return data
51+
52+
if __name__ == "__main__":
    # Plot a jitter/strip plot (in R fashion) of the simulation scores with
    # seaborn and save it as "<title>.svg" in the working directory.
    args = getParams()

    # Values passed to seaborn.stripplot as size= and jitter=
    SIZE = 2
    JITTER = 1

    # Long-format table: one entry per plotted point
    all_data = {"experiment": [], "value": [], "strain": []}

    # Each line of the list file names one result file (first tab-separated field).
    # The context manager closes the list even if a result file fails to parse.
    with open(args.file_list, 'r') as i_reader:
        for line in i_reader:
            data_file = line.strip().split("\t")[0]
            if not data_file:
                # Blank line in the list: there is no file to open
                continue
            # Parse datafile for values
            data = parse_data(data_file)
            # Merge values into the master table, labelling every point with
            # the path of the file it came from
            all_data["experiment"].extend([data_file] * len(data["value"]))
            all_data["value"].extend(data["value"])
            all_data["strain"].extend(data["strain"])

    # Original author's note: 1000 x 2 x 6 = 12000 points to plot
    # Plot data points using seaborn and label axes
    ax = sns.stripplot(x="experiment", y="value", hue="strain", data=all_data, size=SIZE, jitter=JITTER)
    ax.set_ylabel("log2 score")
    ax.set_xlabel("Simulation Experiment")
    # Experiment labels are file paths, so rotate them to keep them legible
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

    # Save plot; spaces in the title become underscores in the file name
    out_svg_fn = args.title.replace(" ", "_") + ".svg"
    plt.savefig(out_svg_fn)

0 commit comments

Comments
 (0)