Skip to content

Commit 017bd6d

Browse files
author
William Lai
committed
Fix #15 Partial fix #5
Upgrade PE histogram to output PNG and histograms to user-selected directory. Default is now NO output
1 parent c52864c commit 017bd6d

3 files changed

Lines changed: 162 additions & 113 deletions

File tree

src/charts/Histogram.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package charts;
22

33
import java.awt.Color;
4+
import java.io.File;
45
import java.io.IOException;
56

67
import org.jfree.chart.ChartFactory;
78
import org.jfree.chart.ChartPanel;
9+
import org.jfree.chart.ChartUtilities;
810
import org.jfree.chart.JFreeChart;
911
import org.jfree.chart.plot.PlotOrientation;
1012
import org.jfree.chart.plot.XYPlot;
@@ -33,6 +35,25 @@ public static ChartPanel createBarChart(double[] y, int[] x) throws IOException
3335
return chartPanel;
3436
}
3537

38+
public static ChartPanel createBarChart(double[] y, int[] x, File output) throws IOException {
39+
final XYSeries series = new XYSeries("Frequency");
40+
for(int i = 0; i < x.length; i++) {
41+
series.add((double)x[i], (double)y[i]);
42+
}
43+
final XYSeriesCollection dataset = new XYSeriesCollection(series);
44+
45+
JFreeChart chart = createChart(dataset);
46+
final ChartPanel chartPanel = new ChartPanel(chart);
47+
chartPanel.setPreferredSize(new java.awt.Dimension(500, 270));
48+
49+
if(output != null) {
50+
int width = 640;
51+
int height = 480;
52+
ChartUtilities.saveChartAsPNG(output, chart, width, height);
53+
}
54+
return chartPanel;
55+
}
56+
3657
private static JFreeChart createChart(IntervalXYDataset dataset) throws IOException {
3758
final JFreeChart chart = ChartFactory.createXYBarChart(
3859
"Paired-End Insert Size Frequency Histogram", // chart title

src/scripts/BAM_Statistics/PEStats.java

Lines changed: 69 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,27 @@
3232

3333
@SuppressWarnings("serial")
3434
public class PEStats extends JFrame {
35+
3536
Vector<File> bamFiles = null;
36-
File output = null;
37+
private File OUTPUT_PATH = null;
38+
private boolean OUTPUT_STATUS = false;
39+
PrintStream OUT = null;
40+
private File OUTPNG = null;
3741
private static int MIN_INSERT = 0;
3842
private static int MAX_INSERT = 1000;
3943

4044
SamReader reader;
4145
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
4246

43-
PrintStream OUT = null;
44-
4547
final JLayeredPane layeredPane;
4648
final JTabbedPane tabbedPane;
4749
final JTabbedPane tabbedPane_Histogram;
4850
final JTabbedPane tabbedPane_InsertStats;
4951
final JTabbedPane tabbedPane_Duplication;
5052
final JTabbedPane tabbedPane_DupStats;
51-
52-
public PEStats(Vector<File> input, File o, int min, int max) {
53-
setTitle("BAM File Statistics");
53+
54+
public PEStats(Vector<File> input, File o, boolean out, int min, int max) {
55+
setTitle("BAM File Paired-end Statistics");
5456
setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
5557
setBounds(150, 150, 800, 600);
5658

@@ -79,39 +81,46 @@ public PEStats(Vector<File> input, File o, int min, int max) {
7981
tabbedPane.addTab("Duplication Stats", null, tabbedPane_DupStats, null);
8082

8183
bamFiles = input;
82-
output = o;
84+
OUTPUT_PATH = o;
85+
OUTPUT_STATUS = out;
8386
MIN_INSERT = min;
8487
MAX_INSERT = max;
8588
}
8689

8790
public void run() throws IOException {
88-
89-
if(output != null) {
90-
try {
91-
OUT = new PrintStream(output);
92-
} catch (FileNotFoundException e) {
93-
// TODO Auto-generated catch block
94-
e.printStackTrace();
95-
}
96-
97-
}
98-
//Print TimeStamp
99-
String time = getTimeStamp();
100-
if(OUT != null) OUT.println(time);
101-
91+
//Iterate through all BAM files in Vector
10292
for(int x = 0; x < bamFiles.size(); x++) {
93+
//Open Output File
94+
if(OUTPUT_STATUS) {
95+
String NAME = bamFiles.get(x).getName().split("\\.")[0];
96+
if(OUTPUT_PATH != null) {
97+
try {
98+
OUT = new PrintStream(new File(OUTPUT_PATH.getCanonicalPath() + File.separator + NAME + "_InsertHistogram.out"));
99+
OUTPNG = new File(OUTPUT_PATH.getCanonicalPath() + File.separator + NAME + "_PE.png");
100+
}
101+
catch (FileNotFoundException e) { e.printStackTrace(); }
102+
catch (IOException e) { e.printStackTrace(); }
103+
} else {
104+
try {
105+
OUT = new PrintStream(new File(NAME));
106+
OUTPNG = new File(NAME + "_PE.png");
107+
}
108+
catch (FileNotFoundException e) { e.printStackTrace(); }
109+
}
110+
}
111+
112+
//Print TimeStamp
113+
String time = getTimeStamp();
103114
JTextArea PE_STATS = new JTextArea();
104115
PE_STATS.setEditable(false);
105116
PE_STATS.append(time + "\n");
106117
JTextArea DUP_STATS = new JTextArea();
107118
DUP_STATS.setEditable(false);
108119
DUP_STATS.append(time + "\n");
109-
120+
110121
//Check if BAI index file exists
111122
File f = new File(bamFiles.get(x) + ".bai");
112123
if(f.exists() && !f.isDirectory()) {
113-
if(OUT != null) OUT.println(bamFiles.get(x).getName());
114-
if(OUT != null) OUT.println("Chromosome_ID\tChromosome_Size\tAligned_Reads");
115124
PE_STATS.append(bamFiles.get(x).getName() + "\n");
116125
PE_STATS.append("Chromosome_ID\tChromosome_Size\tAligned_Reads\n");
117126
DUP_STATS.append(bamFiles.get(x).getName() + "\n");
@@ -131,45 +140,51 @@ public void run() throws IOException {
131140
HashMap<Integer, Integer> ALL_COMPLEXITY = new HashMap<Integer, Integer>();
132141

133142
//Variables which contain basic sequence information
134-
double totalTags = 0;
143+
double totalAlignedRead1 = 0;
144+
double totalAlignedRead2 = 0;
145+
double totalAlignedReads = 0;
135146
double totalGenome = 0;
136147

137148
for (int z = 0; z < bai.getNumberOfReferences(); z++) {
138149
SAMSequenceRecord seq = reader.getFileHeader().getSequence(z);
139150
double aligned = reader.indexing().getIndex().getMetaData(z).getAlignedRecordCount();
140151

141152
//Basic statistic calculations
142-
if(OUT != null) OUT.println(seq.getSequenceName() + "\t" + seq.getSequenceLength() + "\t" + aligned);
143153
PE_STATS.append(seq.getSequenceName() + "\t" + seq.getSequenceLength() + "\t" + aligned + "\n");
144-
totalTags += aligned;
145154
totalGenome += seq.getSequenceLength();
146155

147156
//Loop through each chromosome looking at each perfect F-R PE read
148157
CHROM_COMPLEXITY = new HashMap<String, Integer>();
149158
CloseableIterator<SAMRecord> iter = reader.query(seq.getSequenceName(), 0, seq.getSequenceLength(), false);
150159
while (iter.hasNext()) {
151160
//Create the record object
152-
SAMRecord sr = iter.next();
153-
154-
if(sr.getReadPairedFlag()) {
155-
if(sr.getProperPairFlag() && sr.getFirstOfPairFlag()) {
161+
SAMRecord sr = iter.next();
162+
163+
if(!sr.getReadUnmappedFlag()) { //Test for mapped read
164+
if(sr.getReadPairedFlag()) { //Test for paired-end status
165+
if(sr.getSecondOfPairFlag()) { totalAlignedRead2++; } //count read 2
166+
else if(sr.getFirstOfPairFlag()) { totalAlignedRead1++; } // count read 1
156167
//Insert size calculations
157-
int distance = Math.abs(sr.getInferredInsertSize());
158-
if(distance <= MAX_INSERT && distance >= MIN_INSERT) HIST[distance - MIN_INSERT]++;
159-
InsertAverage += distance;
160-
counter++;
161-
162-
//Unique ID
163-
String tagName = sr.getAlignmentStart() + "_" + sr.getMateAlignmentStart() + "_" + sr.getInferredInsertSize();
164-
//Duplication rate for each chrom determined
165-
if(CHROM_COMPLEXITY.isEmpty()) {
166-
CHROM_COMPLEXITY.put(tagName, new Integer(1));
167-
} else if(!CHROM_COMPLEXITY.containsKey(tagName)) {
168-
CHROM_COMPLEXITY.put(tagName, new Integer(1));
169-
} else if(CHROM_COMPLEXITY.containsKey(tagName)){
170-
CHROM_COMPLEXITY.put(tagName, new Integer(((Integer) CHROM_COMPLEXITY.get(tagName)).intValue() + 1));
168+
if(sr.getProperPairFlag() && sr.getFirstOfPairFlag()) {
169+
//Insert size calculations
170+
int distance = Math.abs(sr.getInferredInsertSize());
171+
if(distance <= MAX_INSERT && distance >= MIN_INSERT) HIST[distance - MIN_INSERT]++;
172+
InsertAverage += distance;
173+
counter++;
174+
175+
//Unique ID
176+
String tagName = sr.getAlignmentStart() + "_" + sr.getMateAlignmentStart() + "_" + sr.getInferredInsertSize();
177+
//Duplication rate for each chrom determined
178+
if(CHROM_COMPLEXITY.isEmpty()) {
179+
CHROM_COMPLEXITY.put(tagName, new Integer(1));
180+
} else if(!CHROM_COMPLEXITY.containsKey(tagName)) {
181+
CHROM_COMPLEXITY.put(tagName, new Integer(1));
182+
} else if(CHROM_COMPLEXITY.containsKey(tagName)){
183+
CHROM_COMPLEXITY.put(tagName, new Integer(((Integer) CHROM_COMPLEXITY.get(tagName)).intValue() + 1));
184+
}
171185
}
172-
186+
} else { //If the read is mapped but not paired-end, default to read 1
187+
totalAlignedRead1++;
173188
}
174189
}
175190
}
@@ -188,45 +203,31 @@ public void run() throws IOException {
188203
}
189204
}
190205
}
191-
192-
if(OUT != null) OUT.println("Total Genome Size: " + totalGenome + "\tTotal Aligned Tags: " + totalTags + "\n");
193-
PE_STATS.append("Total Genome Size: " + totalGenome + "\tTotal Aligned Tags: " + totalTags + "\n\n");
206+
totalAlignedReads = totalAlignedRead1 + totalAlignedRead2;
207+
PE_STATS.append("Total Genome Size: " + totalGenome + "\tTotal Aligned Tags: " + totalAlignedReads + "\n\n");
194208

195209
//Output replicates used to make bam file
196210
for( String comment : reader.getFileHeader().getComments()) {
197-
if(OUT != null) OUT.println(comment);
198211
PE_STATS.append(comment + "\n");
199212
}
200213

201214
//Output program used to align bam file
202215
for (int z = 0; z < reader.getFileHeader().getProgramRecords().size(); z++) {
203-
if(OUT != null) {
204-
OUT.print(reader.getFileHeader().getProgramRecords().get(z).getId() + "\t");
205-
OUT.println(reader.getFileHeader().getProgramRecords().get(z).getProgramVersion());
206-
OUT.println(reader.getFileHeader().getProgramRecords().get(z).getCommandLine());
207-
}
208216
PE_STATS.append(reader.getFileHeader().getProgramRecords().get(z).getId() + "\t");
209217
PE_STATS.append(reader.getFileHeader().getProgramRecords().get(z).getProgramVersion() + "\n");
210218
PE_STATS.append(reader.getFileHeader().getProgramRecords().get(z).getCommandLine() + "\n");
211219
}
212220
reader.close();
213221
bai.close();
214-
215-
if(OUT != null) OUT.println();
216222
PE_STATS.append("\n");
217223

218224
//Insert Size statistics
219225
if(counter != 0) InsertAverage /= counter;
220-
if(OUT != null) {
221-
OUT.println("Average Insert Size: " + InsertAverage);
222-
OUT.println("Median Insert Size: " + getMedian(HIST));
223-
OUT.println("Std deviation of Insert Size: " + getStdDev(HIST, InsertAverage));
224-
OUT.println("Number of ReadPairs: " + counter + "\n\nHistogram\nSize (bp)\tFrequency");
225-
}
226226
PE_STATS.append("Average Insert Size: " + InsertAverage + "\n");
227227
PE_STATS.append("Median Insert Size: " + getMedian(HIST) + "\n");
228228
PE_STATS.append("Std deviation of Insert Size: " + getStdDev(HIST, InsertAverage) + "\n");
229229
PE_STATS.append("Number of ReadPairs: " + counter + "\n\nHistogram\nSize (bp)\tFrequency\n");
230+
if(OUT != null) { OUT.println("InsertSize (bp)\t" + bamFiles.get(x).getName()); }
230231

231232
int[] DOMAIN = new int[(MAX_INSERT - MIN_INSERT) + 1];
232233
for(int z = 0; z < HIST.length; z++) {
@@ -263,19 +264,19 @@ public void run() throws IOException {
263264
DUP_STATS.setCaretPosition(0);
264265
JScrollPane dup_pane = new JScrollPane(DUP_STATS, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
265266
tabbedPane_DupStats.add(bamFiles.get(x).getName(), dup_pane);
266-
267-
tabbedPane_Histogram.add(bamFiles.get(x).getName(), Histogram.createBarChart(HIST, DOMAIN));
267+
268+
tabbedPane_Histogram.add(bamFiles.get(x).getName(), Histogram.createBarChart(HIST, DOMAIN, OUTPNG));
268269
tabbedPane_Duplication.add(bamFiles.get(x).getName(), LineChart.createLineChart(BIN, BIN_NAME));
269270

270271
firePropertyChange("bam",x, x + 1);
271-
272+
272273
} else {
273274
if(OUT != null) OUT.println("BAI Index File does not exist for: " + bamFiles.get(x).getName() + "\n");
274275
PE_STATS.append("BAI Index File does not exist for: " + bamFiles.get(x).getName() + "\n\n");
275276
DUP_STATS.append("BAI Index File does not exist for: " + bamFiles.get(x).getName() + "\n\n");
276277
}
277-
}
278-
if(OUT != null) OUT.close();
278+
if(OUT != null) OUT.close();
279+
}
279280
}
280281

281282
public static double getMedian(double[] histogram) {

0 commit comments

Comments
 (0)