3232
3333@ SuppressWarnings ("serial" )
3434public class PEStats extends JFrame {
35+
3536 Vector <File > bamFiles = null ;
36- File output = null ;
37+ private File OUTPUT_PATH = null ;
38+ private boolean OUTPUT_STATUS = false ;
39+ PrintStream OUT = null ;
40+ private File OUTPNG = null ;
3741 private static int MIN_INSERT = 0 ;
3842 private static int MAX_INSERT = 1000 ;
3943
4044 SamReader reader ;
4145 final SamReaderFactory factory = SamReaderFactory .makeDefault ().enable (SamReaderFactory .Option .INCLUDE_SOURCE_IN_RECORDS , SamReaderFactory .Option .VALIDATE_CRC_CHECKSUMS ).validationStringency (ValidationStringency .SILENT );
4246
43- PrintStream OUT = null ;
44-
4547 final JLayeredPane layeredPane ;
4648 final JTabbedPane tabbedPane ;
4749 final JTabbedPane tabbedPane_Histogram ;
4850 final JTabbedPane tabbedPane_InsertStats ;
4951 final JTabbedPane tabbedPane_Duplication ;
5052 final JTabbedPane tabbedPane_DupStats ;
51-
52- public PEStats (Vector <File > input , File o , int min , int max ) {
53- setTitle ("BAM File Statistics" );
53+
54+ public PEStats (Vector <File > input , File o , boolean out , int min , int max ) {
55+ setTitle ("BAM File Paired-end Statistics" );
5456 setDefaultCloseOperation (JFrame .DISPOSE_ON_CLOSE );
5557 setBounds (150 , 150 , 800 , 600 );
5658
@@ -79,39 +81,46 @@ public PEStats(Vector<File> input, File o, int min, int max) {
7981 tabbedPane .addTab ("Duplication Stats" , null , tabbedPane_DupStats , null );
8082
8183 bamFiles = input ;
82- output = o ;
84+ OUTPUT_PATH = o ;
85+ OUTPUT_STATUS = out ;
8386 MIN_INSERT = min ;
8487 MAX_INSERT = max ;
8588 }
8689
8790 public void run () throws IOException {
88-
89- if (output != null ) {
90- try {
91- OUT = new PrintStream (output );
92- } catch (FileNotFoundException e ) {
93- // TODO Auto-generated catch block
94- e .printStackTrace ();
95- }
96-
97- }
98- //Print TimeStamp
99- String time = getTimeStamp ();
100- if (OUT != null ) OUT .println (time );
101-
91+ //Iterate through all BAM files in Vector
10292 for (int x = 0 ; x < bamFiles .size (); x ++) {
93+ //Open Output File
94+ if (OUTPUT_STATUS ) {
95+ String NAME = bamFiles .get (x ).getName ().split ("\\ ." )[0 ];
96+ if (OUTPUT_PATH != null ) {
97+ try {
98+ OUT = new PrintStream (new File (OUTPUT_PATH .getCanonicalPath () + File .separator + NAME + "_InsertHistogram.out" ));
99+ OUTPNG = new File (OUTPUT_PATH .getCanonicalPath () + File .separator + NAME + "_PE.png" );
100+ }
101+ catch (FileNotFoundException e ) { e .printStackTrace (); }
102+ catch (IOException e ) { e .printStackTrace (); }
103+ } else {
104+ try {
105+ OUT = new PrintStream (new File (NAME ));
106+ OUTPNG = new File (NAME + "_PE.png" );
107+ }
108+ catch (FileNotFoundException e ) { e .printStackTrace (); }
109+ }
110+ }
111+
112+ //Print TimeStamp
113+ String time = getTimeStamp ();
103114 JTextArea PE_STATS = new JTextArea ();
104115 PE_STATS .setEditable (false );
105116 PE_STATS .append (time + "\n " );
106117 JTextArea DUP_STATS = new JTextArea ();
107118 DUP_STATS .setEditable (false );
108119 DUP_STATS .append (time + "\n " );
109-
120+
110121 //Check if BAI index file exists
111122 File f = new File (bamFiles .get (x ) + ".bai" );
112123 if (f .exists () && !f .isDirectory ()) {
113- if (OUT != null ) OUT .println (bamFiles .get (x ).getName ());
114- if (OUT != null ) OUT .println ("Chromosome_ID\t Chromosome_Size\t Aligned_Reads" );
115124 PE_STATS .append (bamFiles .get (x ).getName () + "\n " );
116125 PE_STATS .append ("Chromosome_ID\t Chromosome_Size\t Aligned_Reads\n " );
117126 DUP_STATS .append (bamFiles .get (x ).getName () + "\n " );
@@ -131,45 +140,51 @@ public void run() throws IOException {
131140 HashMap <Integer , Integer > ALL_COMPLEXITY = new HashMap <Integer , Integer >();
132141
133142 //Variables which contain basic sequence information
134- double totalTags = 0 ;
143+ double totalAlignedRead1 = 0 ;
144+ double totalAlignedRead2 = 0 ;
145+ double totalAlignedReads = 0 ;
135146 double totalGenome = 0 ;
136147
137148 for (int z = 0 ; z < bai .getNumberOfReferences (); z ++) {
138149 SAMSequenceRecord seq = reader .getFileHeader ().getSequence (z );
139150 double aligned = reader .indexing ().getIndex ().getMetaData (z ).getAlignedRecordCount ();
140151
141152 //Basic statistic calculations
142- if (OUT != null ) OUT .println (seq .getSequenceName () + "\t " + seq .getSequenceLength () + "\t " + aligned );
143153 PE_STATS .append (seq .getSequenceName () + "\t " + seq .getSequenceLength () + "\t " + aligned + "\n " );
144- totalTags += aligned ;
145154 totalGenome += seq .getSequenceLength ();
146155
147156 //Loop through each chromosome looking at each perfect F-R PE read
148157 CHROM_COMPLEXITY = new HashMap <String , Integer >();
149158 CloseableIterator <SAMRecord > iter = reader .query (seq .getSequenceName (), 0 , seq .getSequenceLength (), false );
150159 while (iter .hasNext ()) {
151160 //Create the record object
152- SAMRecord sr = iter .next ();
153-
154- if (sr .getReadPairedFlag ()) {
155- if (sr .getProperPairFlag () && sr .getFirstOfPairFlag ()) {
161+ SAMRecord sr = iter .next ();
162+
163+ if (!sr .getReadUnmappedFlag ()) { //Test for mapped read
164+ if (sr .getReadPairedFlag ()) { //Test for paired-end status
165+ if (sr .getSecondOfPairFlag ()) { totalAlignedRead2 ++; } //count read 2
166+ else if (sr .getFirstOfPairFlag ()) { totalAlignedRead1 ++; } // count read 1
156167 //Insert size calculations
157- int distance = Math .abs (sr .getInferredInsertSize ());
158- if (distance <= MAX_INSERT && distance >= MIN_INSERT ) HIST [distance - MIN_INSERT ]++;
159- InsertAverage += distance ;
160- counter ++;
161-
162- //Unique ID
163- String tagName = sr .getAlignmentStart () + "_" + sr .getMateAlignmentStart () + "_" + sr .getInferredInsertSize ();
164- //Duplication rate for each chrom determined
165- if (CHROM_COMPLEXITY .isEmpty ()) {
166- CHROM_COMPLEXITY .put (tagName , new Integer (1 ));
167- } else if (!CHROM_COMPLEXITY .containsKey (tagName )) {
168- CHROM_COMPLEXITY .put (tagName , new Integer (1 ));
169- } else if (CHROM_COMPLEXITY .containsKey (tagName )){
170- CHROM_COMPLEXITY .put (tagName , new Integer (((Integer ) CHROM_COMPLEXITY .get (tagName )).intValue () + 1 ));
168+ if (sr .getProperPairFlag () && sr .getFirstOfPairFlag ()) {
169+ //Insert size calculations
170+ int distance = Math .abs (sr .getInferredInsertSize ());
171+ if (distance <= MAX_INSERT && distance >= MIN_INSERT ) HIST [distance - MIN_INSERT ]++;
172+ InsertAverage += distance ;
173+ counter ++;
174+
175+ //Unique ID
176+ String tagName = sr .getAlignmentStart () + "_" + sr .getMateAlignmentStart () + "_" + sr .getInferredInsertSize ();
177+ //Duplication rate for each chrom determined
178+ if (CHROM_COMPLEXITY .isEmpty ()) {
179+ CHROM_COMPLEXITY .put (tagName , new Integer (1 ));
180+ } else if (!CHROM_COMPLEXITY .containsKey (tagName )) {
181+ CHROM_COMPLEXITY .put (tagName , new Integer (1 ));
182+ } else if (CHROM_COMPLEXITY .containsKey (tagName )){
183+ CHROM_COMPLEXITY .put (tagName , new Integer (((Integer ) CHROM_COMPLEXITY .get (tagName )).intValue () + 1 ));
184+ }
171185 }
172-
186+ } else { //If the read is mapped but not paired-end, default to read 1
187+ totalAlignedRead1 ++;
173188 }
174189 }
175190 }
@@ -188,45 +203,31 @@ public void run() throws IOException {
188203 }
189204 }
190205 }
191-
192- if (OUT != null ) OUT .println ("Total Genome Size: " + totalGenome + "\t Total Aligned Tags: " + totalTags + "\n " );
193- PE_STATS .append ("Total Genome Size: " + totalGenome + "\t Total Aligned Tags: " + totalTags + "\n \n " );
206+ totalAlignedReads = totalAlignedRead1 + totalAlignedRead2 ;
207+ PE_STATS .append ("Total Genome Size: " + totalGenome + "\t Total Aligned Tags: " + totalAlignedReads + "\n \n " );
194208
195209 //Output replicates used to make bam file
196210 for ( String comment : reader .getFileHeader ().getComments ()) {
197- if (OUT != null ) OUT .println (comment );
198211 PE_STATS .append (comment + "\n " );
199212 }
200213
201214 //Output program used to align bam file
202215 for (int z = 0 ; z < reader .getFileHeader ().getProgramRecords ().size (); z ++) {
203- if (OUT != null ) {
204- OUT .print (reader .getFileHeader ().getProgramRecords ().get (z ).getId () + "\t " );
205- OUT .println (reader .getFileHeader ().getProgramRecords ().get (z ).getProgramVersion ());
206- OUT .println (reader .getFileHeader ().getProgramRecords ().get (z ).getCommandLine ());
207- }
208216 PE_STATS .append (reader .getFileHeader ().getProgramRecords ().get (z ).getId () + "\t " );
209217 PE_STATS .append (reader .getFileHeader ().getProgramRecords ().get (z ).getProgramVersion () + "\n " );
210218 PE_STATS .append (reader .getFileHeader ().getProgramRecords ().get (z ).getCommandLine () + "\n " );
211219 }
212220 reader .close ();
213221 bai .close ();
214-
215- if (OUT != null ) OUT .println ();
216222 PE_STATS .append ("\n " );
217223
218224 //Insert Size statistics
219225 if (counter != 0 ) InsertAverage /= counter ;
220- if (OUT != null ) {
221- OUT .println ("Average Insert Size: " + InsertAverage );
222- OUT .println ("Median Insert Size: " + getMedian (HIST ));
223- OUT .println ("Std deviation of Insert Size: " + getStdDev (HIST , InsertAverage ));
224- OUT .println ("Number of ReadPairs: " + counter + "\n \n Histogram\n Size (bp)\t Frequency" );
225- }
226226 PE_STATS .append ("Average Insert Size: " + InsertAverage + "\n " );
227227 PE_STATS .append ("Median Insert Size: " + getMedian (HIST ) + "\n " );
228228 PE_STATS .append ("Std deviation of Insert Size: " + getStdDev (HIST , InsertAverage ) + "\n " );
229229 PE_STATS .append ("Number of ReadPairs: " + counter + "\n \n Histogram\n Size (bp)\t Frequency\n " );
230+ if (OUT != null ) { OUT .println ("InsertSize (bp)\t " + bamFiles .get (x ).getName ()); }
230231
231232 int [] DOMAIN = new int [(MAX_INSERT - MIN_INSERT ) + 1 ];
232233 for (int z = 0 ; z < HIST .length ; z ++) {
@@ -263,19 +264,19 @@ public void run() throws IOException {
263264 DUP_STATS .setCaretPosition (0 );
264265 JScrollPane dup_pane = new JScrollPane (DUP_STATS , JScrollPane .VERTICAL_SCROLLBAR_ALWAYS , JScrollPane .HORIZONTAL_SCROLLBAR_AS_NEEDED );
265266 tabbedPane_DupStats .add (bamFiles .get (x ).getName (), dup_pane );
266-
267- tabbedPane_Histogram .add (bamFiles .get (x ).getName (), Histogram .createBarChart (HIST , DOMAIN ));
267+
268+ tabbedPane_Histogram .add (bamFiles .get (x ).getName (), Histogram .createBarChart (HIST , DOMAIN , OUTPNG ));
268269 tabbedPane_Duplication .add (bamFiles .get (x ).getName (), LineChart .createLineChart (BIN , BIN_NAME ));
269270
270271 firePropertyChange ("bam" ,x , x + 1 );
271-
272+
272273 } else {
273274 if (OUT != null ) OUT .println ("BAI Index File does not exist for: " + bamFiles .get (x ).getName () + "\n " );
274275 PE_STATS .append ("BAI Index File does not exist for: " + bamFiles .get (x ).getName () + "\n \n " );
275276 DUP_STATS .append ("BAI Index File does not exist for: " + bamFiles .get (x ).getName () + "\n \n " );
276277 }
277- }
278- if ( OUT != null ) OUT . close ();
278+ if ( OUT != null ) OUT . close ();
279+ }
279280 }
280281
281282 public static double getMedian (double [] histogram ) {
0 commit comments