33import htsjdk .samtools .SAMRecord ;
44import htsjdk .samtools .SamReader ;
55import htsjdk .samtools .SamReaderFactory ;
6+ import htsjdk .samtools .filter .SamRecordFilter ;
67import htsjdk .samtools .util .CloseableIterator ;
78
89import java .io .File ;
910import java .util .Vector ;
11+ import java .lang .Math ;
1012
1113import objects .PileupParameters ;
1214import objects .CoordinateObjects .BEDCoord ;
@@ -19,7 +21,10 @@ public class PileupExtract implements Runnable{
1921 int index ;
2022 int subsetsize ;
2123 SamReader inputSam ;
22-
24+
25+ double [] TAG_S1 ;
26+ double [] TAG_S2 ;
27+
2328 public void run () {
2429 inputSam = SamReaderFactory .makeDefault ().open (BAM );
2530 for (int x = index ; x < index + subsetsize ; x ++) {
@@ -34,17 +39,16 @@ public PileupExtract(PileupParameters p, File b, Vector<BEDCoord> i, int current
3439 index = current ;
3540 subsetsize = length ;
3641 }
37-
42+
3843 public void extract (BEDCoord coord ) {
39- double [] TAG_S1 = null ;
40- double [] TAG_S2 = null ;
41- int SHIFT = param .getShift ();
42- int STRAND = param .getStrand ();
43-
44+ TAG_S1 = null ;
45+ TAG_S2 = null ;
46+
4447 // Ugly hack to account for the fact that read 1 5' end may be outside the window of interest even though read 2 and the midpoint may be in range
4548 // TODO FIX this into something more logical, probably check for read2 in region independently?
4649 int MIDPOINT_ADJUST = 0 ;
4750 if (param .getAspect () == 2 ) { MIDPOINT_ADJUST = 300 ; }
51+ if (param .getAspect () == 3 ) { MIDPOINT_ADJUST = 300 ; }
4852
4953 int BEDSTART = (int )coord .getStart ();
5054 int BEDSTOP = (int )coord .getStop ();
@@ -58,116 +62,31 @@ public void extract(BEDCoord coord) {
5862 }
5963 else if (param .getTrans () == 2 ) {
6064 WINDOW = (BEDSTOP - BEDSTART ) + (param .getBin () * param .getStdSize () * param .getStdNum () * 2 );
61- QUERYWINDOW = (param .getBin () * param .getStdSize () * param .getStdNum ());
65+ QUERYWINDOW = (param .getBin () * param .getStdSize () * param .getStdNum ());
6266 }
6367 TAG_S1 = new double [WINDOW ];
64- if (STRAND == 0 ) TAG_S2 = new double [WINDOW ];
65-
68+ if (param . getStrand () == 0 ) TAG_S2 = new double [WINDOW ];
69+ int sr_count = 0 ;
6670 //SAMRecords are 1-based and inclusive
67- CloseableIterator <SAMRecord > iter = inputSam .query (coord .getChrom (), BEDSTART - QUERYWINDOW - SHIFT - MIDPOINT_ADJUST - 1 , BEDSTOP + QUERYWINDOW + SHIFT + MIDPOINT_ADJUST + 1 , false );
71+ CloseableIterator <SAMRecord > iter = inputSam .query (coord .getChrom (), BEDSTART - QUERYWINDOW - param . getShift () - MIDPOINT_ADJUST - 1 , BEDSTOP + QUERYWINDOW + param . getShift () + MIDPOINT_ADJUST + 1 , false );
6872 while (iter .hasNext ()) {
6973 //Create the record object
7074 SAMRecord sr = iter .next ();
71-
72- if (sr .getReadPairedFlag ()) { //Must be PAIRED-END mapped
73- if ((sr .getProperPairFlag () && param .getPErequire ()) || !param .getPErequire ()) { //Must either be properly paired if paired-end or don't care about requirement
74- // (Aspect=5/3prime and (Read 1 and want Read 1, Read 2 and want Read 2, want any read)) or (Read 1 and want midpoint) or (Read 1 and want fragment)
75- if (((param .getAspect () == 0 || param .getAspect () == 1 ) && ((sr .getFirstOfPairFlag () && param .getRead () == 0 ) || (!sr .getFirstOfPairFlag () && param .getRead () == 1 ) || param .getRead () == 2 )) || (sr .getFirstOfPairFlag () && param .getAspect () == 2 ) || (sr .getFirstOfPairFlag () && param .getAspect () == 3 )) {
76- // Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
77- int mark = sr .getUnclippedStart () - 1 ;
78- if ((param .getAspect () == 0 && sr .getReadNegativeStrandFlag ()) || (param .getAspect () == 1 && !sr .getReadNegativeStrandFlag ())) {
79- mark = sr .getUnclippedEnd () - 1 ;
80- }
81-
82- if (sr .getProperPairFlag ()) { //prevent cases where non-properly paired Read1 gets to this point
83- //Find midpoint if aspect flag == 2
84- if (param .getAspect () == 2 ) {
85- if (sr .getInferredInsertSize ()>0 ) {
86- mark = sr .getAlignmentStart () - 1 + (sr .getInferredInsertSize () / 2 );
87- } else if (sr .getInferredInsertSize ()<0 ) {
88- mark = sr .getMateAlignmentStart () - 1 - (sr .getInferredInsertSize () / 2 );
89- } else {
90- //Most aligners will flag records with an insert size of zero as improper pairs
91- System .err .println ("This statement should never print (insert size=0 when finding midpoint in PileupExtract)" );
92- continue ;
93- }
94- // Correction to ensure that even insert size mark reoriented for negative strands
95- if (sr .getInferredInsertSize () % 2 == 0 && coord .getDir ().equals ("-" ) ) { mark --; }
96- } else if (param .getAspect () == 3 ) {
97- if (sr .getInferredInsertSize ()>0 ) { mark = sr .getAlignmentStart () - 1 ; }
98- else if (sr .getInferredInsertSize ()<0 ) { mark = sr .getMateAlignmentStart () - 1 ; }
99- else {
100- //Most aligners will flag records with an insert size of zero as improper pairs
101- System .err .println ("This statement should never print (insert size=0 when finding fragment start in PileupExtract)" );
102- continue ;
103- }
104- }
105- // Apply insert size filters
106- if (Math .abs (sr .getInferredInsertSize ()) < param .getMinInsert () && param .getMinInsert () != -9999 ) { continue ; } //Test for MIN insert size cutoff here
107- if (Math .abs (sr .getInferredInsertSize ()) > param .getMaxInsert () && param .getMaxInsert () != -9999 ) { continue ; } //Test for MAX insert size cutoff here
108- } else if (param .getAspect () == 2 || param .getAspect () == 3 ) { continue ; } // Make sure that midpoint and fragment pileup must come from properly paired read
75+ // System.out.println("\n===SAM Record # " + sr_count + "===");
76+ // System.out.println("Read Name:" + sr.getReadName());
10977
110- // Shift as needed
111- if (sr .getReadNegativeStrandFlag ()) { mark -= SHIFT ; }
112- else { mark += SHIFT ; }
113-
114- //Adjust tag start to be within array reference
115- mark -= (BEDSTART - QUERYWINDOW );
116-
117- //Increment Final Array keeping track of pileup
118- if (param .getAspect () < 3 ) {
119- if (mark >= 0 && mark < TAG_S1 .length ) {
120- if (STRAND == 0 ) {
121- if (!sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("-" )) { TAG_S2 [mark ] += 1 ; }
122- else if (sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("+" )) { TAG_S2 [mark ] += 1 ; }
123- else if (!sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("+" )) { TAG_S1 [mark ] += 1 ; }
124- else if (sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("-" )) { TAG_S1 [mark ] += 1 ;}
125- } else {
126- TAG_S1 [mark ] += 1 ;
127- }
128- }
129- } else {
130- int markStop = Math .min (mark + Math .abs (sr .getInferredInsertSize ()), TAG_S1 .length );
131- for (int m = mark ; m < markStop ; m ++) {
132- if (m >= 0 && m < TAG_S1 .length ) {
133- if (STRAND == 0 ) {
134- if (!sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("-" )) { TAG_S2 [m ] += 1 ; }
135- else if (sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("+" )) { TAG_S2 [m ] += 1 ; }
136- else if (!sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("+" )) { TAG_S1 [m ] += 1 ; }
137- else if (sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("-" )) { TAG_S1 [m ] += 1 ;}
138- } else {
139- TAG_S1 [m ] += 1 ;
140- }
141- }
142- }
143- }
144- }
145- }
146- } else if ((param .getRead () == 0 || param .getRead () == 2 ) && (param .getAspect () == 0 || param .getAspect () == 1 )) { //Also outputs if not paired-end since by default it is read-1 (and 5' or 3' end)
147- // Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
148- int mark = sr .getUnclippedStart () - 1 ;
149- if ((param .getAspect () == 0 && sr .getReadNegativeStrandFlag ()) || (param .getAspect () == 1 && !sr .getReadNegativeStrandFlag ())) {
150- mark = sr .getUnclippedEnd () - 1 ;
151- }
152- // Shift as needed
153- if (sr .getReadNegativeStrandFlag ()) { mark -= SHIFT ; }
154- else { mark += SHIFT ; }
155-
156- //Adjust tag start to be within array reference
157- mark -= (BEDSTART - QUERYWINDOW );
158-
159- //Increment Final Array keeping track of pileup
160- if (mark >= 0 && mark < TAG_S1 .length ) {
161- if (STRAND == 0 ) {
162- if (!sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("-" )) { TAG_S2 [mark ] += 1 ; }
163- else if (sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("+" )) { TAG_S2 [mark ] += 1 ; }
164- else if (!sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("+" )) { TAG_S1 [mark ] += 1 ; }
165- else if (sr .getReadNegativeStrandFlag () && coord .getDir ().equals ("-" )) { TAG_S1 [mark ] += 1 ;}
166- } else {
167- TAG_S1 [mark ] += 1 ;
168- }
169- }
78+ if (param .getAspect ()==0 ) {
79+ addFivePrime (sr , coord , BEDSTART - QUERYWINDOW );
80+ } else if (param .getAspect ()==1 ) {
81+ addThreePrime (sr , coord , BEDSTART - QUERYWINDOW );
82+ } else if (param .getAspect ()==2 ) {
83+ addMidpoint (sr , coord , BEDSTART - QUERYWINDOW );
84+ } else if (param .getAspect ()==3 ) {
85+ addFragment (sr , coord , BEDSTART - QUERYWINDOW );
86+ } else {
87+ System .err .println ("Invalid read Aspect" );
17088 }
89+ sr_count ++;
17190 }
17291 iter .close ();
17392
@@ -211,4 +130,182 @@ else if(param.getTrans() == 2) {
211130 coord .setFstrand (finalF );
212131 coord .setRstrand (finalR );
213132 }
133+
134+ public void addFivePrime (SAMRecord sr , BEDCoord coord , int GENOMIC_SHIFT ) {
135+ if (sr .getReadPairedFlag ()) { //Must be PAIRED-END mapped
136+ if ((sr .getProperPairFlag () && param .getPErequire ()) || !param .getPErequire ()) { //Must either be properly paired if paired-end or don't care about requirement
137+ int mark = sr .getUnclippedStart () - 1 ;
138+ // Read 1 and want Read 1, Read 2 and want Read 2, want any read
139+ if ((sr .getFirstOfPairFlag () && param .getRead () == 0 ) || (!sr .getFirstOfPairFlag () && param .getRead () == 1 ) || param .getRead () == 2 ) {
140+ // Apply insert size filters
141+ if (sr .getProperPairFlag ()) { //prevent cases where non-properly paired Read1 gets to this point
142+ if (Math .abs (sr .getInferredInsertSize ()) < param .getMinInsert () && param .getMinInsert () != -9999 ) { return ; } //Test for MIN insert size cutoff here
143+ if (Math .abs (sr .getInferredInsertSize ()) > param .getMaxInsert () && param .getMaxInsert () != -9999 ) { return ; } //Test for MAX insert size cutoff here
144+ }
145+ // Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
146+ if (sr .getReadNegativeStrandFlag ()) {
147+ mark = sr .getUnclippedEnd () - 1 ;
148+ }
149+ } else { return ; } // Skip pileup if read not wanted
150+ // Shift as needed
151+ if (sr .getReadNegativeStrandFlag ()) { mark -= param .getShift (); }
152+ else { mark += param .getShift (); }
153+ //Adjust tag start to be within array reference
154+ mark -= GENOMIC_SHIFT ;
155+ //Determine final array strandedness
156+ boolean useTAG_S2 = false ;
157+ if (param .getStrand () == 0 && (sr .getReadNegativeStrandFlag () != coord .getDir ().equals ("-" ))) { useTAG_S2 = true ; }
158+ //Increment Final Array keeping track of pileup
159+ for (int m = 0 ; m < param .getTagExtend () + 1 ; m ++) {
160+ if (mark >= 0 && mark < TAG_S1 .length ) {
161+ if (useTAG_S2 ) { TAG_S2 [mark ] += 1 ; }
162+ else { TAG_S1 [mark ] += 1 ; }
163+ }
164+ mark += sr .getInferredInsertSize () < 0 ? -1 : 1 ;
165+ }
166+ }
167+ } else if (param .getRead () == 0 || param .getRead () == 2 ) {
168+ // Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
169+ int mark = sr .getUnclippedStart () - 1 ;
170+ if (sr .getReadNegativeStrandFlag ()) {
171+ mark = sr .getUnclippedEnd () - 1 ;
172+ }
173+ // Shift as needed
174+ if (sr .getReadNegativeStrandFlag ()) { mark -= param .getShift (); }
175+ else { mark += param .getShift (); }
176+ //Adjust tag start to be within array reference
177+ mark -= (GENOMIC_SHIFT );
178+ //Determine final array strandedness
179+ boolean useTAG_S2 = false ;
180+ if (param .getStrand () == 0 && (sr .getReadNegativeStrandFlag () != coord .getDir ().equals ("-" ))) { useTAG_S2 = true ; }
181+ //Increment Final Array keeping track of pileup
182+ for (int m = 0 ; m < param .getTagExtend () + 1 ; m ++) {
183+ if (mark >= 0 && mark < TAG_S1 .length ) {
184+ if (useTAG_S2 ) { TAG_S2 [mark ] += 1 ; }
185+ else { TAG_S1 [mark ] += 1 ; }
186+ }
187+ mark += sr .getInferredInsertSize () < 0 ? -1 : 1 ;
188+ }
189+ }
190+ }
191+
192+ public void addThreePrime (SAMRecord sr , BEDCoord coord , int GENOMIC_SHIFT ) {
193+ if (sr .getReadPairedFlag ()) { //Must be PAIRED-END mapped
194+ if ((sr .getProperPairFlag () && param .getPErequire ()) || !param .getPErequire ()) { //Must either be properly paired if paired-end or don't care about requirement
195+ int mark = sr .getUnclippedEnd () - 1 ;
196+ // Read 1 and want Read 1, Read 2 and want Read 2, want any read
197+ if ((sr .getFirstOfPairFlag () && param .getRead () == 0 ) || (!sr .getFirstOfPairFlag () && param .getRead () == 1 ) || param .getRead () == 2 ) {
198+ // Apply insert size filters
199+ if (sr .getProperPairFlag ()) { //prevent cases where non-properly paired Read1 gets to this point
200+ if (Math .abs (sr .getInferredInsertSize ()) < param .getMinInsert () && param .getMinInsert () != -9999 ) { return ; } //Test for MIN insert size cutoff here
201+ if (Math .abs (sr .getInferredInsertSize ()) > param .getMaxInsert () && param .getMaxInsert () != -9999 ) { return ; } //Test for MAX insert size cutoff here
202+ }
203+ // Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
204+ if (sr .getReadNegativeStrandFlag ()) {
205+ mark = sr .getUnclippedStart () - 1 ;
206+ }
207+ } else { return ; } // Skip pileup if read not wanted
208+ // Shift as needed
209+ if (sr .getReadNegativeStrandFlag ()) { mark -= param .getShift (); }
210+ else { mark += param .getShift (); }
211+ //Adjust tag start to be within array reference
212+ mark -= GENOMIC_SHIFT ;
213+ //Determine final array strandedness
214+ boolean useTAG_S2 = false ;
215+ if (param .getStrand () == 0 && (sr .getReadNegativeStrandFlag () != coord .getDir ().equals ("-" ))) { useTAG_S2 = true ; }
216+ //Increment Final Array keeping track of pileup
217+ for (int m = 0 ; m < param .getTagExtend () + 1 ; m ++) {
218+ if (mark >= 0 && mark < TAG_S1 .length ) {
219+ if (useTAG_S2 ) { TAG_S2 [mark ] += 1 ; }
220+ else { TAG_S1 [mark ] += 1 ; }
221+ }
222+ mark += sr .getInferredInsertSize () < 0 ? -1 : 1 ;
223+ }
224+ }
225+ } else if (param .getRead () == 0 || param .getRead () == 2 ) {
226+ // Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
227+ int mark = sr .getUnclippedStart () - 1 ;
228+ if (sr .getReadNegativeStrandFlag ()) {
229+ mark = sr .getUnclippedEnd () - 1 ;
230+ }
231+ // Shift as needed
232+ if (sr .getReadNegativeStrandFlag ()) { mark -= param .getShift (); }
233+ else { mark += param .getShift (); }
234+ //Adjust tag start to be within array reference
235+ mark -= (GENOMIC_SHIFT );
236+ //Determine final array strandedness
237+ boolean useTAG_S2 = false ;
238+ if (param .getStrand () == 0 && (sr .getReadNegativeStrandFlag () != coord .getDir ().equals ("-" ))) { useTAG_S2 = true ; }
239+ //Increment Final Array keeping track of pileup
240+ for (int m = 0 ; m < param .getTagExtend () + 1 ; m ++) {
241+ if (mark >= 0 && mark < TAG_S1 .length ) {
242+ if (useTAG_S2 ) { TAG_S2 [mark ] += 1 ; }
243+ else { TAG_S1 [mark ] += 1 ; }
244+ }
245+ mark += sr .getInferredInsertSize () < 0 ? -1 : 1 ;
246+ }
247+ }
248+ }
249+
250+ public void addMidpoint (SAMRecord sr , BEDCoord coord , int GENOMIC_SHIFT ) {
251+ if (sr .getReadPairedFlag ()) { //Must be PAIRED-END mapped
252+ if (sr .getProperPairFlag () && sr .getFirstOfPairFlag ()) { //Must either be properly paired for midpoint, only first in pair to avoid double-counting
253+ // Apply insert size filters
254+ if (Math .abs (sr .getInferredInsertSize ()) < param .getMinInsert () && param .getMinInsert () != -9999 ) { return ; } //Test for MIN insert size cutoff here
255+ if (Math .abs (sr .getInferredInsertSize ()) > param .getMaxInsert () && param .getMaxInsert () != -9999 ) { return ; } //Test for MAX insert size cutoff here
256+ // Set marker
257+ int mark = sr .getMateAlignmentStart () - 1 - (sr .getInferredInsertSize () / 2 );
258+ if (sr .getInferredInsertSize ()>0 ) {
259+ mark = sr .getAlignmentStart () - 1 + (sr .getInferredInsertSize () / 2 );
260+ }
261+ // Correction to ensure that even insert size mark reoriented for negative strands
262+ // midpoint calculation rounds down but needs adjustment if reference interval on negative strand
263+ if (sr .getInferredInsertSize () % 2 == 0 && coord .getDir ().equals ("-" ) ) { mark --; }
264+ // // Shift as needed
265+ // if(sr.getReadNegativeStrandFlag()) { mark -= param.getShift(); }
266+ // else { mark += param.getShift(); }
267+ //Adjust tag start to be within array reference
268+ mark -= GENOMIC_SHIFT ;
269+ // //Determine final array strandedness
270+ // boolean useTAG_S2 = false;
271+ // if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
272+ //Increment Final Array keeping track of pileup
273+ for (int m = 0 ; m < param .getTagExtend () + 1 ; m ++) {
274+ if (mark >= 0 && mark < TAG_S1 .length ) {
275+ TAG_S1 [mark ] += 1 ;
276+ }
277+ }
278+ }
279+ }
280+ }
281+
282+ private void addFragment (SAMRecord sr , BEDCoord coord , int GENOMIC_SHIFT ) {
283+ if (sr .getReadPairedFlag ()) { //Must be PAIRED-END mapped
284+ if (sr .getProperPairFlag () && sr .getFirstOfPairFlag ()) { //Must either be properly paired for midpoint, only first in pair to avoid double-counting
285+ // Apply insert size filters
286+ if (Math .abs (sr .getInferredInsertSize ()) < param .getMinInsert () && param .getMinInsert () != -9999 ) { return ; } //Test for MIN insert size cutoff here
287+ if (Math .abs (sr .getInferredInsertSize ()) > param .getMaxInsert () && param .getMaxInsert () != -9999 ) { return ; } //Test for MAX insert size cutoff here
288+ // Set marker
289+ int mark = sr .getAlignmentStart ();
290+ if (sr .getInferredInsertSize () < 0 ) {
291+ mark += sr .getInferredInsertSize ();
292+ }
293+ // Shift as needed
294+ if (sr .getReadNegativeStrandFlag ()) { mark -= param .getShift (); }
295+ else { mark += param .getShift (); }
296+ //Adjust tag start to be within array reference
297+ mark -= GENOMIC_SHIFT ;
298+ // //Determine final array strandedness
299+ // boolean useTAG_S2 = false;
300+ // if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
301+ //Increment Final Array keeping track of pileup
302+ for (int m = 0 ; m < Math .abs (sr .getInferredInsertSize ()) + param .getTagExtend (); m ++) {
303+ if (mark >= 0 && mark < TAG_S1 .length ) {
304+ TAG_S1 [mark ] += 1 ;
305+ }
306+ mark += sr .getInferredInsertSize () < 0 ? -1 : 1 ;
307+ }
308+ }
309+ }
310+ }
214311}
0 commit comments