Skip to content

Commit 6aa57f2

Browse files
committed
restructure PileupExtract
Previous commit for 3' and Fragment feature additions was buggy. This code restructure clarifies the ReadAspect & ReadOutput logic with expansion to include TagExtend in mind. #70 The `PileupParameters` object had "TagExtend" added with a fixed intialized value (not set yet by `TagPileupWindow` or `TagPileupCLI`) Each SAMRecord processed by a helper method that is determined by the Read Aspect value. Similar structure to v0.13 TagPileup within each helper method.
1 parent e6afc0a commit 6aa57f2

2 files changed

Lines changed: 215 additions & 109 deletions

File tree

src/objects/PileupParameters.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public class PileupParameters {
3131

3232
private int SHIFT = 0;
3333
private int BIN = 1;
34+
private int TAGEXTEND = 0;
3435
private int CPU = 1;
3536
private int OUTTYPE = 0; //0=no output, 1=TAB, 2=CDT
3637
private File BLACKLIST = null;
@@ -53,6 +54,7 @@ public void printAll(){
5354
System.out.println( "private int TRANS = " + TRANS );
5455
System.out.println( "private int SHIFT = " + SHIFT );
5556
System.out.println( "private int BIN = " + BIN );
57+
System.out.println( "private int TAGEXTEND = " + TAGEXTEND );
5658
System.out.println( "private int SMOOTH = " + SMOOTH );
5759
System.out.println( "private int STDSIZE = " + STDSIZE );
5860
System.out.println( "private int STDNUM = " + STDNUM );
@@ -190,6 +192,13 @@ public void setBin(int bIN) {
190192
BIN = bIN;
191193
}
192194

195+
public int getTagExtend() {
196+
return TAGEXTEND;
197+
}
198+
public void setTagExtend(int tAGEXTEND) {
199+
TAGEXTEND = tAGEXTEND;
200+
}
201+
193202
public int getSmooth() {
194203
return SMOOTH;
195204
}

src/scripts/Read_Analysis/PileupScripts/PileupExtract.java

Lines changed: 206 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
import htsjdk.samtools.SAMRecord;
44
import htsjdk.samtools.SamReader;
55
import htsjdk.samtools.SamReaderFactory;
6+
import htsjdk.samtools.filter.SamRecordFilter;
67
import htsjdk.samtools.util.CloseableIterator;
78

89
import java.io.File;
910
import java.util.Vector;
11+
import java.lang.Math;
1012

1113
import objects.PileupParameters;
1214
import objects.CoordinateObjects.BEDCoord;
@@ -19,7 +21,10 @@ public class PileupExtract implements Runnable{
1921
int index;
2022
int subsetsize;
2123
SamReader inputSam;
22-
24+
25+
double[] TAG_S1;
26+
double[] TAG_S2;
27+
2328
public void run() {
2429
inputSam = SamReaderFactory.makeDefault().open(BAM);
2530
for(int x = index; x < index + subsetsize; x++) {
@@ -34,17 +39,16 @@ public PileupExtract(PileupParameters p, File b, Vector<BEDCoord> i, int current
3439
index = current;
3540
subsetsize = length;
3641
}
37-
42+
3843
public void extract(BEDCoord coord) {
39-
double[] TAG_S1 = null;
40-
double[] TAG_S2 = null;
41-
int SHIFT = param.getShift();
42-
int STRAND = param.getStrand();
43-
44+
TAG_S1 = null;
45+
TAG_S2 = null;
46+
4447
// Ugly hack to account for the fact that read 1 5' end may be outside the window of interest even though read 2 and the midpoint may be in range
4548
// TODO FIX this into something more logical, probably check for read2 in region independently?
4649
int MIDPOINT_ADJUST = 0;
4750
if(param.getAspect() == 2) { MIDPOINT_ADJUST = 300; }
51+
if(param.getAspect() == 3) { MIDPOINT_ADJUST = 300; }
4852

4953
int BEDSTART = (int)coord.getStart();
5054
int BEDSTOP = (int)coord.getStop();
@@ -58,116 +62,31 @@ public void extract(BEDCoord coord) {
5862
}
5963
else if(param.getTrans() == 2) {
6064
WINDOW = (BEDSTOP - BEDSTART) + (param.getBin() * param.getStdSize() * param.getStdNum() * 2);
61-
QUERYWINDOW = (param.getBin() * param.getStdSize() * param.getStdNum());
65+
QUERYWINDOW = (param.getBin() * param.getStdSize() * param.getStdNum());
6266
}
6367
TAG_S1 = new double[WINDOW];
64-
if(STRAND == 0) TAG_S2 = new double[WINDOW];
65-
68+
if(param.getStrand() == 0) TAG_S2 = new double[WINDOW];
69+
int sr_count = 0;
6670
//SAMRecords are 1-based and inclusive
67-
CloseableIterator<SAMRecord> iter = inputSam.query(coord.getChrom(), BEDSTART - QUERYWINDOW - SHIFT - MIDPOINT_ADJUST - 1, BEDSTOP + QUERYWINDOW + SHIFT + MIDPOINT_ADJUST + 1, false);
71+
CloseableIterator<SAMRecord> iter = inputSam.query(coord.getChrom(), BEDSTART - QUERYWINDOW - param.getShift() - MIDPOINT_ADJUST - 1, BEDSTOP + QUERYWINDOW + param.getShift() + MIDPOINT_ADJUST + 1, false);
6872
while (iter.hasNext()) {
6973
//Create the record object
7074
SAMRecord sr = iter.next();
71-
72-
if(sr.getReadPairedFlag()) { //Must be PAIRED-END mapped
73-
if((sr.getProperPairFlag() && param.getPErequire()) || !param.getPErequire()) { //Must either be properly paired if paired-end or don't care about requirement
74-
// (Aspect=5/3prime and (Read 1 and want Read 1, Read 2 and want Read 2, want any read)) or (Read 1 and want midpoint) or (Read 1 and want fragment)
75-
if(((param.getAspect() == 0 || param.getAspect() == 1) && ((sr.getFirstOfPairFlag() && param.getRead() == 0) || (!sr.getFirstOfPairFlag() && param.getRead() == 1) || param.getRead() == 2 )) || (sr.getFirstOfPairFlag() && param.getAspect() == 2) || (sr.getFirstOfPairFlag() && param.getAspect() == 3)) {
76-
// Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
77-
int mark = sr.getUnclippedStart() - 1;
78-
if((param.getAspect() == 0 && sr.getReadNegativeStrandFlag()) || (param.getAspect() == 1 && !sr.getReadNegativeStrandFlag())) {
79-
mark = sr.getUnclippedEnd() - 1;
80-
}
81-
82-
if(sr.getProperPairFlag()) { //prevent cases where non-properly paired Read1 gets to this point
83-
//Find midpoint if aspect flag == 2
84-
if(param.getAspect() == 2) {
85-
if(sr.getInferredInsertSize()>0) {
86-
mark = sr.getAlignmentStart() - 1 + (sr.getInferredInsertSize() / 2);
87-
} else if(sr.getInferredInsertSize()<0) {
88-
mark = sr.getMateAlignmentStart() - 1 - (sr.getInferredInsertSize() / 2);
89-
} else {
90-
//Most aligners will flag records with an insert size of zero as improper pairs
91-
System.err.println("This statement should never print (insert size=0 when finding midpoint in PileupExtract)");
92-
continue;
93-
}
94-
// Correction to ensure that even insert size mark reoriented for negative strands
95-
if(sr.getInferredInsertSize() % 2 == 0 && coord.getDir().equals("-") ) { mark--; }
96-
} else if(param.getAspect() == 3) {
97-
if(sr.getInferredInsertSize()>0) { mark = sr.getAlignmentStart() - 1; }
98-
else if(sr.getInferredInsertSize()<0) { mark = sr.getMateAlignmentStart() - 1; }
99-
else {
100-
//Most aligners will flag records with an insert size of zero as improper pairs
101-
System.err.println("This statement should never print (insert size=0 when finding fragment start in PileupExtract)");
102-
continue;
103-
}
104-
}
105-
// Apply insert size filters
106-
if(Math.abs(sr.getInferredInsertSize()) < param.getMinInsert() && param.getMinInsert() != -9999) { continue; } //Test for MIN insert size cutoff here
107-
if(Math.abs(sr.getInferredInsertSize()) > param.getMaxInsert() && param.getMaxInsert() != -9999) { continue; } //Test for MAX insert size cutoff here
108-
} else if(param.getAspect() == 2 || param.getAspect() == 3) { continue; } // Make sure that midpoint and fragment pileup must come from properly paired read
75+
// System.out.println("\n===SAM Record # " + sr_count + "===");
76+
// System.out.println("Read Name:" + sr.getReadName());
10977

110-
// Shift as needed
111-
if(sr.getReadNegativeStrandFlag()) { mark -= SHIFT; }
112-
else { mark += SHIFT; }
113-
114-
//Adjust tag start to be within array reference
115-
mark -= (BEDSTART - QUERYWINDOW);
116-
117-
//Increment Final Array keeping track of pileup
118-
if(param.getAspect() < 3) {
119-
if(mark >= 0 && mark < TAG_S1.length) {
120-
if(STRAND == 0) {
121-
if(!sr.getReadNegativeStrandFlag() && coord.getDir().equals("-")) { TAG_S2[mark] += 1; }
122-
else if(sr.getReadNegativeStrandFlag() && coord.getDir().equals("+")) { TAG_S2[mark] += 1; }
123-
else if(!sr.getReadNegativeStrandFlag() && coord.getDir().equals("+")) { TAG_S1[mark] += 1; }
124-
else if(sr.getReadNegativeStrandFlag() && coord.getDir().equals("-")) { TAG_S1[mark] += 1;}
125-
} else {
126-
TAG_S1[mark] += 1;
127-
}
128-
}
129-
} else {
130-
int markStop = Math.min(mark + Math.abs(sr.getInferredInsertSize()), TAG_S1.length);
131-
for(int m = mark; m < markStop; m++) {
132-
if(m >= 0 && m < TAG_S1.length) {
133-
if(STRAND == 0) {
134-
if(!sr.getReadNegativeStrandFlag() && coord.getDir().equals("-")) { TAG_S2[m] += 1; }
135-
else if(sr.getReadNegativeStrandFlag() && coord.getDir().equals("+")) { TAG_S2[m] += 1; }
136-
else if(!sr.getReadNegativeStrandFlag() && coord.getDir().equals("+")) { TAG_S1[m] += 1; }
137-
else if(sr.getReadNegativeStrandFlag() && coord.getDir().equals("-")) { TAG_S1[m] += 1;}
138-
} else {
139-
TAG_S1[m] += 1;
140-
}
141-
}
142-
}
143-
}
144-
}
145-
}
146-
} else if((param.getRead() == 0 || param.getRead() == 2) && (param.getAspect() == 0 || param.getAspect() == 1)) { //Also outputs if not paired-end since by default it is read-1 (and 5' or 3' end)
147-
// Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
148-
int mark = sr.getUnclippedStart() - 1;
149-
if((param.getAspect() == 0 && sr.getReadNegativeStrandFlag()) || (param.getAspect() == 1 && !sr.getReadNegativeStrandFlag())) {
150-
mark = sr.getUnclippedEnd() - 1;
151-
}
152-
// Shift as needed
153-
if(sr.getReadNegativeStrandFlag()) { mark -= SHIFT; }
154-
else { mark += SHIFT; }
155-
156-
//Adjust tag start to be within array reference
157-
mark -= (BEDSTART - QUERYWINDOW);
158-
159-
//Increment Final Array keeping track of pileup
160-
if(mark >= 0 && mark < TAG_S1.length) {
161-
if(STRAND == 0) {
162-
if(!sr.getReadNegativeStrandFlag() && coord.getDir().equals("-")) { TAG_S2[mark] += 1; }
163-
else if(sr.getReadNegativeStrandFlag() && coord.getDir().equals("+")) { TAG_S2[mark] += 1; }
164-
else if(!sr.getReadNegativeStrandFlag() && coord.getDir().equals("+")) { TAG_S1[mark] += 1; }
165-
else if(sr.getReadNegativeStrandFlag() && coord.getDir().equals("-")) { TAG_S1[mark] += 1;}
166-
} else {
167-
TAG_S1[mark] += 1;
168-
}
169-
}
78+
if(param.getAspect()==0) {
79+
addFivePrime(sr, coord, BEDSTART - QUERYWINDOW);
80+
} else if(param.getAspect()==1) {
81+
addThreePrime(sr, coord, BEDSTART - QUERYWINDOW);
82+
} else if(param.getAspect()==2) {
83+
addMidpoint(sr, coord, BEDSTART - QUERYWINDOW);
84+
} else if(param.getAspect()==3) {
85+
addFragment(sr, coord, BEDSTART - QUERYWINDOW);
86+
} else {
87+
System.err.println("Invalid read Aspect");
17088
}
89+
sr_count++;
17190
}
17291
iter.close();
17392

@@ -211,4 +130,182 @@ else if(param.getTrans() == 2) {
211130
coord.setFstrand(finalF);
212131
coord.setRstrand(finalR);
213132
}
133+
134+
public void addFivePrime(SAMRecord sr, BEDCoord coord, int GENOMIC_SHIFT) {
135+
if(sr.getReadPairedFlag()) { //Must be PAIRED-END mapped
136+
if((sr.getProperPairFlag() && param.getPErequire()) || !param.getPErequire()) { //Must either be properly paired if paired-end or don't care about requirement
137+
int mark = sr.getUnclippedStart() - 1;
138+
// Read 1 and want Read 1, Read 2 and want Read 2, want any read
139+
if((sr.getFirstOfPairFlag() && param.getRead() == 0) || (!sr.getFirstOfPairFlag() && param.getRead() == 1) || param.getRead() == 2) {
140+
// Apply insert size filters
141+
if(sr.getProperPairFlag()) { //prevent cases where non-properly paired Read1 gets to this point
142+
if(Math.abs(sr.getInferredInsertSize()) < param.getMinInsert() && param.getMinInsert() != -9999) { return; } //Test for MIN insert size cutoff here
143+
if(Math.abs(sr.getInferredInsertSize()) > param.getMaxInsert() && param.getMaxInsert() != -9999) { return; } //Test for MAX insert size cutoff here
144+
}
145+
// Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
146+
if(sr.getReadNegativeStrandFlag()) {
147+
mark = sr.getUnclippedEnd() - 1;
148+
}
149+
} else { return; } // Skip pileup if read not wanted
150+
// Shift as needed
151+
if(sr.getReadNegativeStrandFlag()) { mark -= param.getShift(); }
152+
else { mark += param.getShift(); }
153+
//Adjust tag start to be within array reference
154+
mark -= GENOMIC_SHIFT;
155+
//Determine final array strandedness
156+
boolean useTAG_S2 = false;
157+
if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
158+
//Increment Final Array keeping track of pileup
159+
for(int m = 0; m < param.getTagExtend() + 1; m++) {
160+
if(mark >= 0 && mark < TAG_S1.length) {
161+
if(useTAG_S2) { TAG_S2[mark] += 1; }
162+
else { TAG_S1[mark] += 1; }
163+
}
164+
mark += sr.getInferredInsertSize() < 0 ? -1 : 1;
165+
}
166+
}
167+
} else if(param.getRead() == 0 || param.getRead() == 2) {
168+
// Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
169+
int mark = sr.getUnclippedStart() - 1;
170+
if(sr.getReadNegativeStrandFlag()) {
171+
mark = sr.getUnclippedEnd() - 1;
172+
}
173+
// Shift as needed
174+
if(sr.getReadNegativeStrandFlag()) { mark -= param.getShift(); }
175+
else { mark += param.getShift(); }
176+
//Adjust tag start to be within array reference
177+
mark -= (GENOMIC_SHIFT);
178+
//Determine final array strandedness
179+
boolean useTAG_S2 = false;
180+
if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
181+
//Increment Final Array keeping track of pileup
182+
for(int m = 0; m < param.getTagExtend() + 1; m++) {
183+
if(mark >= 0 && mark < TAG_S1.length) {
184+
if(useTAG_S2) { TAG_S2[mark] += 1; }
185+
else { TAG_S1[mark] += 1; }
186+
}
187+
mark += sr.getInferredInsertSize() < 0 ? -1 : 1;
188+
}
189+
}
190+
}
191+
192+
public void addThreePrime(SAMRecord sr, BEDCoord coord, int GENOMIC_SHIFT) {
193+
if(sr.getReadPairedFlag()) { //Must be PAIRED-END mapped
194+
if((sr.getProperPairFlag() && param.getPErequire()) || !param.getPErequire()) { //Must either be properly paired if paired-end or don't care about requirement
195+
int mark = sr.getUnclippedEnd() - 1;
196+
// Read 1 and want Read 1, Read 2 and want Read 2, want any read
197+
if((sr.getFirstOfPairFlag() && param.getRead() == 0) || (!sr.getFirstOfPairFlag() && param.getRead() == 1) || param.getRead() == 2) {
198+
// Apply insert size filters
199+
if(sr.getProperPairFlag()) { //prevent cases where non-properly paired Read1 gets to this point
200+
if(Math.abs(sr.getInferredInsertSize()) < param.getMinInsert() && param.getMinInsert() != -9999) { return; } //Test for MIN insert size cutoff here
201+
if(Math.abs(sr.getInferredInsertSize()) > param.getMaxInsert() && param.getMaxInsert() != -9999) { return; } //Test for MAX insert size cutoff here
202+
}
203+
// Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
204+
if(sr.getReadNegativeStrandFlag()) {
205+
mark = sr.getUnclippedStart() - 1;
206+
}
207+
} else { return; } // Skip pileup if read not wanted
208+
// Shift as needed
209+
if(sr.getReadNegativeStrandFlag()) { mark -= param.getShift(); }
210+
else { mark += param.getShift(); }
211+
//Adjust tag start to be within array reference
212+
mark -= GENOMIC_SHIFT;
213+
//Determine final array strandedness
214+
boolean useTAG_S2 = false;
215+
if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
216+
//Increment Final Array keeping track of pileup
217+
for(int m = 0; m < param.getTagExtend() + 1; m++) {
218+
if(mark >= 0 && mark < TAG_S1.length) {
219+
if(useTAG_S2) { TAG_S2[mark] += 1; }
220+
else { TAG_S1[mark] += 1; }
221+
}
222+
mark += sr.getInferredInsertSize() < 0 ? -1 : 1;
223+
}
224+
}
225+
} else if(param.getRead() == 0 || param.getRead() == 2) {
226+
// Set marker (left side default, right side if positive strand and 5 prime or negative strand and 3 prime
227+
int mark = sr.getUnclippedStart() - 1;
228+
if(sr.getReadNegativeStrandFlag()) {
229+
mark = sr.getUnclippedEnd() - 1;
230+
}
231+
// Shift as needed
232+
if(sr.getReadNegativeStrandFlag()) { mark -= param.getShift(); }
233+
else { mark += param.getShift(); }
234+
//Adjust tag start to be within array reference
235+
mark -= (GENOMIC_SHIFT);
236+
//Determine final array strandedness
237+
boolean useTAG_S2 = false;
238+
if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
239+
//Increment Final Array keeping track of pileup
240+
for(int m = 0; m < param.getTagExtend() + 1; m++) {
241+
if(mark >= 0 && mark < TAG_S1.length) {
242+
if(useTAG_S2) { TAG_S2[mark] += 1; }
243+
else { TAG_S1[mark] += 1; }
244+
}
245+
mark += sr.getInferredInsertSize() < 0 ? -1 : 1;
246+
}
247+
}
248+
}
249+
250+
public void addMidpoint(SAMRecord sr, BEDCoord coord, int GENOMIC_SHIFT) {
251+
if(sr.getReadPairedFlag()) { //Must be PAIRED-END mapped
252+
if(sr.getProperPairFlag() && sr.getFirstOfPairFlag()) { //Must either be properly paired for midpoint, only first in pair to avoid double-counting
253+
// Apply insert size filters
254+
if(Math.abs(sr.getInferredInsertSize()) < param.getMinInsert() && param.getMinInsert() != -9999) { return; } //Test for MIN insert size cutoff here
255+
if(Math.abs(sr.getInferredInsertSize()) > param.getMaxInsert() && param.getMaxInsert() != -9999) { return; } //Test for MAX insert size cutoff here
256+
// Set marker
257+
int mark = sr.getMateAlignmentStart() - 1 - (sr.getInferredInsertSize() / 2);
258+
if(sr.getInferredInsertSize()>0) {
259+
mark = sr.getAlignmentStart() - 1 + (sr.getInferredInsertSize() / 2);
260+
}
261+
// Correction to ensure that even insert size mark reoriented for negative strands
262+
// midpoint calculation rounds down but needs adjustment if reference interval on negative strand
263+
if(sr.getInferredInsertSize() % 2 == 0 && coord.getDir().equals("-") ) { mark--; }
264+
// // Shift as needed
265+
// if(sr.getReadNegativeStrandFlag()) { mark -= param.getShift(); }
266+
// else { mark += param.getShift(); }
267+
//Adjust tag start to be within array reference
268+
mark -= GENOMIC_SHIFT;
269+
// //Determine final array strandedness
270+
// boolean useTAG_S2 = false;
271+
// if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
272+
//Increment Final Array keeping track of pileup
273+
for(int m = 0; m < param.getTagExtend() + 1; m++) {
274+
if(mark >= 0 && mark < TAG_S1.length) {
275+
TAG_S1[mark] += 1;
276+
}
277+
}
278+
}
279+
}
280+
}
281+
282+
private void addFragment(SAMRecord sr, BEDCoord coord, int GENOMIC_SHIFT) {
283+
if(sr.getReadPairedFlag()) { //Must be PAIRED-END mapped
284+
if(sr.getProperPairFlag() && sr.getFirstOfPairFlag()) { //Must either be properly paired for midpoint, only first in pair to avoid double-counting
285+
// Apply insert size filters
286+
if(Math.abs(sr.getInferredInsertSize()) < param.getMinInsert() && param.getMinInsert() != -9999) { return; } //Test for MIN insert size cutoff here
287+
if(Math.abs(sr.getInferredInsertSize()) > param.getMaxInsert() && param.getMaxInsert() != -9999) { return; } //Test for MAX insert size cutoff here
288+
// Set marker
289+
int mark = sr.getAlignmentStart();
290+
if(sr.getInferredInsertSize() < 0) {
291+
mark += sr.getInferredInsertSize();
292+
}
293+
// Shift as needed
294+
if(sr.getReadNegativeStrandFlag()) { mark -= param.getShift(); }
295+
else { mark += param.getShift(); }
296+
//Adjust tag start to be within array reference
297+
mark -= GENOMIC_SHIFT;
298+
// //Determine final array strandedness
299+
// boolean useTAG_S2 = false;
300+
// if(param.getStrand() == 0 && (sr.getReadNegativeStrandFlag() != coord.getDir().equals("-"))) { useTAG_S2 = true; }
301+
//Increment Final Array keeping track of pileup
302+
for(int m = 0; m < Math.abs(sr.getInferredInsertSize()) + param.getTagExtend(); m++) {
303+
if(mark >= 0 && mark < TAG_S1.length) {
304+
TAG_S1[mark] += 1;
305+
}
306+
mark += sr.getInferredInsertSize() < 0 ? -1 : 1;
307+
}
308+
}
309+
}
310+
}
214311
}

0 commit comments

Comments
 (0)