Skip to content

Commit 57a51ff

Browse files
committed
Blacklist Total-Tag normalization
Added ability to calculate blacklist total-tag normalization. Also changed ratio to * instead of /
1 parent 501c93a commit 57a51ff

1 file changed

Lines changed: 108 additions & 1 deletion

File tree

src/util/BAMUtilities.java

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
11
package util;
22

33
import java.io.File;
4+
import java.io.FileNotFoundException;
45
import java.io.IOException;
6+
import java.util.ArrayList;
7+
import java.util.Arrays;
8+
import java.util.HashMap;
9+
import java.util.List;
10+
import java.util.Scanner;
511

612
import htsjdk.samtools.AbstractBAMFileIndex;
713
import htsjdk.samtools.SAMRecord;
814
import htsjdk.samtools.SAMSequenceRecord;
915
import htsjdk.samtools.SamReader;
1016
import htsjdk.samtools.SamReaderFactory;
17+
import htsjdk.samtools.ValidationStringency;
1118
import htsjdk.samtools.util.CloseableIterator;
19+
import objects.CoordinateObjects.BEDCoord;
1220

1321
public class BAMUtilities {
1422

@@ -50,7 +58,106 @@ public static double calculateStandardizationRatio(File BAM, int read) throws IO
5058
else if(read == 2) { totalAligned = READ1 + READ2; }
5159
else if(read == 3) { totalAligned = MID; }
5260

53-
if(totalAligned > 0) { return (totalAligned / totalGenome); }
61+
if(totalAligned > 0) { return (totalGenome / totalAligned); }
5462
else { return 1; }
5563
}
64+
65+
public static double calculateStandardizationRatio(File BAM, File BLACKFile, int read) throws IOException {
66+
//Blacklist filter in 500bp blocks on the genome with any blacklist region overlapping negating the entire block
67+
int windowSize = 500;
68+
69+
List<String> chromName = new ArrayList<String>();
70+
List<Long> chromLength= new ArrayList<Long>();
71+
double totalAligned = 0;
72+
double totalGenome = 0;
73+
74+
SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
75+
SamReader inputBAM = factory.open(BAM);
76+
AbstractBAMFileIndex inputBAI = (AbstractBAMFileIndex) inputBAM.indexing().getIndex();
77+
for (int z = 0; z < inputBAI.getNumberOfReferences(); z++) {
78+
SAMSequenceRecord rec = inputBAM.getFileHeader().getSequence(z);
79+
chromName.add(rec.getSequenceName());
80+
chromLength.add(new Long(rec.getSequenceLength()));
81+
totalGenome += rec.getSequenceLength();
82+
}
83+
inputBAM.close();
84+
inputBAI.close();
85+
86+
//Load Blacklist into HashMap
87+
HashMap<String, ArrayList<BEDCoord>> BLACKLIST = loadBlacklist(BLACKFile);
88+
89+
inputBAM = SamReaderFactory.makeDefault().open(BAM);
90+
for(int x = 0; x < chromName.size(); x++) {
91+
String seq = chromName.get(x);
92+
long chromSize = chromLength.get(x);
93+
//Blacklist filter each chromosome, set blacklisted windows to NaN
94+
float[] chrom = maskChrom(seq, chromSize, windowSize, BLACKLIST);
95+
//Iterate through chromosome loading tags into window bin
96+
CloseableIterator<SAMRecord> iter = inputBAM.query(seq, 0, (int)chromSize, false);
97+
//SAMRecords are 1-based
98+
while (iter.hasNext()) {
99+
SAMRecord sr = iter.next();
100+
int FivePrime = sr.getUnclippedStart() - 1;
101+
if(sr.getReadNegativeStrandFlag()) { FivePrime = sr.getUnclippedEnd(); }
102+
int INDEX = (FivePrime / windowSize);
103+
if(!sr.getReadUnmappedFlag() && INDEX < chrom.length) { //Test for mappability
104+
if(sr.getReadPairedFlag()) { //Test for paired-end status
105+
if(sr.getSecondOfPairFlag() && read == 1) { chrom[INDEX]++; } //count read 2
106+
else if(sr.getFirstOfPairFlag() && (read == 0 || read == 2)) { chrom[INDEX]++; } // count read 1
107+
if(sr.getProperPairFlag() && sr.getFirstOfPairFlag() && read == 3) { chrom[INDEX]++; } //count properly paired reads for midpoint
108+
} else if(read == 0 || read == 2) { //If the read is mapped but not paired-end, default to read 1
109+
chrom[INDEX]++;
110+
}
111+
}
112+
}
113+
iter.close();
114+
for(int i = 0; i < chrom.length; i++) { if(!Float.isNaN(chrom[i])) { totalAligned += chrom[i]; } }
115+
}
116+
if(totalAligned > 0) { return (totalGenome / totalAligned); }
117+
else { return 1; }
118+
}
119+
120+
private static float[] maskChrom(String chrom, long chromSize, int windowSize, HashMap<String, ArrayList<BEDCoord>> BLACKLIST) {
121+
float[] chromArray = new float[(int) (chromSize / windowSize) + 1];
122+
if(BLACKLIST.containsKey(chrom)) {
123+
ArrayList<BEDCoord> blacklist = BLACKLIST.get(chrom);
124+
for(int x = 0; x < blacklist.size(); x++) {
125+
long START = blacklist.get(x).getStart();
126+
long STOP = blacklist.get(x).getStop();
127+
while(START < STOP) {
128+
int index = ((int)START / windowSize);
129+
if(index < chromArray.length) { chromArray[index] = Float.NaN; }
130+
START += windowSize;
131+
}
132+
}
133+
}
134+
return chromArray;
135+
}
136+
137+
private static HashMap<String, ArrayList<BEDCoord>> loadBlacklist(File BLACKFile) throws FileNotFoundException {
138+
HashMap<String, ArrayList<BEDCoord>> BLACKLIST = new HashMap<String, ArrayList<BEDCoord>>();
139+
Scanner scan = new Scanner(BLACKFile);
140+
while (scan.hasNextLine()) {
141+
String[] temp = scan.nextLine().split("\t");
142+
if(temp.length > 2) {
143+
if(!temp[0].contains("track") && !temp[0].contains("#")) {
144+
if(Integer.parseInt(temp[1]) >= 0) {
145+
int start = Integer.parseInt(temp[1]);
146+
int stop = Integer.parseInt(temp[2]);
147+
BEDCoord coord = new BEDCoord(temp[0], start, stop , ".");
148+
if(BLACKLIST.containsKey(temp[0])) { BLACKLIST.get(temp[0]).add(coord); }
149+
else {
150+
ArrayList<BEDCoord> newchrom = new ArrayList<BEDCoord>();
151+
newchrom.add(coord);
152+
BLACKLIST.put(temp[0], newchrom);
153+
}
154+
} else {
155+
System.err.println("Invalid Coordinate in File!!!\n" + Arrays.toString(temp));
156+
}
157+
}
158+
}
159+
}
160+
scan.close();
161+
return BLACKLIST;
162+
}
56163
}

0 commit comments

Comments
 (0)