|
1 | 1 | package util; |
2 | 2 |
|
3 | 3 | import java.io.File; |
| 4 | +import java.io.FileNotFoundException; |
4 | 5 | import java.io.IOException; |
| 6 | +import java.util.ArrayList; |
| 7 | +import java.util.Arrays; |
| 8 | +import java.util.HashMap; |
| 9 | +import java.util.List; |
| 10 | +import java.util.Scanner; |
5 | 11 |
|
6 | 12 | import htsjdk.samtools.AbstractBAMFileIndex; |
7 | 13 | import htsjdk.samtools.SAMRecord; |
8 | 14 | import htsjdk.samtools.SAMSequenceRecord; |
9 | 15 | import htsjdk.samtools.SamReader; |
10 | 16 | import htsjdk.samtools.SamReaderFactory; |
| 17 | +import htsjdk.samtools.ValidationStringency; |
11 | 18 | import htsjdk.samtools.util.CloseableIterator; |
| 19 | +import objects.CoordinateObjects.BEDCoord; |
12 | 20 |
|
13 | 21 | public class BAMUtilities { |
14 | 22 |
|
@@ -50,7 +58,106 @@ public static double calculateStandardizationRatio(File BAM, int read) throws IO |
50 | 58 | else if(read == 2) { totalAligned = READ1 + READ2; } |
51 | 59 | else if(read == 3) { totalAligned = MID; } |
52 | 60 |
|
53 | | - if(totalAligned > 0) { return (totalAligned / totalGenome); } |
| 61 | + if(totalAligned > 0) { return (totalGenome / totalAligned); } |
54 | 62 | else { return 1; } |
55 | 63 | } |
| 64 | + |
| 65 | + public static double calculateStandardizationRatio(File BAM, File BLACKFile, int read) throws IOException { |
| 66 | + //Blacklist filter in 500bp blocks on the genome with any blacklist region overlapping negating the entire block |
| 67 | + int windowSize = 500; |
| 68 | + |
| 69 | + List<String> chromName = new ArrayList<String>(); |
| 70 | + List<Long> chromLength= new ArrayList<Long>(); |
| 71 | + double totalAligned = 0; |
| 72 | + double totalGenome = 0; |
| 73 | + |
| 74 | + SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT); |
| 75 | + SamReader inputBAM = factory.open(BAM); |
| 76 | + AbstractBAMFileIndex inputBAI = (AbstractBAMFileIndex) inputBAM.indexing().getIndex(); |
| 77 | + for (int z = 0; z < inputBAI.getNumberOfReferences(); z++) { |
| 78 | + SAMSequenceRecord rec = inputBAM.getFileHeader().getSequence(z); |
| 79 | + chromName.add(rec.getSequenceName()); |
| 80 | + chromLength.add(new Long(rec.getSequenceLength())); |
| 81 | + totalGenome += rec.getSequenceLength(); |
| 82 | + } |
| 83 | + inputBAM.close(); |
| 84 | + inputBAI.close(); |
| 85 | + |
| 86 | + //Load Blacklist into HashMap |
| 87 | + HashMap<String, ArrayList<BEDCoord>> BLACKLIST = loadBlacklist(BLACKFile); |
| 88 | + |
| 89 | + inputBAM = SamReaderFactory.makeDefault().open(BAM); |
| 90 | + for(int x = 0; x < chromName.size(); x++) { |
| 91 | + String seq = chromName.get(x); |
| 92 | + long chromSize = chromLength.get(x); |
| 93 | + //Blacklist filter each chromosome, set blacklisted windows to NaN |
| 94 | + float[] chrom = maskChrom(seq, chromSize, windowSize, BLACKLIST); |
| 95 | + //Iterate through chromosome loading tags into window bin |
| 96 | + CloseableIterator<SAMRecord> iter = inputBAM.query(seq, 0, (int)chromSize, false); |
| 97 | + //SAMRecords are 1-based |
| 98 | + while (iter.hasNext()) { |
| 99 | + SAMRecord sr = iter.next(); |
| 100 | + int FivePrime = sr.getUnclippedStart() - 1; |
| 101 | + if(sr.getReadNegativeStrandFlag()) { FivePrime = sr.getUnclippedEnd(); } |
| 102 | + int INDEX = (FivePrime / windowSize); |
| 103 | + if(!sr.getReadUnmappedFlag() && INDEX < chrom.length) { //Test for mappability |
| 104 | + if(sr.getReadPairedFlag()) { //Test for paired-end status |
| 105 | + if(sr.getSecondOfPairFlag() && read == 1) { chrom[INDEX]++; } //count read 2 |
| 106 | + else if(sr.getFirstOfPairFlag() && (read == 0 || read == 2)) { chrom[INDEX]++; } // count read 1 |
| 107 | + if(sr.getProperPairFlag() && sr.getFirstOfPairFlag() && read == 3) { chrom[INDEX]++; } //count properly paired reads for midpoint |
| 108 | + } else if(read == 0 || read == 2) { //If the read is mapped but not paired-end, default to read 1 |
| 109 | + chrom[INDEX]++; |
| 110 | + } |
| 111 | + } |
| 112 | + } |
| 113 | + iter.close(); |
| 114 | + for(int i = 0; i < chrom.length; i++) { if(!Float.isNaN(chrom[i])) { totalAligned += chrom[i]; } } |
| 115 | + } |
| 116 | + if(totalAligned > 0) { return (totalGenome / totalAligned); } |
| 117 | + else { return 1; } |
| 118 | + } |
| 119 | + |
| 120 | + private static float[] maskChrom(String chrom, long chromSize, int windowSize, HashMap<String, ArrayList<BEDCoord>> BLACKLIST) { |
| 121 | + float[] chromArray = new float[(int) (chromSize / windowSize) + 1]; |
| 122 | + if(BLACKLIST.containsKey(chrom)) { |
| 123 | + ArrayList<BEDCoord> blacklist = BLACKLIST.get(chrom); |
| 124 | + for(int x = 0; x < blacklist.size(); x++) { |
| 125 | + long START = blacklist.get(x).getStart(); |
| 126 | + long STOP = blacklist.get(x).getStop(); |
| 127 | + while(START < STOP) { |
| 128 | + int index = ((int)START / windowSize); |
| 129 | + if(index < chromArray.length) { chromArray[index] = Float.NaN; } |
| 130 | + START += windowSize; |
| 131 | + } |
| 132 | + } |
| 133 | + } |
| 134 | + return chromArray; |
| 135 | + } |
| 136 | + |
| 137 | + private static HashMap<String, ArrayList<BEDCoord>> loadBlacklist(File BLACKFile) throws FileNotFoundException { |
| 138 | + HashMap<String, ArrayList<BEDCoord>> BLACKLIST = new HashMap<String, ArrayList<BEDCoord>>(); |
| 139 | + Scanner scan = new Scanner(BLACKFile); |
| 140 | + while (scan.hasNextLine()) { |
| 141 | + String[] temp = scan.nextLine().split("\t"); |
| 142 | + if(temp.length > 2) { |
| 143 | + if(!temp[0].contains("track") && !temp[0].contains("#")) { |
| 144 | + if(Integer.parseInt(temp[1]) >= 0) { |
| 145 | + int start = Integer.parseInt(temp[1]); |
| 146 | + int stop = Integer.parseInt(temp[2]); |
| 147 | + BEDCoord coord = new BEDCoord(temp[0], start, stop , "."); |
| 148 | + if(BLACKLIST.containsKey(temp[0])) { BLACKLIST.get(temp[0]).add(coord); } |
| 149 | + else { |
| 150 | + ArrayList<BEDCoord> newchrom = new ArrayList<BEDCoord>(); |
| 151 | + newchrom.add(coord); |
| 152 | + BLACKLIST.put(temp[0], newchrom); |
| 153 | + } |
| 154 | + } else { |
| 155 | + System.err.println("Invalid Coordinate in File!!!\n" + Arrays.toString(temp)); |
| 156 | + } |
| 157 | + } |
| 158 | + } |
| 159 | + } |
| 160 | + scan.close(); |
| 161 | + return BLACKLIST; |
| 162 | + } |
56 | 163 | } |
0 commit comments