|
| 1 | +package util; |
| 2 | + |
| 3 | +import java.util.ArrayList; |
| 4 | +import java.util.Arrays; |
| 5 | +import java.util.concurrent.ThreadLocalRandom; |
| 6 | + |
| 7 | +import objects.CoordinateObjects.GenericCoord; |
| 8 | + |
| 9 | +public class GenomeSizeReference { |
| 10 | + |
| 11 | + private static String genomeName = ""; |
| 12 | + private static long genomeSize = 0; |
| 13 | + |
| 14 | + private static ArrayList<String> chromName = null; |
| 15 | + private static ArrayList<Long> chromSize = null; |
| 16 | + |
| 17 | + public GenomeSizeReference() { |
| 18 | + chromName = new ArrayList<String>(); |
| 19 | + chromSize = new ArrayList<Long>(); |
| 20 | + } |
| 21 | + |
| 22 | + public GenomeSizeReference(String build) { |
| 23 | + if(build.equals("sacCer3_cegr")) { |
| 24 | + initialize_sacCer3_cegr(); |
| 25 | + } else if(build.equals("hg19")) { |
| 26 | + initialize_hg19(); |
| 27 | + } else if(build.equals("hg19_contigs")) { |
| 28 | + initialize_hg19_contig(); |
| 29 | + } else if(build.equals("mm10")) { |
| 30 | + initialize_mm10(); |
| 31 | + } else { |
| 32 | + System.err.println("Non-existent genome build!\n"); |
| 33 | + } |
| 34 | + } |
| 35 | + |
| 36 | + public void setGenome(String build) { |
| 37 | + if(build.equals("sacCer3_cegr")) { |
| 38 | + initialize_sacCer3_cegr(); |
| 39 | + } else if(build.equals("hg19")) { |
| 40 | + initialize_hg19(); |
| 41 | + } else if(build.equals("hg19_contigs")) { |
| 42 | + initialize_hg19_contig(); |
| 43 | + } else if(build.equals("mm10")) { |
| 44 | + initialize_mm10(); |
| 45 | + } else { |
| 46 | + System.err.println("Non-existent genome build!\n"); |
| 47 | + } |
| 48 | + } |
| 49 | + |
| 50 | + public String getGenome() { |
| 51 | + return genomeName; |
| 52 | + } |
| 53 | + |
| 54 | + public long getGenomeSize() { |
| 55 | + return genomeSize; |
| 56 | + } |
| 57 | + |
| 58 | + public static void initialize_sacCer3_cegr() { |
| 59 | + genomeName = "sacCer3_cegr"; |
| 60 | + genomeSize = 12163423; |
| 61 | + String[] chrom = {"chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chrM", "2-micron"}; |
| 62 | + chromName = new ArrayList<String>(Arrays.asList(chrom)); |
| 63 | + long[] size = new long[] {230218, 813184, 316620, 1531933, 576874, 270161, 1090940, 562643, 439888, 745751, 666816, 1078177, 924431, 784333, 1091291, 948066, 85779, 6318}; |
| 64 | + chromSize = new ArrayList<Long>(); |
| 65 | + for(int x = 0; x < size.length; x++) { chromSize.add(new Long(size[x])); } |
| 66 | + } |
| 67 | + |
| 68 | + public static void initialize_hg19() { |
| 69 | + genomeName = "hg19"; |
| 70 | + genomeSize = (long) 2.725521370E9; |
| 71 | + String[] chrom = {"chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chrX", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr20", "chrY", "chr19", "chr22", "chr21"}; |
| 72 | + chromName = new ArrayList<String>(Arrays.asList(chrom)); |
| 73 | + long[] size = new long[] {249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 155270560, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 63025520, 59373566, 59128983, 51304566, 48129895}; |
| 74 | + chromSize = new ArrayList<Long>(); |
| 75 | + for(int x = 0; x < size.length; x++) { chromSize.add(new Long(size[x])); } |
| 76 | + } |
| 77 | + |
| 78 | + public static void initialize_hg19_contig() { |
| 79 | + genomeName = "hg19"; |
| 80 | + genomeSize = (long) 3.137161264E9; |
| 81 | + String[] chrom = {"chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chrX", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr20", "chrY", "chr19", "chr22", "chr21", |
| 82 | + "chr6_ssto_hap7", "chr6_mcf_hap5", "chr6_cox_hap2", "chr6_mann_hap4", "chr6_apd_hap1", "chr6_qbl_hap6", "chr6_dbb_hap3", "chr17_ctg5_hap1", "chr4_ctg9_hap1", "chr1_gl000192_random", "chrUn_gl000225", "chr4_gl000194_random", "chr4_gl000193_random", "chr9_gl000200_random", |
| 83 | + "chrUn_gl000222", "chrUn_gl000212", "chr7_gl000195_random", "chrUn_gl000223", "chrUn_gl000224", "chrUn_gl000219", "chr17_gl000205_random", "chrUn_gl000215", "chrUn_gl000216", "chrUn_gl000217", "chr9_gl000199_random", "chrUn_gl000211","chrUn_gl000213", "chrUn_gl000220", |
| 84 | + "chrUn_gl000218", "chr19_gl000209_random", "chrUn_gl000221", "chrUn_gl000214", "chrUn_gl000228", "chrUn_gl000227", "chr1_gl000191_random", "chr19_gl000208_random", "chr9_gl000198_random", "chr17_gl000204_random", "chrUn_gl000233", "chrUn_gl000237", "chrUn_gl000230", |
| 85 | + "chrUn_gl000242", "chrUn_gl000243", "chrUn_gl000241", "chrUn_gl000236", "chrUn_gl000240", "chr17_gl000206_random", "chrUn_gl000232", "chrUn_gl000234", "chr11_gl000202_random", "chrUn_gl000238", "chrUn_gl000244", "chrUn_gl000248", "chr8_gl000196_random", "chrUn_gl000249", |
| 86 | + "chrUn_gl000246", "chr17_gl000203_random", "chr8_gl000197_random", "chrUn_gl000245", "chrUn_gl000247", "chr9_gl000201_random", "chrUn_gl000235", "chrUn_gl000239", "chr21_gl000210_random", "chrUn_gl000231", "chrUn_gl000229", "chrM", "chrUn_gl000226", "chr18_gl000207_random"}; |
| 87 | + |
| 88 | + chromName = new ArrayList<String>(Arrays.asList(chrom)); |
| 89 | + long[] size = new long[] {249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 155270560, 146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 63025520, 59373566, 59128983, 51304566, 48129895, |
| 90 | + 4928567, 4833398, 4795371, 4683263, 4622290, 4611984, 4610396, 1680828, 590426, 547496, 211173, 191469, 189789, 187035, 186861, 186858, 182896, 180455, 179693, 179198, 174588, 172545, 172294, 172149, 169874, 166566, 164239, 161802, 161147, 159169, 155397, 137718, 129120, 128374, |
| 91 | + 106433, 92689, 90085, 81310, 45941,45867, 43691, 43523, 43341, 42152, 41934, 41933, 41001, 40652, 40531, 40103, 39939, 39929, 39786, 38914, 38502, 38154, 37498, 37175, 36651, 36422, 36148, 34474, 33824, 27682, 27386, 19913, 16571, 15008, 4262}; |
| 92 | + chromSize = new ArrayList<Long>(); |
| 93 | + for(int x = 0; x < size.length; x++) { chromSize.add(new Long(size[x])); } |
| 94 | + } |
| 95 | + |
| 96 | + public static void initialize_mm10() { |
| 97 | + genomeName = "mm10"; |
| 98 | + genomeSize = (long) 2.725521370E9; |
| 99 | + String[] chrom = {"chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chrX", "chrY"}; |
| 100 | + chromName = new ArrayList<String>(Arrays.asList(chrom)); |
| 101 | + long[] size = new long[] { 195471971, 182113224, 160039680, 156508116, 151834684, 149736546, 145441459, 129401213, 124595110, 130694993, 122082543, 120129022, 120421639, 124902244, 104043685, 98207768, 94987271, 90702639, 61431566, 171031299, 91744698}; |
| 102 | + chromSize = new ArrayList<Long>(); |
| 103 | + for(int x = 0; x < size.length; x++) { chromSize.add(new Long(size[x])); } |
| 104 | + } |
| 105 | + |
| 106 | + public GenericCoord generateRandomCoord(int WINDOW) { |
| 107 | + GenericCoord coord = null; |
| 108 | + boolean VALID = false; |
| 109 | + while(!VALID) { |
| 110 | + long location = ThreadLocalRandom.current().nextLong(genomeSize); |
| 111 | + long runningTotal = 0; |
| 112 | + int chromIndex = 0; |
| 113 | + for(int x = 0; x < chromSize.size(); x++) { |
| 114 | + if(location < runningTotal + chromSize.get(x).longValue()) { |
| 115 | + chromIndex = x; |
| 116 | + x = chromSize.size() + 1; |
| 117 | + } else { runningTotal += chromSize.get(x).longValue(); } |
| 118 | + } |
| 119 | + location -= runningTotal; |
| 120 | + String CHROM = chromName.get(chromIndex); |
| 121 | + long START = location - (WINDOW / 2); |
| 122 | + long STOP = location + (WINDOW / 2); |
| 123 | + String DIR = "+"; |
| 124 | + if(ThreadLocalRandom.current().nextBoolean()) { DIR = "-"; } |
| 125 | + |
| 126 | + if(START > 0 && STOP < chromSize.get(chromIndex).longValue()) { |
| 127 | + VALID = true; |
| 128 | + coord = new GenericCoord(CHROM, START, STOP, DIR); |
| 129 | + } |
| 130 | + } |
| 131 | + return coord; |
| 132 | + } |
| 133 | + |
| 134 | + //Check to make sure WINDOW size is smaller than every chromosome to prevent infinite loop |
| 135 | + public boolean isSmaller(int WINDOW) { |
| 136 | + for(int x = 0; x < chromSize.size(); x++) { |
| 137 | + if(WINDOW > chromSize.get(0)) { return false; } |
| 138 | + } |
| 139 | + return true; |
| 140 | + } |
| 141 | +} |
0 commit comments