-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathConstruct the De Bruijn Graph of a String.py
More file actions
47 lines (39 loc) · 3.05 KB
/
Construct the De Bruijn Graph of a String.py
File metadata and controls
47 lines (39 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Problem input: the k-mer length and the DNA string whose De Bruijn graph
# is printed at the bottom of this script.
k = 12
# The sequence is written as adjacent string literals, which Python joins at
# compile time, so the value contains no line break.
text = (
    "CCGTGGGTATACGTGCATCACATGATCCCAGGTGACGACACTCGATGAAGGCCCAGACTGGACTGGTGACGTTCTGCTTTAGACGACTGATCGTGCGCGCCCTTGTTCAAGGAAGGTGAGCTAGGAGGGTGGAAGGCGTACTCGGTGTCCTTCCATAGGGGTGGTGAAACGCCCGTATCGAAGCGGCGTTTCTGTCGTCGTTGCGTGTTTGGGCTACGTTATCGAGACGCTCCCTTACCAACTCCATTATTTACGAGACCGGTTTGTGTTAGTCGTTCAAACGTTGTGTGGCGTAGCATTCAGAACGCCAGTCTCCTTTGATGCCAGGCTCCAAGTTGGTTTTTAGGGGACAAGCAAATATTACCGGCAAGGACACGCGTAGGACAACGCGTTAGTAGTAATGCCAGGGTGGACCAATGTAAGCTCTTCTACGGAAGAGACGACCTCTATTGTTGAACCGTTTTGTTAGTAGATGATAAATTCGAATGACTAGTAAACCCCGACCCATATTGGTCTCAGGGACCTTCTACGGTTTGTTCAAAGTATTGAACAGCGGCGATACAAAACAAACGCTTGGCAACCGGTCCGGGGAAGAGTATTTTGAACTTGGCGGTGATACGCCATATCAGTGTTGACTCCCATTAGTTCCCGGCTTCTGTGAGAAAGCGCAATGTCGCATAATCCATAAATGAACGCATGTTGCGTAGCACAGATGGGACCGGTCTTGAACTGCCGTATAGAGGCCCGCGACCGAGTTTAGTAGATGAGCTTGCGGGCGCGCACTGCCGAGTTCGGGATAAGATACGTTTGGATGTAAGAAATGTCTTACCTTGTCGGCAATGGGCTTCTCCACTCGGGGAGGAAATCTTTATCACGCGAGGAGGGGGTGATTAACCATGTCTGTCTTCGGAAACGCTCGAGACATAGGGTGAACTGGCAAATCGGGCAAGGGGTCAATCCGCACATCGCTAGCTACGAGGCAGTTGAAGCTGCCCCCTGTTAACGCCCTCCGCAGGTTCGTAAATGTGGGTTTATTTTATGTCATCCTGTCCCAGCGTGGGTGCACTAAAGGACGAGACAAGGCCGTATACGGCTTAAGTGCGCGTCACACACGCACCCAGATTCGTTAAAAAAATAAGAAACTGGTCACTCACTCCCGGGAACACGTATCGGTTCCCAGTCGGGCACTACTCCTGCATGGCCGGACGATCTTCTTTGGCGCCCTTTCAAGCCCGGCCGTTATGCGTTATTGGCTTGGCGCTCACCAGTGACAGGCTCTTAGCGAGAAACAGATACCAGGAGAGGGGTTAACACACCACACGAGTTACCTCTGCATGCCGCAACTCTTCAACAAATCAACCGTGCGCTATAACAAGTCTGAGTCGTGGCCAGTAAAGGCCGTGGACTAACACTCAAAAGACTATTACCTACAATTTAGGAAGATGATCTTGTCCTGGCGGTTCAAGGTGGTAGACTTTAGCGGATTCGACAGTGTATGTCTACCTAAGACTCGAACAAAGAGCTGACGCCCCCAGTACTGTGCGAGTTAGGATCCCTGAGCCGCTTCAGATGACAATACTCCATGCTCACTTGTTAAAGACCTAGGTTATCGGAATCCTTCTATAGAGCTTAGGCCCCAAGTACACTTTAGTACGCTCTTAGCAGGCTTTTATCTCGAACCGGCATTCCCTCATTTCACTTTTATTAGATGTTGCCAACAGAGAAATAAGCTCACCAAATTCACACCCATGTGATTAATGTGCTGAGAAGTCGGTGTTCTTAAACGTTTCCGAGGGTACGACACGGGACCAAGCATCGAATACCAAACAGGGGGTCCAAATCCCCTGGTACCACCATTTCCCAGGCTCATATTTTGGGACCTTCGGGGTGTCGGCTGTTAATGTTTATCTATTAATTGGGTCTGCTGGGATGTTCCTGCTTCAAAGCCTCGGTCCTCCCTGGAGGATGAAGGTCCTGGCTCGAGCCCAGTAT"
    "CTAAAGTG"
)
def kmer_composition(k, dna):
    """Return every length-``k`` substring of ``dna`` in left-to-right order.

    Overlapping windows are all included and repeated substrings are kept.
    When ``dna`` is shorter than ``k`` the result is an empty list.
    """
    window_count = 1 + len(dna) - k
    pieces = []
    for start in range(window_count):
        pieces.append(dna[start:start + k])
    return pieces
def grph_kmers(strings):
    """Return the overlap graph of ``strings`` as a list of directed edges.

    An edge ``(s, t)`` is reported when ``s`` and ``t`` are different strings
    and the last ``kk`` characters of ``s`` equal the first ``kk`` characters
    of ``t``, where ``kk`` is one less than the length of the first string.

    Args:
        strings: Indexable sequence of strings; it is iterated more than once.

    Returns:
        A list of ``(s, t)`` tuples ordered by the position of ``s`` in
        ``strings`` and then by the position of ``t``.  Repeated input
        strings produce repeated edges.  An empty input gives an empty list.
    """
    if not strings:
        return []

    kk = len(strings[0]) - 1
    # NOTE(review): when the strings have length 1, kk is 0 and ``s[-0:]`` is
    # the whole string rather than an empty suffix, so no edges are reported
    # for such input -- confirm whether that is intended.

    # Group the candidates by prefix once so each source string only looks at
    # the strings it can actually overlap with, instead of at every string.
    by_prefix = {}
    for t in strings:
        by_prefix.setdefault(t[:kk], []).append(t)

    graph = []
    for s in strings:
        for t in by_prefix.get(s[-kk:], ()):
            if s != t:
                graph.append((s, t))
    return graph
def deBruijn(k, text):
    """Build the De Bruijn graph of ``text`` for k-mers of length ``k``.

    Every k-mer that occurs in ``text`` contributes one edge from its
    (k-1)-character prefix to its (k-1)-character suffix.  Edges are taken
    only from k-mers that are really present in ``text``; two (k-1)-mers that
    merely overlap are not connected unless the joined k-mer occurs.  A k-mer
    whose prefix equals its suffix yields a self-loop.

    Args:
        k: Length of the k-mers that define the edges.
        text: The string to decompose.

    Returns:
        A list of ``(node, successors)`` tuples sorted by ``node``.
        ``successors`` is a sorted list with duplicates removed.  Nodes
        without outgoing edges are omitted.  The list is empty when ``k`` is
        below 2 or ``text`` is shorter than ``k``.
    """
    if k < 2:
        # Nodes would be empty strings; there is no meaningful graph.
        return []

    successors = {}
    for start in range(len(text) - k + 1):
        kmer = text[start:start + k]
        # Prefix -> suffix edge for this occurrence of the k-mer.
        successors.setdefault(kmer[:-1], set()).add(kmer[1:])

    # Node keys are unique, so sorting the tuples orders them by node only.
    return sorted((node, sorted(targets)) for node, targets in successors.items())
# Print the adjacency list of the De Bruijn graph, one node per line, in the
# form "node -> successor1,successor2,...".
graph = deBruijn(k, text)
for node, successors in graph:
    # Join every successor so that nodes with three or more outgoing edges
    # are not cut down to their first neighbour.
    print(node + " -> " + ",".join(successors))