-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFind a Middle Edge in an Alignment Graph in Linear Space.py
More file actions
46 lines (36 loc) · 3.68 KB
/
Find a Middle Edge in an Alignment Graph in Linear Space.py
File metadata and controls
46 lines (36 loc) · 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from helpers import create_strings
from Bio.Align import substitution_matrices
from numpy import argmax,argmin
def FindMiddleEdge(s,t,replace_score=substitution_matrices.load("BLOSUM62"),indel_cost=5):
def update(j,col1,col2,s,t):
col2[0] = - (j * indel_cost)
for i in range(1,len(col2)):
scores = [col1[i-1] + score((s[i-1],t[j-1]),replace_score=replace_score),
col2[i-1] - indel_cost,
col1[i] - indel_cost
]
best = argmax(scores)
col2[i] = scores[best]
return col2,col1
def explore(s,t,middle):
column_A = [-(i * indel_cost) for i in range(len(s)+1)]
column_B = [0 for i in range(len(s)+1)]
for j in range(1,middle+1):
scores,previous = update(j,column_A,column_B,s,t) if j%2 ==1 else update(j,column_B,column_A,s,t)
return scores,previous
middle = len(t)//2
from_source,_ = explore(s,t,middle)
to_sink,pre = explore(s[::-1],t[::-1],middle)
length = [a+b for (a,b) in zip(from_source,to_sink[::-1])]
aux = [0 for i in range(len(s)+1)]
post,_ = update(middle+1,from_source,aux,s,t)
next_steps = [a+b for (a,b) in zip(post,pre[::-1])]
return ((argmax(length), middle), (argmax(next_steps), middle+1))
def score(pair,replace_score=substitution_matrices.load("BLOSUM62")):
def reverse(pair):
a,b=pair
return (b,a)
return replace_score[pair] if pair in replace_score else replace_score[reverse(pair)]
if __name__=='__main__':
print(FindMiddleEdge('LFRNDCFMATRMHNYIHPAQLSLFKIAFWGVNQIWSDNWPDDAMCHLTIIGPMVPLEQIWDPSTFFFEWPMQRQHNCFGKMPDGHAICCFNNCRYQIEFDPWRDNMYPWVGYSIKEFCQGKGTLMRIIMIKMVSVIVQPNYYKIYERLNMCELYHQCTMYHQCTFEPWWYRNSQFDDIPMVSEVTWSTHTQWWCRNCNASNLAKFPNYYFSVQTIHVEPGGRYSWDQPKSYAGSLEETCGDSCPKNPMFWDRNHLYTCASQCPQHEEMVKHGVYCEVFRVCDSTHDEPGSSNRAQILDVHCVNRVNYPAPYFASAQGILNLYLVMAGGKHEQHKPWHILKCMEPELPRYKFDLANLKVPGDAVMCVTHWTVWWFCQNRPCNCHWWFHFGSVRCHHPASMTMIGMMNFKQAYHWEITIKIWHWRSIDMVHYAKLCATIWGGPKPIFVCTQELHGGCSEWKPIQYEIVDSSPFMECKYACNKPEFIDHGRCTHSSKDRIERDPTGDEGEKVMGPWCETRCAQTFESHACFQSPGAPKCDLERDHLLVEYRWERNAYFMAFVDDYRGRIIDWWYKDNQCFCEIPALENTQENRMLHVNNAGHFWQNSWQCKNERRRMTMEVRLHGYPEAFMMKVLFPNAFTHIYFGGRQKVIGCFRRVYPVNNDQIFTENQRLMGRLYLAIYWPAYQGTWQWRISGRELVKQFHGWDMTTVGTYGPSHQRDNCDVYTDPLHIIELWKWSHVSGYIEFQLVTNNQWKATHGVEIMKYRDRGSCDRFNSVMHHTWVEIHNLKYCPVWVCLMQFCFCPKLWHLHQTMTDANRYYIARYVCTWTAKNMRFQWPYFHVQIKMVTCQFCGMMVFRYGFRHIIWKCEVPIIDMWFKRSYEPDSPCMLAYILPYNAYGTMVKSENLGNCPTSKSMPVQIFGPSFSHEFDPWDEASACDNMVLNGRPISYMQSYGAWDHPQFHDKDSGLHHTVKIKQWPQKRTNWTQFNWVVSHPFYEGLRHQVPNFSHRFNEQAAVMIRDHYCMYTKSHLMRSAYI',
'MPLIHWFTMGPPGIKIHQCCCTLWFVHADNCEKDWAQGAWNNCDWMWMTPSWFEKWATSNLNDPDCLSCLFYYPHYASTGQACYCETYQQRNMKLRILLQDTICKDFIVVGKPSKCGHKSSRRSLRPEELTSPSAWLVVARDDMYPYKEMLVAMKMNMENNYEPWQQGDGKHHICAPDANWKGNMYPQYIGICMPHFGLHVTVHRMTVARENMAECQHQDPDNPVLMMVNGKWYRETWWGQKIKHKTWIGDLDHIMSLIIFNQVCTMNRFQPDIIIGAKDFQLTKMEVQKHGGCFTRGMGPIGHYLFPLARMYFFCTAYVYCPDREGNWVTWNCTPVTSGHIYYNSQRNDWGLMCDRAKRRGTVKPIDFRACMKNVQSEAKVHSRERHNEQCKTCLQECAAEFKALWWWVHHSGDIPSHHTWYLCQISQFAHCFGIGDSCCWTECYKWQMLYGGCAEEFIIWKPIQYEIVDSSPFLECKYACNKPEFIGHRDKIERDHLISSRRTNGEKVMGPWCETRCAQTFESHACFQSPGAPHCDLERDHLLVEYRWERNAYFMAFGSDGYRGVWDLFAGDKIMNQCFSEIPKPCGFKVGLENLEVYDEPWYNIMDHFEAVDHLESGDLLCASRTDVKGQDVDYEHIRNSITGIHSYPTWCGQSAAHGICQTQPMSCNWRTEYVNDATKNSRASTKLTLRPLNWGMQRADQTNFDTQALFPKTDPWHGGIDYISENGTLQNMHGKYAPWPHRGRPPGTDSLCKRFWSYHGMWYWWYVNHIQCIIDDLCVDQEVITIRENLESVYGKDVWEILCQNKCGGPCLYPYSCRNAFEELPWGECPMPLENWPRWRLEGTFMVLCGCLIYQFPSVNLKCYPVWIQGGHCVQHQWALAWGKTFGSDCDTERRRECQCQTMENWEERRILAQKPRMQGFQVMNCMHFILGKQRIYSCRHDPQSLGPVHKPINEYHSRKEKLSMYTKYFAHIPKSCTPRLVRETIIPITQMQHGDLQHCECSIWPSNYACGYVCWVILKSQKPWPILLELWLDMQPVINCDDC'))