44# @Author : Runsheng
55# @Email : Runsheng.lee@gmail.com
66# @File : primer_check.py
7+ import primer3
8+
9+ from primerdiffer .general_settings import primer3_general_settings
710
811try :
912 from StringIO import StringIO ## for Python 2
1417from Bio .Blast import NCBIXML
1518
1619
def my_design_primer(name, seq, primer3_settings=primer3_general_settings):
    """
    Thin convenience wrapper around primer3-py's ``designPrimers``.

    :param name: identifier passed to primer3 as SEQUENCE_ID
    :param seq: template sequence string, passed as SEQUENCE_TEMPLATE
                (the caller is asked to supply it in UPPER case)
    :param primer3_settings: global primer3 settings; defaults to the
                             package-wide ``primer3_general_settings``
    :return: the result of ``primer3.bindings.designPrimers`` for this template
    """
    template_args = dict(SEQUENCE_ID=name, SEQUENCE_TEMPLATE=seq)
    return primer3.bindings.designPrimers(template_args, primer3_settings)
32+
33+
34+
1735def primer_blast (query , db ):
1836 # query = myprimer['PRIMER_LEFT_0_SEQUENCE'] # The sequence
1937 blastn_cline = NcbiblastnCommandline (db = db , outfmt = 5 , task = "blastn-short" ) #Blast command
@@ -22,7 +40,58 @@ def primer_blast(query, db):
2240 return blast_records
2341
2442
25- def is_nofalse_primer (blast_records ,query ,debugmod = False ):
def filter_hsp(blast_records, query, cutoff_alignlength=16, cutoff_free3=2, debugmod=False):
    """
    Collect the BLAST HSPs that pass the in-silico PCR priming filter.

    An HSP is kept when EITHER of the following holds:

    * its ``align_length`` is at least ``cutoff_alignlength``, or
    * at most ``cutoff_free3`` bases at the end of ``query`` lie beyond the
      aligned region, i.e. ``len(query) - hsp.query_end <= cutoff_free3``.

    Used mainly by insilicon_pcr.

    :param blast_records: one parsed BLAST record exposing ``.alignments``;
                          each alignment exposes ``.hit_def`` and ``.hsps``
    :param query: the primer sequence (str) that was blasted
    :param cutoff_alignlength: minimum alignment length that keeps an HSP
    :param cutoff_free3: maximum number of unaligned trailing query bases
                         that still keeps an HSP
    :param debugmod: when truthy, print per-alignment and per-HSP diagnostics
    :return: list of ``(hit_def, sbjct_start, sbjct_end, strand)`` tuples,
             in the order the HSPs appear in ``blast_records``
    """
    keep = []
    query_length = len(query)

    for alignment in blast_records.alignments:
        if debugmod:
            print("chro is", alignment.hit_def)

        for hsp in alignment.hsps:
            if debugmod:
                print("The subj end is", hsp.sbjct_end)
                print("The query is", query)
                print(hsp)
                print(hsp.query_end, query_length)

            long_enough = hsp.align_length >= cutoff_alignlength
            end_anchored = query_length - hsp.query_end <= cutoff_free3
            if long_enough or end_anchored:
                # Last element of hsp.frame is the subject frame: the query is
                # always 1, the target is 1 (plus) or -1 (minus).
                strand = hsp.frame[-1]
                keep.append((alignment.hit_def, hsp.sbjct_start, hsp.sbjct_end, strand))
                if debugmod:
                    print("===============Keep==============")

    return keep
67+
68+
def insilicon_pcr(primer_left, primer_right, db, cutoff_alignlength=16, cutoff_free3=2, profuct_cutoff=2000,
                  debugmod=False):
    """
    Predict the possible PCR products of a primer pair against a BLAST db.

    Both primers are blasted with ``primer_blast`` and their HSPs are filtered
    with ``filter_hsp``.  A left/right site pair is reported as a product when
    the two sites are on the same subject, their subject start positions are
    at most ``profuct_cutoff`` apart, and their strands are opposite.

    :param primer_left: left primer sequence (str)
    :param primer_right: right primer sequence (str)
    :param db: BLAST database handed to ``primer_blast``
    :param cutoff_alignlength: forwarded to ``filter_hsp``
    :param cutoff_free3: forwarded to ``filter_hsp``
    :param profuct_cutoff: maximum distance between the two subject start
                           positions for a pair to count as a product
    :param debugmod: forwarded to ``filter_hsp`` so its diagnostics are printed
    :return: bed-like list of ``(hit_def, left_sbjct_start, right_sbjct_start)``
    """
    blast_records_left = primer_blast(primer_left, db)
    blast_records_right = primer_blast(primer_right, db)

    # Each site is a (hit_def, sbjct_start, sbjct_end, strand) tuple.
    sites_left = filter_hsp(blast_records_left, primer_left,
                            cutoff_alignlength, cutoff_free3, debugmod=debugmod)
    sites_right = filter_hsp(blast_records_right, primer_right,
                             cutoff_alignlength, cutoff_free3, debugmod=debugmod)

    possible_product = []
    for pl in sites_left:  # may need to add a score system to this function
        for pr in sites_right:
            same_subject = pl[0] == pr[0]
            # Only the start positions are compared, so this is an approximate length.
            close_enough = abs(pl[1] - pr[1]) <= profuct_cutoff
            # The two primers must bind opposite strands (1 * -1 == -1).
            opposite_strands = pl[-1] * pr[-1] == -1
            if same_subject and close_enough and opposite_strands:
                possible_product.append((pl[0], pl[1], pr[1]))

    return possible_product
92+
93+
94+ def _is_nofalse_primer (blast_records ,query ,debugmod = False ):
2695 """
2796 :param blast_records: input a blast record in XML format
2897 :param query: the query sequence (str)
@@ -43,21 +112,46 @@ def is_nofalse_primer(blast_records,query,debugmod=False):
43112 return True
44113
45114
def primer_check(myprimer, db1, db2, primer_number=5,
                 cutoff_alignlength=16, cutoff_free3=2, profuct_cutoff=2000,
                 db1_maxhit=1, db2_maxhit=0,
                 debugmod=False):
    """
    Return the first designed primer pair that passes in-silico PCR on two dbs.

    ``myprimer`` is the return value of ``primer3.bindings.designPrimers``;
    ``db1`` and ``db2`` are BLAST databases.

    CASE1:
    db1 is the fasta genome used to design the primer, so it should give
    at most one product (db1_maxhit=1).
    db2 is the fasta genome which should not be amplified, so it should give
    no product (db2_maxhit=0).

    CASE2:
    db1 is a short sequence used to design the primer; products need to be
    <= db1_maxhit.
    db2 is the whole genome; set db2_maxhit to the number of products that
    is still acceptable there.

    :param myprimer: mapping with ``PRIMER_LEFT_<i>_SEQUENCE``,
                     ``PRIMER_RIGHT_<i>_SEQUENCE`` and
                     ``PRIMER_PAIR_<i>_PRODUCT_SIZE`` entries
    :param db1: BLAST db the primers were designed from
    :param db2: BLAST db used to check false priming
    :param primer_number: maximum number of primer pairs to try, in order
    :param cutoff_alignlength: forwarded to ``insilicon_pcr``
    :param cutoff_free3: forwarded to ``insilicon_pcr``
    :param profuct_cutoff: forwarded to ``insilicon_pcr``
    :param db1_maxhit: maximum number of products allowed on db1
    :param db2_maxhit: maximum number of products allowed on db2
    :param debugmod: print the tested pair and its predicted products
    :return: ``(left, right, product_size)`` for the first passing pair,
             or ``0`` when no pair passes
    """
    for i in range(primer_number):
        left_key = 'PRIMER_LEFT_' + str(i) + '_SEQUENCE'
        if left_key not in myprimer:
            # Fewer pairs were designed than primer_number asks for:
            # nothing left to test, so fall through to the "no primer" result.
            break

        left = myprimer[left_key]
        right = myprimer['PRIMER_RIGHT_' + str(i) + '_SEQUENCE']
        # designed primer size
        product_size = myprimer['PRIMER_PAIR_' + str(i) + '_PRODUCT_SIZE']

        # the original sequence to detect the false primer
        product_l1 = insilicon_pcr(left, right, db1,
                                   cutoff_alignlength, cutoff_free3, profuct_cutoff,
                                   debugmod=debugmod)
        # the genome used to check false priming
        product_l2 = insilicon_pcr(left, right, db2,
                                   cutoff_alignlength, cutoff_free3, profuct_cutoff,
                                   debugmod=debugmod)
        if debugmod:
            print("The %d primer :" % i)
            print(left, right)
            print("product_l1", product_l1)
            print("produect_l2", product_l2)

        if len(product_l1) <= db1_maxhit and len(product_l2) <= db2_maxhit:  # no false primer
            return left, right, product_size  # return is a tuple
    return 0
63156
157+
0 commit comments