Skip to content

Commit d403113

Browse files
committed
add scripts and finalize v0.1.1
1 parent 33594a2 commit d403113

10 files changed

Lines changed: 316 additions & 45 deletions

File tree

bin/__init__.py

Whitespace-only changes.

bin/ispcr.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python
2+
#-*- coding: utf-8 -*-
3+
#@Time : 9/10/2022 5:25 PM
4+
#@Author : Runsheng
5+
#@File : ispcr.py
6+
7+
"""
8+
in silico PCR script
9+
"""
10+
11+
import argparse
12+
import os,sys
13+
14+
# self import
15+
from primerdiffer.primer_check import insilicon_pcr, product2seqdic
16+
from primerdiffer.utils import checkblastdb, dic2fasta, dic2dic,fasta2dic
17+
18+
19+
parser=argparse.ArgumentParser()
20+
parser.add_argument("-d", "--wkdir", default=None,
21+
help="The dir path contain the file, if not given, use the current dir")
22+
# input
23+
parser.add_argument("-f", "--forward",
24+
help="the forward primer sequence")
25+
parser.add_argument("-r", "--reverse",
26+
help="the reverse primer sequence")
27+
28+
parser.add_argument("-g", "--genome",
29+
help="the fasta file used to design primer")
30+
31+
# primer cutoff
32+
parser.add_argument("--alignlen", type=int, default=16,
33+
help="the cutoff of primer min align length as a right hit, default is 16")
34+
parser.add_argument("--free3len", type=int, default=2,
35+
help="the cutoff of primer 3' align length as a right hit, default is 2")
36+
parser.add_argument("--productlen", type=int, default=2000,
37+
help="the cutoff of max product which will be treated as a false priming, default is 2000.")
38+
39+
# output paramenter
40+
parser.add_argument("-o", "--out", default="product.fasta",
41+
help="output file, contains all possible amplification regions of this primer pair")
42+
43+
# default handler
44+
args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
45+
wkdir=os.getcwd() if args.wkdir is None else args.wkdir
46+
47+
# main
48+
checkblastdb(args.genome) # check blastdb, if no, run makeblastdb
49+
50+
product_l=insilicon_pcr(primer_left=args.forward,
51+
primer_right=args.reverse,
52+
db=args.genome,
53+
cutoff_alignlength=args.alignlen,
54+
cutoff_free3=args.free3len,
55+
product_cutoff=args.productlen
56+
)
57+
seqdic=dic2dic(fasta2dic(args.genome))
58+
outdic=product2seqdic(product_l,seqdic)
59+
dic2fasta(outdic, args.out)
60+
61+
62+
63+
64+

bin/primerdesign.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python
2+
#-*- coding: utf-8 -*-
3+
#@Time : 9/10/2022 4:30 PM
4+
#@Author : Runsheng
5+
#@File : primerdesign.py
6+
7+
"""
8+
The main script used to run cmd orders
9+
"""
10+
11+
import argparse
12+
import os,sys
13+
14+
#currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
15+
#parentdir = os.path.dirname(os.path.dirname(currentdir))
16+
#sys.path.insert(0,parentdir)
17+
18+
# self import
19+
from primerdiffer.walk_chr import flow_walk_chr
20+
21+
parser=argparse.ArgumentParser()
22+
parser.add_argument("-d", "--wkdir", default=None,
23+
help="The dir path contain the file, if not given, use the current dir")
24+
# input
25+
parser.add_argument("-g1", "--genome1",
26+
help="the fasta file used to design primer")
27+
parser.add_argument("-g2", "--genome2",
28+
help="the fasta file used to check false priming")
29+
parser.add_argument("-pos", "--position",default=None,
30+
help="position on genome1 to design primer, use string with IGV format, like ChrX:1956230-1976220")
31+
32+
# primer cutoff
33+
parser.add_argument("--alignlen", type=int, default=16,
34+
help="the cutoff of primer min align length as a right hit, default is 16")
35+
parser.add_argument("--free3len", type=int, default=2,
36+
help="the cutoff of primer 3' align length as a right hit, default is 2")
37+
parser.add_argument("--productlen", type=int, default=2000,
38+
help="the cutoff of max product which will be treated as a false priming, default is 2000.")
39+
40+
# hit cutoff
41+
parser.add_argument("-h1","--hit1", type=int, default=1,
42+
help="the cutoff of max number of in-silicon PCR product which can be found in genome1. default is 1")
43+
parser.add_argument("-h2","--hit2", type=int, default=0,
44+
help="the cutoff of max number of in-silicon PCR product which can be found in genome2, default is 0")
45+
46+
# run parameter, how dense the primer should be
47+
parser.add_argument("-i","--interval", type=int, default=5000,
48+
help="interval is the region begins to pick primers, default is 5000. If 5k is the unit, will pick one primer each 5k")
49+
parser.add_argument("-j","--jump", type=int, default=500,
50+
help="jump is the region to jump inside intervals if the prvious interval can not generate a valid primer"
51+
", the smaller, more sites to check. Default is 500.")
52+
# output paramenter
53+
parser.add_argument("--prefix", default="primers",
54+
help="prefix of output file, default is primers")
55+
56+
57+
# default handler
58+
args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
59+
wkdir=os.getcwd() if args.wkdir is None else args.wkdir
60+
61+
flow_walk_chr(wkdir=wkdir,
62+
genome1=args.genome1,
63+
genome2=args.genome2,
64+
pos_str=args.position,
65+
cutoff_alignlength=args.alignlen,
66+
cutoff_free3=args.free3len,
67+
product_cutoff=args.productlen,
68+
db1_maxhit=args.hit1,
69+
db2_maxhit=args.hit2,
70+
interval=args.interval,
71+
jump=args.jump,
72+
out_prefix=args.prefix)

primerdiffer/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__= "0.1.0"
1+
__version__= "0.1.1"

primerdiffer/primer_check.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,20 @@
44
# @Author : Runsheng
55
# @Email : Runsheng.lee@gmail.com
66
# @File : primer_check.py
7-
import primer3
87

9-
from primerdiffer.general_settings import primer3_general_settings
108

119
try:
1210
from StringIO import StringIO ## for Python 2
1311
except ImportError:
1412
from io import StringIO ## for Python 3
1513

14+
import primer3
1615
from Bio.Blast.Applications import NcbiblastnCommandline
1716
from Bio.Blast import NCBIXML
1817

18+
from primerdiffer.general_settings import primer3_general_settings
19+
from primerdiffer.utils import fasta2dic,dic2dic,chr_select, reverse_complement
20+
1921

2022
def my_design_primer(name,seq,primer3_settings=primer3_general_settings):
2123
"""
@@ -66,7 +68,7 @@ def filter_hsp(blast_records,query,cutoff_alignlength=16,cutoff_free3=2, debugmo
6668
return keep
6769

6870

69-
def insilicon_pcr(primer_left, primer_right, db, cutoff_alignlength=16, cutoff_free3=2, profuct_cutoff=2000,
71+
def insilicon_pcr(primer_left, primer_right, db, cutoff_alignlength=16, cutoff_free3=2, product_cutoff=2000,
7072
debugmod=False):
7173
"""
7274
para: the left and right primers
@@ -85,12 +87,29 @@ def insilicon_pcr(primer_left, primer_right, db, cutoff_alignlength=16, cutoff_f
8587
for pr in p_right:
8688
# print pl, pr
8789
# use only the start to get a approx length, also, the direction for left and right primer should be different
88-
if pl[0] == pr[0] and abs(pl[1] - pr[1]) <= profuct_cutoff and pl[-1] * pr[-1] == -1:
90+
if pl[0] == pr[0] and abs(pl[1] - pr[1]) <= product_cutoff and pl[-1] * pr[-1] == -1:
8991
possible_product.append((pl[0], pl[1], pr[1]))
9092

9193
return possible_product
9294

9395

96+
def product2seqdic(product_l, seqdic):
97+
"""
98+
use one line from product_l to write the sequence
99+
"""
100+
outdic={}
101+
for product_1 in product_l:
102+
chro, start, end=product_1
103+
# the product could be forward (as usual) or reverse (F as R and R as F)
104+
if end>=start:
105+
name, seq=chr_select(seqdic, chro, start, end)
106+
outdic[name]=seq
107+
else:
108+
name, seq=chr_select(seqdic, chro, end, start)
109+
outdic[name+"_RC"]=reverse_complement(seq)
110+
return outdic
111+
112+
94113
def _is_nofalse_primer(blast_records,query,debugmod=False):
95114
"""
96115
:param blast_records: input a blast record in XML format

primerdiffer/utils.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,21 @@
1010
"""
1111

1212
# Third oart import
13-
from Bio import SeqIO
13+
from os.path import exists
1414

15+
from Bio import SeqIO, Seq
16+
from Bio.Blast.Applications import NcbimakeblastdbCommandline
17+
18+
19+
def reverse_complement(seq):
20+
"""
21+
Given: A DNA string s of length at most 1000 bp.
22+
Return: The reverse complement sc of s.
23+
due to the complement_map,
24+
the symbol such as \n and something else is illegal
25+
the input need to be pure sequence
26+
"""
27+
return str(Seq.reverse_complement(seq))
1528

1629
def fasta2dic(fastafile):
1730
"""
@@ -35,6 +48,13 @@ def dic2dic(record_dict):
3548
seq_dict[k]=seq.upper()
3649
return seq_dict
3750

51+
def dic2fasta(record_dict, out):
52+
with open(out, "w") as fw:
53+
for k, v in record_dict.items():
54+
fw.write(">"+k+"\n")
55+
fw.write(v)
56+
fw.write("\n")
57+
3858

3959
def tuple_to_pos_str(pos_t):
4060
"""
@@ -68,3 +88,21 @@ def chr_select(seq_dict, chro, start,end):
6888
return name,seq
6989

7090

91+
def makeblastdb(genomefile):
92+
cline = NcbimakeblastdbCommandline(dbtype="nucl",
93+
input_file=genomefile)
94+
NcbimakeblastdbCommandline(cmd='makeblastdb', dbtype='prot', input_file='NC_005816.faa')
95+
print(cline)
96+
cline()
97+
98+
99+
def checkblastdb(genomefile):
100+
"""
101+
check if the blastdb exist, if not, create one
102+
"""
103+
dbfile=genomefile+".nsq"
104+
if exists(dbfile):
105+
return 0
106+
else:
107+
makeblastdb(genomefile)
108+
return 1

primerdiffer/walk_chr.py

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,15 @@
99

1010
# self import
1111
import os
12-
from os.path import exists
13-
14-
from Bio.Blast.Applications import NcbimakeblastdbCommandline
1512

1613
from primerdiffer.primer_check import my_design_primer, primer_check
17-
from primerdiffer.utils import dic2dic, fasta2dic, chr_select, tuple_to_pos_str, pos_str_to_tuple
14+
from primerdiffer.utils import dic2dic, fasta2dic, chr_select, tuple_to_pos_str, pos_str_to_tuple, checkblastdb
1815

1916

2017
def walk_chr_dense(genome, chro, start, end, db1, db2,
21-
interval=500000, jump=4000, out_prefix="primers"):
18+
cutoff_alignlength=16, cutoff_free3=2, product_cutoff=2000,
19+
db1_maxhit=1, db2_maxhit=0,
20+
interval=500000, jump=4000, out_prefix="primers", debugmod=False):
2221
"""
2322
:param genome: genome is a dict in name:seq,
2423
:param chro:
@@ -43,7 +42,10 @@ def walk_chr_dense(genome, chro, start, end, db1, db2,
4342
offset += 1
4443
else:
4544
myprimer = my_design_primer(name=name, seq=seq)
46-
primer_used = primer_check(myprimer,db1,db2, debugmod=True)
45+
primer_used = primer_check(myprimer,db1,db2,
46+
cutoff_alignlength, cutoff_free3, product_cutoff,
47+
db1_maxhit, db2_maxhit,
48+
debugmod=debugmod)
4749
if primer_used == 0:
4850
offset += 1
4951
else:
@@ -62,27 +64,10 @@ def walk_chr_dense(genome, chro, start, end, db1, db2,
6264
return primer_dict
6365

6466

65-
def makeblastdb(genomefile):
66-
cline = NcbimakeblastdbCommandline(dbtype="nucl",
67-
input_file=genomefile)
68-
NcbimakeblastdbCommandline(cmd='makeblastdb', dbtype='prot', input_file='NC_005816.faa')
69-
print(cline)
70-
cline()
71-
72-
73-
def checkblastdb(genomefile):
74-
"""
75-
check if the blastdb exist, if not, create one
76-
"""
77-
dbfile=genomefile+".nsq"
78-
if exists(dbfile):
79-
return 0
80-
else:
81-
makeblastdb(genomefile)
82-
return 1
83-
84-
85-
def flow_walk_chr(wkdir, genome1, genome2, pos_str, interval=4000, jump=400, out_prefix="primers"):
67+
def flow_walk_chr(wkdir, genome1, genome2, pos_str,
68+
cutoff_alignlength=16, cutoff_free3=2, product_cutoff=2000,
69+
db1_maxhit=1, db2_maxhit=0,
70+
interval=4000, jump=400, out_prefix="primers"):
8671
"""
8772
8873
genome1: the genome fasta file used to design primers
@@ -100,10 +85,14 @@ def flow_walk_chr(wkdir, genome1, genome2, pos_str, interval=4000, jump=400, out
10085

10186
# main
10287
g1=dic2dic(fasta2dic(genome1))
103-
#g2=dic2dic(fasta2dic(genome2))
10488
chro, start, end = pos_str_to_tuple(pos_str)
10589

10690
primer_dict=walk_chr_dense(genome=g1, chro=chro, start=start, end=end,
10791
db1=genome1,db2=genome2,
108-
interval=interval, jump=jump, out_prefix=out_prefix)
92+
cutoff_alignlength=cutoff_alignlength,
93+
cutoff_free3=cutoff_free3,
94+
product_cutoff=product_cutoff,
95+
db1_maxhit=db1_maxhit,
96+
db2_maxhit=db2_maxhit,
97+
interval=interval, jump=jump, out_prefix=out_prefix)
10998
print(primer_dict)

0 commit comments

Comments
 (0)