Skip to content

Commit 9d5148d

Browse files
committed
Re-ran step-wise comparison for manuscript
1 parent eda240f commit 9d5148d

3 files changed

Lines changed: 208 additions & 0 deletions

File tree

manuscript/R/step-wiseKaKsReadMe.R

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
###############################################################
2+
# This is a readme for analyzing Ka/Ks output from
3+
# AlignmentProcessor at different filtering point
4+
#
5+
# Required programs: R
6+
# Boot Package
7+
###############################################################
8+
9+
#-------------------------------
10+
# 1. Import Data into R
11+
#-------------------------------
12+
13+
filtered<-read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/noStops/galGal4-filteredKaKs.csv",header =TRUE)
14+
stops<-read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/withStops/galGal4-withStopsKaKs.csv",header =TRUE)
15+
unfiltered<-read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/unfiltered/galGal4-unfilteredKaKs.csv",header =TRUE)
16+
17+
# Remove NAs
18+
filtered <- na.omit(filtered$Ka.Ks)
19+
stops <- na.omit(stops$Ka.Ks)
20+
unfiltered <- na.omit(unfiltered$Ka.Ks)
21+
22+
#-------------------------------
23+
# 2. Determine median Ka, Ks, and Ka/Ks
24+
#-------------------------------
25+
# Load bootstrap function and boot
26+
source("/home/yitzhak/Dropbox/Scripts/R/bootstrap.R")
27+
28+
# Ka/Ks
29+
bootstrap(filtered)
30+
bootstrap(stops)
31+
bootstrap(unfiltered)
32+
33+
#-------------------------------
34+
# 3. Import Autosomal and Z chromosome data to get transcript totals
35+
#-------------------------------
36+
37+
fa <- read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/noStops/filteredKaKs-Autosomes.csv", header=TRUE)
38+
fa <- na.omit(fa$Ka.Ks)
39+
fz <- read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/noStops/filteredKaKs-Z.csv", header=TRUE)
40+
fz <- na.omit(fz$Ka.Ks)
41+
42+
sa <- read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/withStops/withStopsKaKs-Autosomes.csv", header=TRUE)
43+
sa <- na.omit(sa$Ka.Ks)
44+
sz <- read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/withStops/withStopsKaKs-Z.csv", header=TRUE)
45+
sz <- na.omit(sz$Ka.Ks)
46+
47+
ua <- read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/unfiltered/unfilteredKaKs-Autosomes.csv", header=TRUE)
48+
ua <- na.omit(ua$Ka.Ks)
49+
uz <- read.csv("/media/yitzhak/Data1/archive/Aves/kaksFiltering/unfiltered/unfilteredKaKs-Z.csv", header=TRUE)
50+
uz <- na.omit(uz$Ka.Ks)
51+
52+
#-------------------------------
53+
# 4. Calculate Median Ka/Ks for Autosomal and Z chromosome
54+
#-------------------------------
55+
56+
bootstrap(fa)
57+
bootstrap(fz)
58+
bootstrap(sa)
59+
bootstrap(sz)
60+
bootstrap(ua)
61+
bootstrap(uz)

manuscript/maskStops.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
'''This program will mask stop codons.
2+
3+
Copyright 2016 by Shawn Rupp'''
4+
5+
import argparse
6+
from glob import glob
7+
from collections import OrderedDict
8+
import os
9+
10+
def openFiles(indir, outdir):
11+
'''Opens all input files in the directory, performs filtering steps,
12+
and writes to file.'''
13+
print("\tMasking stop codons...")
14+
# Iterate through files
15+
files = glob(indir + "*")
16+
for fasta in files:
17+
filename = fasta.split("/")[-1]
18+
geneid = filename.split(".")[0]
19+
# Extract number of sequnces from file name
20+
n = int(filename.split(".")[1])
21+
seqs = seqDict(fasta)
22+
seqs = removeStops(seqs)
23+
# Create output file
24+
outfile = (outdir + geneid + "." + str(n) + ".filtered.fa")
25+
writeDict(outfile, seqs)
26+
27+
def seqDict(fasta):
28+
'''Convert fasta into separate sequence objects, determine sequence names
29+
and create dictionary entries for each set of codons'''
30+
seqs = OrderedDict()
31+
with open(fasta, "r") as infile:
32+
for line in infile:
33+
if line[0] == ">":
34+
species = line[1:].strip()
35+
if line[0] != ">":
36+
codons = []
37+
seq = line.strip()
38+
for i in range(0, len(seq), 3):
39+
codons.append(seq[i:i +3])
40+
i += 3
41+
seqs[species] = codons
42+
return seqs
43+
44+
def removeStops(seqs):
45+
'''This will replace internal stop codons with gaps.'''
46+
for species in seqs:
47+
for idx,codon in enumerate(seqs[species]):
48+
if codon == "TAA" or codon == "TAG" or codon == "TGA":
49+
# Replace stops with dashes
50+
seqs[species][idx] = "---"
51+
return seqs
52+
53+
def writeDict(outfile, seqs):
54+
# Convert dictionary values to string and write to file
55+
with open(outfile, "w") as output:
56+
for species in seqs:
57+
output.write(">" + str(species) + "\n")
58+
seq = ""
59+
for codon in seqs[species]:
60+
seq += str(codon)
61+
output.write(seq + "\n")
62+
63+
def main():
64+
parser = argparse.ArgumentParser(description="This script will mask stop \
65+
codons in a fasta alignment.")
66+
parser.add_argument("-i", help="Path to input directory.")
67+
parser.add_argument("-o", help="Path to output directory.")
68+
# Parse arguments and assign to variables
69+
args = parser.parse_args()
70+
indir = args.i
71+
if indir[-1] != "/":
72+
indir += "/"
73+
outdir = args.o
74+
if outdir[-1] != "/":
75+
outdir += "/"
76+
openFiles(indir, outdir)
77+
78+
if __name__ == "__main__":
79+
main()

manuscript/step-wiseKaKs.sh

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
##############################################################################
2+
# This script will call AlignmentProcessor on the chicken-falcon pairwise
3+
# alignment and run KaKs_Calclator for each level of filtering.
4+
#
5+
# Required: AlignmentProcessor1.4
6+
# KaKs_Calculator
7+
#
8+
##############################################################################
9+
10+
#-----------------------------------------------------------------------------
11+
# 1. Full filtering
12+
#-----------------------------------------------------------------------------
13+
14+
python AlignmentProcessor.py -t 6 -r galGal4 --kaks -i /home/shawn/Documents/AvesAlignments/Pairwise/galgal_falper.fa -o /home/shawn/Documents/AvesAlignments/kaks/noStops
15+
16+
# Add ID and locus data
17+
join -t "," --header --check-order -1 2 -2 1 "/home/shawn/Documents/AvesAlignments/Pairwise/galGal4GeneTranscriptIDs.csv" "/home/shawn/Documents/AvesAlignments/kaks/noStops/KaKs.csv" > "/home/shawn/Documents/AvesAlignments/kaks/noStops/galGal4-filteredKaKs.csv"
18+
19+
# Subset Autosomes and Z and run permutation script
20+
cat "/home/shawn/Documents/AvesAlignments/kaks/noStops/galGal4-filteredKaKs.csv" | awk -F ',' '{if($3=="Z" || $3=="Chromosome Name") print $0}' > "/home/shawn/Documents/AvesAlignments/kaks/noStops/filteredKaKs-Z.csv"
21+
cat "/home/shawn/Documents/AvesAlignments/kaks/noStops/galGal4-filteredKaKs.csv" | awk -F ',' '{if ($3 ~ /^[1-9]$/) print $0}' > "/home/shawn/Documents/AvesAlignments/kaks/noStops/filteredKaKs-Autosomes.csv"
22+
cat "/home/shawn/Documents/AvesAlignments/kaks/noStops/galGal4-filteredKaKs.csv" | awk -F ',' '{if ($3 ~ /^[1-9][0-9]$/) print $0}' >> "/home/shawn/Documents/AvesAlignments/kaks/noStops/filteredKaKs-Autosomes.csv"
23+
24+
python permutation.py --c1 7 --c2 7 --i1 /home/shawn/Documents/AvesAlignments/kaks/noStops/filteredKaKs-Autosomes.csv --i2 /home/shawn/Documents/AvesAlignments/kaks/noStops/filteredKaKs-Z.csv
25+
26+
#-----------------------------------------------------------------------------
27+
# 2. Retaining sequences with premature stops
28+
#-----------------------------------------------------------------------------
29+
30+
python AlignmentProcessor.py -t 6 -r galGal4 --kaks --retainStops -i /home/shawn/Documents/AvesAlignments/Pairwise/galgal_falper.fa -o /home/shawn/Documents/AvesAlignments/kaks/withStops
31+
32+
# Add ID and locus data
33+
join -t "," --header --check-order -1 2 -2 1 "/home/shawn/Documents/AvesAlignments/Pairwise/galGal4GeneTranscriptIDs.csv" "/home/shawn/Documents/AvesAlignments/kaks/withStops/KaKs.csv" > "/home/shawn/Documents/AvesAlignments/kaks/withStops/galGal4-withStopsKaKs.csv"
34+
35+
# Subset Autosomes and Z and run permutation script
36+
cat "/home/shawn/Documents/AvesAlignments/kaks/withStops/galGal4-withStopsKaKs.csv" | awk -F ',' '{if($3=="Z" || $3=="Chromosome Name") print $0}' > "/home/shawn/Documents/AvesAlignments/kaks/withStops/withStopsKaKs-Z.csv"
37+
cat "/home/shawn/Documents/AvesAlignments/kaks/withStops/galGal4-withStopsKaKs.csv" | awk -F ',' '{if ($3 ~ /^[1-9]$/ || $3=="Chromosome Name") print $0}' > "/home/shawn/Documents/AvesAlignments/kaks/withStops/withStopsKaKs-Autosomes.csv"
38+
cat "/home/shawn/Documents/AvesAlignments/kaks/withStops/galGal4-withStopsKaKs.csv" | awk -F ',' '{if ($3 ~ /^[1-9][0-9]$/ || $3=="Chromosome Name") print $0}' >> "/home/shawn/Documents/AvesAlignments/kaks/withStops/withStopsKaKs-Autosomes.csv"
39+
40+
python permutation.py --c1 7 --c2 7 --i1 /home/shawn/Documents/AvesAlignments/kaks/withStops/withStopsKaKs-Autosomes.csv --i2 /home/shawn/Documents/AvesAlignments/kaks/withStops/withStopsKaKs-Z.csv
41+
42+
#-----------------------------------------------------------------------------
43+
# 3. No filtering (using split fastas from previous steps)
44+
#-----------------------------------------------------------------------------
45+
46+
python maskStops.py -i /home/shawn/Documents/AvesAlignments/kaks/noStops/01_splitFasta -o /home/shawn/Documents/AvesAlignments/kaks/unfiltered/rmStops
47+
48+
python bin/03_ConvertFasta.py --axt -i /home/shawn/Documents/AvesAlignments/kaks/unfiltered/rmStops -o /home/shawn/Documents/AvesAlignments/kaks/unfiltered/axt
49+
50+
python bin/04_CallKaKs.py -t 6 -i /home/shawn/Documents/AvesAlignments/kaks/unfiltered/axt -o /home/shawn/Documents/AvesAlignments/kaks/unfiltered/kaks
51+
52+
# Add ID and locus data
53+
join -t "," --header --check-order -1 2 -2 1 "/home/shawn/Documents/AvesAlignments/Pairwise/galGal4GeneTranscriptIDs.csv" "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/KaKs.csv" > "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/galGal4-unfilteredKaKs.csv"
54+
55+
# Subset Autosomes and Z and run permutation script
56+
cat "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/galGal4-unfilteredKaKs.csv" | awk -F ',' '{if($3=="Z" || $3=="Chromosome Name") print $0}' > "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/unfilteredKaKs-Z.csv"
57+
cat "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/galGal4-unfilteredKaKs.csv" | awk -F ',' '{if ($3 ~ /^[1-9]$/ || $3=="Chromosome Name") print $0}' > "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/unfilteredKaKs-Autosomes.csv"
58+
cat "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/galGal4-unfilteredKaKs.csv" | awk -F ',' '{if ($3 ~ /^[1-9][0-9]$/ || $3=="Chromosome Name") print $0}' >> "/home/shawn/Documents/AvesAlignments/kaks/unfiltered/unfilteredKaKs-Autosomes.csv"
59+
60+
python permutation.py --c1 7 --c2 7 --i1 /home/shawn/Documents/AvesAlignments/kaks/unfiltered/unfilteredKaKs-Autosomes.csv --i2 /home/shawn/Documents/AvesAlignments/kaks/unfiltered/unfilteredKaKs-Z.csv
61+
62+
#-----------------------------------------------------------------------------
63+
# 4. Permutations between Filtering levels
64+
#-----------------------------------------------------------------------------
65+
66+
python permutation.py --c1 7 --c2 7 --i1 /home/shawn/Documents/AvesAlignments/kaks/unfiltered/galGal4-unfilteredKaKs.csv --i2 /home/shawn/Documents/AvesAlignments/kaks/withStops/galGal4-withStopsKaKs.csv
67+
68+
python permutation.py --c1 7 --c2 7 --i1 /home/shawn/Documents/AvesAlignments/kaks/unfiltered/galGal4-unfilteredKaKs.csv --i2 /home/shawn/Documents/AvesAlignments/kaks/noStops/galGal4-filteredKaKs.csv

0 commit comments

Comments
 (0)