WilsonSayresLab
diff --git a/‎manuscript/AvesMultiReadMe.sh‎
Lines changed: 58 additions & 0 deletions b/‎manuscript/AvesMultiReadMe.sh‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎manuscript/AvesPairwiseReadMe.sh‎
Lines changed: 67 additions & 0 deletions b/‎manuscript/AvesPairwiseReadMe.sh‎
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,58 @@
+###############################################################
+# This is a readme for analyzing fasta alignments
+#	downloaded from UCSC.
+#
+# 	Required programs:	AlignmentProcessor1.2
+#						CodeML4.9
+###############################################################
+
+#-------------------------------
+# 1. Alignments
+#-------------------------------
+
+# From the UCSC table browser page:
+Select the human hg19 genome, genes and gene predictions, and ensembl genes.
+Select CDS FASTA in output format, enter a file name, and select gzip compressed before hitting get output.
+On the next page, select the multiz100way alignment under MAF table. Select every species under birds, and deselect everything else.
+
+# From Ensembl BioMart:
+# http://dec2013.archive.ensembl.org/biomart/martview/00c5ddbaf1b0544dc54db6cd39534529
+Download a list of hg19/GRCh37 gene IDs with corresponding transcript IDs and chromosome. Save as a csv.
+
+#-------------------------------
+# 2. Run AlignmentProcessor to obtain phylip output
+#-------------------------------
+
+# Make sure to clone AlignmentProcessor from GitHub: https://github.com/WilsonSayresLab/AlignmentProcessor
+
+Upload fasta alignment and avesCodeml.sh to cluster and submit job.
+
+#-------------------------------
+# 4. Add gene IDs and locus data 
+#-------------------------------
+
+# First download concatenated CodeML results from the cluster.
+# Sort ascending by transcript ID (I did it in Excel since I inspected the files as well), then add gene IDs and locus data
+
+	join -t "," --header --check-order -1 2 -2 1 "h19GeneTranscriptIDs.csv" "branchSpecific/avesCodeMLNull.csv" > "branchSpecific/avesNullOutput.csv"
+	join -t "," --header --check-order -1 2 -2 1 "h19GeneTranscriptIDs.csv" "branchSpecific/avesCodeMLAlt.csv" > "branchSpecific/avesAltOutput.csv" 
+
+#-------------------------------
+# 5. Permute result files to obtain a p-value for mean and median tree lengths
+#-------------------------------
+
+# To compile the Cython scripts, change into the PermutationScripts directory and type: python setup.py build_ext --inplace
+
+	cd PermutationScripts/
+
+# dN
+	python permutation.py --c1 3 --c2 3 --i1 branchSpecific/avesNullOutput.csv --i2 branchSpecific/avesAltOutput.csv
+
+# dS
+	python permutation.py --c1 4 --c2 4 --i1 branchSpecific/avesNullOutput.csv --i2 branchSpecific/avesAltOutput.csv
+
+# dN/dS
+	python permutation.py --c1 5 --c2 5 --i1 branchSpecific/avesNullOutput.csv --i2 branchSpecific/avesAltOutput.csv
+
+# Tree length
+	python permutation.py --c1 6 --c2 6 --i1 branchSpecific/avesNullOutput.csv --i2 branchSpecific/avesAltOutput.csv
@@ -0,0 +1,67 @@
+###############################################################
+# This is a readme for analyzing pairwise fasta alignments
+#	output from lastz.
+#
+# 	Required programs:	Lastz 
+#				AlignmentProcessor1.2
+#				PhyML
+#				paml
+###############################################################
+
+#-------------------------------
+# 1. Run Lastz for Alignment
+#-------------------------------
+
+# Download scripts from the Avian Genome Project and upload to server.
+# Submit falcon_chicken.sh script on Saguaro. See comments at the end for changing file, target, and query names.
+
+#-------------------------------
+# 2. Run Stitch Gene Blocks on Galaxy
+#-------------------------------
+
+# Upload the Lastz output maf file to Galaxy, and import the GalGal4 bed12 file from UCSC (select Genes and Gene Predictions,
+# Ensembl genes, genome, and BED format before getting output. On the next page select whole gene). Remove the "chr_UN" and "chr"
+# chromosome prefixes from the file, and resubmit the file to Galaxy. Also submit the chicken genome used in the alignment as a 
+# custom build and set the builds of all three files to the custom build. (Alternatively, just use the UCSC chicken genome in the alignment.)
+
+#-------------------------------
+# 3. Run AlignmentProcessor
+#-------------------------------
+
+# KaKs_Calculator
+	cd AlignmentProcessor/
+	python AlignmentProcessor.py -t 4 --axt --kaks -r galGal4 -i Pairwise/galgal_falper.fa -o KaKs/
+
+# CodeML 
+	cd AlignmentProcessor/
+	cp controlFiles/pairwiseAlt.ctl PairwiseCodeML1.2
+	python AlignmentProcessor.py -t 4 --phylip --codeml -r galGal4 -i Pairwise/galgal_falper.fa -o PairwiseCodeML/
+
+#-------------------------------
+# 4. Prepare Output for analysis in R
+#-------------------------------
+
+# Download a list of gene and transcript IDs and their chromosome for galGal4 from Ensembl BioMart (v83). 
+# Join this list with the Ka/Ks results to add gene, scaffold, and chromosome information to the results.
+# Prior to joining, open each list in Excel and sort both files either ascending or descending by their transcript IDs.
+# It does not matter which one you use, as long as you use the same one for each (join will not work properly if they are sorted differently).
+
+# KaKs
+	join -t "," --header --check-order -1 2 -2 1 "Pairwise/galGal4GeneTranscriptIDs.txt" "KaKs1.2/KaKs.csv" > "KaKs1.2/galGal4KaKs.csv"
+
+# CodeML
+	python bin/ConcatenateCodeML.py --pairwise -i PairwiseCodeML1.2/04_CodemlOutput -o PairwiseCodeML1.2/pairwiseAlt.csv
+	join -t "," --header --check-order -1 2 -2 1 "Pairwise/galGal4GeneTranscriptIDs.txt" "PairwiseCodeML1.2/pairwiseAlt.csv" > "PairwiseCodeML1.2/galGal4Pairwise.csv"
+
+#-------------------------------
+# 5. Permutation test for Z chromosome vs. Autosomes
+#-------------------------------
+
+# Make two csv files from the joined file in step 4, one for only autosomal genes and one for only Z-linked genes. 
+# Manually remove any genes with NAs for Ka, Ks, or Ka/Ks.
+
+	cd PermutationScripts/
+	
+	python permutation.py --c1 7 --c2 7 -i1 KaKsOut/galGal_falPerAutosomalKaKs.csv -i2 KaKsOut/galGal_falPerZKaKs.csv
+
+